scraper stuff
This commit is contained in:
parent
2b8c53f9d7
commit
86b20aaee5
4 changed files with 36 additions and 11 deletions
|
|
@ -9,7 +9,12 @@ from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from .base import BaseScraper, clear_platform_cookies, load_platform_cookies, save_platform_cookies
|
from .base import (
|
||||||
|
BaseScraper,
|
||||||
|
clear_platform_cookies,
|
||||||
|
load_platform_cookies,
|
||||||
|
save_platform_cookies,
|
||||||
|
)
|
||||||
from .timeouts import BROWSER_SESSION_TIMEOUT, HTTP_TIMEOUT
|
from .timeouts import BROWSER_SESSION_TIMEOUT, HTTP_TIMEOUT
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
|
|
@ -234,9 +239,7 @@ def _submit_headless_codechef(
|
||||||
print(json.dumps({"status": "logging_in"}), flush=True)
|
print(json.dumps({"status": "logging_in"}), flush=True)
|
||||||
session.fetch(f"{BASE_URL}/login", page_action=login_action)
|
session.fetch(f"{BASE_URL}/login", page_action=login_action)
|
||||||
if login_error:
|
if login_error:
|
||||||
return SubmitResult(
|
return SubmitResult(success=False, error=login_error)
|
||||||
success=False, error=login_error
|
|
||||||
)
|
|
||||||
logged_in = True
|
logged_in = True
|
||||||
|
|
||||||
if not _practice:
|
if not _practice:
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,13 @@ from typing import Any
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
|
from .base import (
|
||||||
|
BaseScraper,
|
||||||
|
clear_platform_cookies,
|
||||||
|
extract_precision,
|
||||||
|
load_platform_cookies,
|
||||||
|
save_platform_cookies,
|
||||||
|
)
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
|
|
@ -387,7 +393,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
|
||||||
google_search=False,
|
google_search=False,
|
||||||
cookies=saved_cookies,
|
cookies=saved_cookies,
|
||||||
) as session:
|
) as session:
|
||||||
session.fetch(f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True)
|
session.fetch(
|
||||||
|
f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True
|
||||||
|
)
|
||||||
if logged_in:
|
if logged_in:
|
||||||
return LoginResult(success=True, error="")
|
return LoginResult(success=True, error="")
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -419,7 +427,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
|
||||||
|
|
||||||
session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True)
|
session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True)
|
||||||
if not logged_in:
|
if not logged_in:
|
||||||
return LoginResult(success=False, error="Login failed (bad credentials?)")
|
return LoginResult(
|
||||||
|
success=False, error="Login failed (bad credentials?)"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
browser_cookies = session.context.cookies()
|
browser_cookies = session.context.cookies()
|
||||||
|
|
@ -445,7 +455,6 @@ def _submit_headless(
|
||||||
|
|
||||||
source_code = Path(file_path).read_text()
|
source_code = Path(file_path).read_text()
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
|
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
@ -519,7 +528,9 @@ def _submit_headless(
|
||||||
) as session:
|
) as session:
|
||||||
if not _retried and saved_cookies:
|
if not _retried and saved_cookies:
|
||||||
print(json.dumps({"status": "checking_login"}), flush=True)
|
print(json.dumps({"status": "checking_login"}), flush=True)
|
||||||
session.fetch(f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True)
|
session.fetch(
|
||||||
|
f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True
|
||||||
|
)
|
||||||
|
|
||||||
if not logged_in:
|
if not logged_in:
|
||||||
print(json.dumps({"status": "logging_in"}), flush=True)
|
print(json.dumps({"status": "logging_in"}), flush=True)
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,13 @@ from pathlib import Path
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
|
from .base import (
|
||||||
|
BaseScraper,
|
||||||
|
clear_platform_cookies,
|
||||||
|
extract_precision,
|
||||||
|
load_platform_cookies,
|
||||||
|
save_platform_cookies,
|
||||||
|
)
|
||||||
from .timeouts import HTTP_TIMEOUT
|
from .timeouts import HTTP_TIMEOUT
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,12 @@ from typing import Any, cast
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies
|
from .base import (
|
||||||
|
BaseScraper,
|
||||||
|
extract_precision,
|
||||||
|
load_platform_cookies,
|
||||||
|
save_platform_cookies,
|
||||||
|
)
|
||||||
from .timeouts import HTTP_TIMEOUT
|
from .timeouts import HTTP_TIMEOUT
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue