scraper stuff

This commit is contained in:
Barrett Ruth 2026-03-07 16:29:02 -05:00
parent 2b8c53f9d7
commit 86b20aaee5
Signed by: barrett
GPG key ID: A6C96C9349D2FC81
4 changed files with 36 additions and 11 deletions

View file

@@ -9,7 +9,12 @@ from typing import Any
import httpx
from .base import BaseScraper, clear_platform_cookies, load_platform_cookies, save_platform_cookies
from .base import (
BaseScraper,
clear_platform_cookies,
load_platform_cookies,
save_platform_cookies,
)
from .timeouts import BROWSER_SESSION_TIMEOUT, HTTP_TIMEOUT
from .models import (
ContestListResult,
@@ -234,9 +239,7 @@ def _submit_headless_codechef(
print(json.dumps({"status": "logging_in"}), flush=True)
session.fetch(f"{BASE_URL}/login", page_action=login_action)
if login_error:
return SubmitResult(
success=False, error=login_error
)
return SubmitResult(success=False, error=login_error)
logged_in = True
if not _practice:

View file

@@ -8,7 +8,13 @@ from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
from .base import (
BaseScraper,
clear_platform_cookies,
extract_precision,
load_platform_cookies,
save_platform_cookies,
)
from .models import (
ContestListResult,
ContestSummary,
@@ -387,7 +393,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
google_search=False,
cookies=saved_cookies,
) as session:
session.fetch(f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True)
session.fetch(
f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True
)
if logged_in:
return LoginResult(success=True, error="")
except Exception:
@@ -419,7 +427,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True)
if not logged_in:
return LoginResult(success=False, error="Login failed (bad credentials?)")
return LoginResult(
success=False, error="Login failed (bad credentials?)"
)
try:
browser_cookies = session.context.cookies()
@@ -445,7 +455,6 @@ def _submit_headless(
source_code = Path(file_path).read_text()
try:
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
except ImportError:
@@ -519,7 +528,9 @@ def _submit_headless(
) as session:
if not _retried and saved_cookies:
print(json.dumps({"status": "checking_login"}), flush=True)
session.fetch(f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True)
session.fetch(
f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True
)
if not logged_in:
print(json.dumps({"status": "logging_in"}), flush=True)

View file

@@ -10,7 +10,13 @@ from pathlib import Path
import httpx
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
from .base import (
BaseScraper,
clear_platform_cookies,
extract_precision,
load_platform_cookies,
save_platform_cookies,
)
from .timeouts import HTTP_TIMEOUT
from .models import (
ContestListResult,

View file

@@ -8,7 +8,12 @@ from typing import Any, cast
import httpx
from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies
from .base import (
BaseScraper,
extract_precision,
load_platform_cookies,
save_platform_cookies,
)
from .timeouts import HTTP_TIMEOUT
from .models import (
ContestListResult,