fix(scrapers): harden CSES and CF submit edge cases (#295)

Problem: CSES `_web_login` did bare dict indexing on the API response,
causing an opaque `KeyError` if fields were absent. `_check_token`
swallowed all exceptions as `False`, treating transient network errors
as invalid tokens. CF wrote the cookie cache on login and submit without
checking that an auth cookie was present, and swallowed
`_solve_turnstile` failures in `submit_action`.

Solution: Replace direct indexing with `.get()` + `RuntimeError` for
missing CSES API fields. Re-raise `httpx` network/timeout exceptions
from `_check_token`. Guard CF cookie writes behind a check for the
`X-User-Handle` cookie (the CF auth cookie). Propagate
`_solve_turnstile` errors so failures surface instead of the submit
silently proceeding.
This commit is contained in:
Barrett Ruth 2026-03-05 18:58:27 -05:00
parent 6c036a7b2e
commit cc48c901c0
Signed by: barrett
GPG key ID: A6C96C9349D2FC81
2 changed files with 12 additions and 10 deletions

View file

@ -401,7 +401,8 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
try:
browser_cookies = session.context.cookies()
cookie_cache.write_text(json.dumps(browser_cookies))
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
cookie_cache.write_text(json.dumps(browser_cookies))
except Exception:
pass
@ -478,10 +479,7 @@ def _submit_headless(
if "/enter" in page.url or "/login" in page.url:
needs_relogin = True
return
try:
_solve_turnstile(page)
except Exception:
pass
_solve_turnstile(page)
try:
page.select_option(
'select[name="submittedProblemIndex"]',
@ -550,7 +548,7 @@ def _submit_headless(
try:
browser_cookies = session.context.cookies()
if browser_cookies:
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
cookie_cache.write_text(json.dumps(browser_cookies))
except Exception:
pass

View file

@ -352,8 +352,12 @@ class CSESScraper(BaseScraper):
f"{API_URL}/login", headers=HEADERS, timeout=HTTP_TIMEOUT
)
api_data = api_resp.json()
token: str = api_data["X-Auth-Token"]
auth_url: str = api_data["authentication_url"]
token: str | None = api_data.get("X-Auth-Token")
auth_url: str | None = api_data.get("authentication_url")
if not token:
raise RuntimeError("CSES API login response missing 'X-Auth-Token'")
if not auth_url:
raise RuntimeError("CSES API login response missing 'authentication_url'")
auth_page = await client.get(auth_url, headers=HEADERS, timeout=HTTP_TIMEOUT)
auth_csrf = re.search(r'name="csrf_token" value="([^"]+)"', auth_page.text)
@ -388,8 +392,8 @@ class CSESScraper(BaseScraper):
timeout=HTTP_TIMEOUT,
)
return r.status_code == 200
except Exception:
return False
except (httpx.ConnectError, httpx.TimeoutException, httpx.NetworkError):
raise
async def submit(
self,