From cb5806246454080b90853b3e1d2f13634be7fd1a Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Sat, 7 Mar 2026 03:46:28 -0500 Subject: [PATCH] refactor(scrapers): centralize cookie storage into shared file Problem: each platform scraper managed its own cookie file path and load/save/clear logic, duplicating boilerplate across kattis, usaco, codeforces, and codechef. Solution: add `load_platform_cookies`, `save_platform_cookies`, and `clear_platform_cookies` to `base.py` backed by a single `~/.cache/cp-nvim/cookies.json` keyed by platform name. Update the codeforces, kattis, and usaco scrapers to use these helpers. --- scrapers/base.py | 32 ++++++++++++++++++++++++++++++++ scrapers/codeforces.py | 29 ++++++++++------------------- scrapers/kattis.py | 20 +++++++------------- scrapers/usaco.py | 17 ++++++----------- 4 files changed, 55 insertions(+), 43 deletions(-) diff --git a/scrapers/base.py b/scrapers/base.py index 11ab8c6..03b467a 100644 --- a/scrapers/base.py +++ b/scrapers/base.py @@ -4,6 +4,38 @@ import os import re import sys from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + +_COOKIE_FILE = Path.home() / ".cache" / "cp-nvim" / "cookies.json" + + +def load_platform_cookies(platform: str) -> Any | None: + try: + data = json.loads(_COOKIE_FILE.read_text()) + return data.get(platform) + except Exception: + return None + + +def save_platform_cookies(platform: str, data: Any) -> None: + _COOKIE_FILE.parent.mkdir(parents=True, exist_ok=True) + try: + existing = json.loads(_COOKIE_FILE.read_text()) + except Exception: + existing = {} + existing[platform] = data + _COOKIE_FILE.write_text(json.dumps(existing)) + + +def clear_platform_cookies(platform: str) -> None: + try: + existing = json.loads(_COOKIE_FILE.read_text()) + existing.pop(platform, None) + _COOKIE_FILE.write_text(json.dumps(existing)) + except Exception: + pass + from .language_ids import get_language_id from .models import ( diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py
index d5b6161..5088a39 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -8,7 +8,7 @@ from typing import Any import requests from bs4 import BeautifulSoup, Tag -from .base import BaseScraper, extract_precision +from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies from .models import ( ContestListResult, ContestSummary, @@ -332,8 +332,6 @@ class CodeforcesScraper(BaseScraper): def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: - from pathlib import Path - try: from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] except ImportError: @@ -346,9 +344,6 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: _ensure_browser() - cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json" - cookie_cache.parent.mkdir(parents=True, exist_ok=True) - logged_in = False login_error: str | None = None @@ -405,7 +400,7 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: try: browser_cookies = session.context.cookies() if any(c.get("name") == "X-User-Handle" for c in browser_cookies): - cookie_cache.write_text(json.dumps(browser_cookies)) + save_platform_cookies("codeforces", browser_cookies) except Exception: pass @@ -426,6 +421,7 @@ def _submit_headless( source_code = Path(file_path).read_text() + try: from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] except ImportError: @@ -438,16 +434,11 @@ def _submit_headless( _ensure_browser() - cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json" - cookie_cache.parent.mkdir(parents=True, exist_ok=True) saved_cookies: list[dict[str, Any]] = [] - if cookie_cache.exists(): - try: - saved_cookies = json.loads(cookie_cache.read_text()) - except Exception: - pass + if not _retried: + saved_cookies = load_platform_cookies("codeforces") or [] - logged_in = cookie_cache.exists() and not 
_retried + logged_in = bool(saved_cookies) and not _retried login_error: str | None = None submit_error: str | None = None needs_relogin = False @@ -520,9 +511,9 @@ def _submit_headless( headless=True, timeout=BROWSER_SESSION_TIMEOUT, google_search=False, - cookies=saved_cookies if (cookie_cache.exists() and not _retried) else [], + cookies=saved_cookies if (saved_cookies and not _retried) else [], ) as session: - if not (cookie_cache.exists() and not _retried): + if not (saved_cookies and not _retried): print(json.dumps({"status": "checking_login"}), flush=True) session.fetch( f"{BASE_URL}/", @@ -552,12 +543,12 @@ def _submit_headless( try: browser_cookies = session.context.cookies() if any(c.get("name") == "X-User-Handle" for c in browser_cookies): - cookie_cache.write_text(json.dumps(browser_cookies)) + save_platform_cookies("codeforces", browser_cookies) except Exception: pass if needs_relogin and not _retried: - cookie_cache.unlink(missing_ok=True) + clear_platform_cookies("codeforces") return _submit_headless( contest_id, problem_id, diff --git a/scrapers/kattis.py b/scrapers/kattis.py index 1177445..55c3759 100644 --- a/scrapers/kattis.py +++ b/scrapers/kattis.py @@ -10,7 +10,7 @@ from pathlib import Path import httpx -from .base import BaseScraper, extract_precision +from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, @@ -28,8 +28,6 @@ HEADERS = { } CONNECTIONS = 8 -_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "kattis-cookies.json" - TIME_RE = re.compile( r"CPU Time limit\s*]*>\s*(\d+)\s*seconds?\s*", re.DOTALL, @@ -209,20 +207,16 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None: async def _load_kattis_cookies(client: httpx.AsyncClient) -> None: - if not _COOKIE_PATH.exists(): - return - try: - for k, v in json.loads(_COOKIE_PATH.read_text()).items(): + data = 
load_platform_cookies("kattis") + if isinstance(data, dict): + for k, v in data.items(): client.cookies.set(k, v) - except Exception: - pass async def _save_kattis_cookies(client: httpx.AsyncClient) -> None: - cookies = {k: v for k, v in client.cookies.items()} + cookies = dict(client.cookies.items()) if cookies: - _COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True) - _COOKIE_PATH.write_text(json.dumps(cookies)) + save_platform_cookies("kattis", cookies) async def _do_kattis_login( @@ -368,7 +362,7 @@ class KattisScraper(BaseScraper): return self._submit_error(f"Submit request failed: {e}") if r.status_code in (400, 403) or r.text == "Request validation failed": - _COOKIE_PATH.unlink(missing_ok=True) + clear_platform_cookies("kattis") print(json.dumps({"status": "logging_in"}), flush=True) ok = await _do_kattis_login(client, username, password) if not ok: diff --git a/scrapers/usaco.py b/scrapers/usaco.py index b009cf0..3c542ab 100644 --- a/scrapers/usaco.py +++ b/scrapers/usaco.py @@ -8,7 +8,7 @@ from typing import Any, cast import httpx -from .base import BaseScraper, extract_precision +from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, @@ -27,7 +27,6 @@ HEADERS = { } CONNECTIONS = 4 -_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "usaco-cookies.json" _LOGIN_PATH = "/current/tpcm/login-session.php" _SUBMIT_PATH = "/current/tpcm/submit-solution.php" @@ -202,20 +201,16 @@ def _parse_submit_form( async def _load_usaco_cookies(client: httpx.AsyncClient) -> None: - if not _COOKIE_PATH.exists(): - return - try: - for k, v in json.loads(_COOKIE_PATH.read_text()).items(): + data = load_platform_cookies("usaco") + if isinstance(data, dict): + for k, v in data.items(): client.cookies.set(k, v) - except Exception: - pass async def _save_usaco_cookies(client: httpx.AsyncClient) -> None: - cookies = {k: v for k, v in client.cookies.items()} + cookies 
= dict(client.cookies.items()) if cookies: - _COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True) - _COOKIE_PATH.write_text(json.dumps(cookies)) + save_platform_cookies("usaco", cookies) async def _check_usaco_login(client: httpx.AsyncClient, username: str) -> bool: