refactor(scrapers): centralize cookie storage into shared file

Problem: each platform scraper managed its own cookie file path and
load/save/clear logic, duplicating boilerplate across kattis, usaco,
codeforces, and codechef.

Solution: add `load_platform_cookies`, `save_platform_cookies`, and
`clear_platform_cookies` to `base.py`, backed by a single
`~/.cache/cp-nvim/cookies.json` file keyed by platform name. Update all
scrapers to use these helpers.
This commit is contained in:
Barrett Ruth 2026-03-07 03:46:28 -05:00
parent eb0dea777e
commit cb58062464
Signed by: barrett
GPG key ID: A6C96C9349D2FC81
4 changed files with 55 additions and 43 deletions

View file

@ -4,6 +4,38 @@ import os
import re
import sys
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
_COOKIE_FILE = Path.home() / ".cache" / "cp-nvim" / "cookies.json"
def load_platform_cookies(platform: str) -> Any | None:
try:
data = json.loads(_COOKIE_FILE.read_text())
return data.get(platform)
except Exception:
return None
def save_platform_cookies(platform: str, data: Any) -> None:
_COOKIE_FILE.parent.mkdir(parents=True, exist_ok=True)
try:
existing = json.loads(_COOKIE_FILE.read_text())
except Exception:
existing = {}
existing[platform] = data
_COOKIE_FILE.write_text(json.dumps(existing))
def clear_platform_cookies(platform: str) -> None:
try:
existing = json.loads(_COOKIE_FILE.read_text())
existing.pop(platform, None)
_COOKIE_FILE.write_text(json.dumps(existing))
except Exception:
pass
from .language_ids import get_language_id
from .models import (

View file

@ -8,7 +8,7 @@ from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
from .base import BaseScraper, extract_precision
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
from .models import (
ContestListResult,
ContestSummary,
@ -332,8 +332,6 @@ class CodeforcesScraper(BaseScraper):
def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
from pathlib import Path
try:
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
except ImportError:
@ -346,9 +344,6 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
_ensure_browser()
cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
cookie_cache.parent.mkdir(parents=True, exist_ok=True)
logged_in = False
login_error: str | None = None
@ -405,7 +400,7 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
try:
browser_cookies = session.context.cookies()
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
cookie_cache.write_text(json.dumps(browser_cookies))
save_platform_cookies("codeforces", browser_cookies)
except Exception:
pass
@ -426,6 +421,7 @@ def _submit_headless(
source_code = Path(file_path).read_text()
try:
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
except ImportError:
@ -438,16 +434,11 @@ def _submit_headless(
_ensure_browser()
cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
cookie_cache.parent.mkdir(parents=True, exist_ok=True)
saved_cookies: list[dict[str, Any]] = []
if cookie_cache.exists():
try:
saved_cookies = json.loads(cookie_cache.read_text())
except Exception:
pass
if not _retried:
saved_cookies = load_platform_cookies("codeforces") or []
logged_in = cookie_cache.exists() and not _retried
logged_in = bool(saved_cookies) and not _retried
login_error: str | None = None
submit_error: str | None = None
needs_relogin = False
@ -520,9 +511,9 @@ def _submit_headless(
headless=True,
timeout=BROWSER_SESSION_TIMEOUT,
google_search=False,
cookies=saved_cookies if (cookie_cache.exists() and not _retried) else [],
cookies=saved_cookies if (saved_cookies and not _retried) else [],
) as session:
if not (cookie_cache.exists() and not _retried):
if not (saved_cookies and not _retried):
print(json.dumps({"status": "checking_login"}), flush=True)
session.fetch(
f"{BASE_URL}/",
@ -552,12 +543,12 @@ def _submit_headless(
try:
browser_cookies = session.context.cookies()
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
cookie_cache.write_text(json.dumps(browser_cookies))
save_platform_cookies("codeforces", browser_cookies)
except Exception:
pass
if needs_relogin and not _retried:
cookie_cache.unlink(missing_ok=True)
clear_platform_cookies("codeforces")
return _submit_headless(
contest_id,
problem_id,

View file

@ -10,7 +10,7 @@ from pathlib import Path
import httpx
from .base import BaseScraper, extract_precision
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
from .timeouts import HTTP_TIMEOUT
from .models import (
ContestListResult,
@ -28,8 +28,6 @@ HEADERS = {
}
CONNECTIONS = 8
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "kattis-cookies.json"
TIME_RE = re.compile(
r"CPU Time limit</span>\s*<span[^>]*>\s*(\d+)\s*seconds?\s*</span>",
re.DOTALL,
@ -209,20 +207,16 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None:
async def _load_kattis_cookies(client: httpx.AsyncClient) -> None:
if not _COOKIE_PATH.exists():
return
try:
for k, v in json.loads(_COOKIE_PATH.read_text()).items():
data = load_platform_cookies("kattis")
if isinstance(data, dict):
for k, v in data.items():
client.cookies.set(k, v)
except Exception:
pass
async def _save_kattis_cookies(client: httpx.AsyncClient) -> None:
cookies = {k: v for k, v in client.cookies.items()}
cookies = dict(client.cookies.items())
if cookies:
_COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
_COOKIE_PATH.write_text(json.dumps(cookies))
save_platform_cookies("kattis", cookies)
async def _do_kattis_login(
@ -368,7 +362,7 @@ class KattisScraper(BaseScraper):
return self._submit_error(f"Submit request failed: {e}")
if r.status_code in (400, 403) or r.text == "Request validation failed":
_COOKIE_PATH.unlink(missing_ok=True)
clear_platform_cookies("kattis")
print(json.dumps({"status": "logging_in"}), flush=True)
ok = await _do_kattis_login(client, username, password)
if not ok:

View file

@ -8,7 +8,7 @@ from typing import Any, cast
import httpx
from .base import BaseScraper, extract_precision
from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies
from .timeouts import HTTP_TIMEOUT
from .models import (
ContestListResult,
@ -27,7 +27,6 @@ HEADERS = {
}
CONNECTIONS = 4
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "usaco-cookies.json"
_LOGIN_PATH = "/current/tpcm/login-session.php"
_SUBMIT_PATH = "/current/tpcm/submit-solution.php"
@ -202,20 +201,16 @@ def _parse_submit_form(
async def _load_usaco_cookies(client: httpx.AsyncClient) -> None:
if not _COOKIE_PATH.exists():
return
try:
for k, v in json.loads(_COOKIE_PATH.read_text()).items():
data = load_platform_cookies("usaco")
if isinstance(data, dict):
for k, v in data.items():
client.cookies.set(k, v)
except Exception:
pass
async def _save_usaco_cookies(client: httpx.AsyncClient) -> None:
cookies = {k: v for k, v in client.cookies.items()}
cookies = dict(client.cookies.items())
if cookies:
_COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
_COOKIE_PATH.write_text(json.dumps(cookies))
save_platform_cookies("usaco", cookies)
async def _check_usaco_login(client: httpx.AsyncClient, username: str) -> bool: