refactor(scrapers): centralize cookie storage into shared file
Problem: each platform scraper managed its own cookie file path and load/save/clear logic, duplicating boilerplate across kattis, usaco, codeforces, and codechef. Solution: add `load_platform_cookies`, `save_platform_cookies`, and `clear_platform_cookies` to `base.py`, backed by a single `~/.cache/cp-nvim/cookies.json` keyed by platform name. Update all scrapers to use these helpers.
This commit is contained in:
parent
eb0dea777e
commit
cb58062464
4 changed files with 55 additions and 43 deletions
|
|
@ -4,6 +4,38 @@ import os
|
|||
import re
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
_COOKIE_FILE = Path.home() / ".cache" / "cp-nvim" / "cookies.json"
|
||||
|
||||
|
||||
def load_platform_cookies(platform: str) -> Any | None:
|
||||
try:
|
||||
data = json.loads(_COOKIE_FILE.read_text())
|
||||
return data.get(platform)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def save_platform_cookies(platform: str, data: Any) -> None:
    """Persist *data* as the cached cookies for *platform*.

    Merges into the shared JSON cookie file so other platforms' entries
    are preserved.  A missing, unreadable, or corrupt cache file is
    replaced with a fresh object instead of raising.
    """
    _COOKIE_FILE.parent.mkdir(parents=True, exist_ok=True)
    try:
        existing = json.loads(_COOKIE_FILE.read_text())
    except (OSError, ValueError):
        existing = {}
    if not isinstance(existing, dict):
        # A non-object top level (e.g. a JSON list) would make the key
        # assignment below raise TypeError; start from a clean object.
        existing = {}
    existing[platform] = data
    _COOKIE_FILE.write_text(json.dumps(existing))
|
||||
|
||||
|
||||
def clear_platform_cookies(platform: str) -> None:
    """Drop the cached cookies for *platform*, if any.

    Best-effort: every failure (missing cache file, corrupt JSON, an
    unwritable path) is silently ignored — clearing cookies is never
    critical to the caller.
    """
    try:
        cached = json.loads(_COOKIE_FILE.read_text())
        remaining = {name: val for name, val in cached.items() if name != platform}
        _COOKIE_FILE.write_text(json.dumps(remaining))
    except Exception:
        pass
|
||||
|
||||
|
||||
from .language_ids import get_language_id
|
||||
from .models import (
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Any
|
|||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from .base import BaseScraper, extract_precision
|
||||
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
|
||||
from .models import (
|
||||
ContestListResult,
|
||||
ContestSummary,
|
||||
|
|
@ -332,8 +332,6 @@ class CodeforcesScraper(BaseScraper):
|
|||
|
||||
|
||||
def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
|
||||
except ImportError:
|
||||
|
|
@ -346,9 +344,6 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
|
|||
|
||||
_ensure_browser()
|
||||
|
||||
cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
|
||||
cookie_cache.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logged_in = False
|
||||
login_error: str | None = None
|
||||
|
||||
|
|
@ -405,7 +400,7 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
|
|||
try:
|
||||
browser_cookies = session.context.cookies()
|
||||
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
|
||||
cookie_cache.write_text(json.dumps(browser_cookies))
|
||||
save_platform_cookies("codeforces", browser_cookies)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -426,6 +421,7 @@ def _submit_headless(
|
|||
|
||||
source_code = Path(file_path).read_text()
|
||||
|
||||
|
||||
try:
|
||||
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
|
||||
except ImportError:
|
||||
|
|
@ -438,16 +434,11 @@ def _submit_headless(
|
|||
|
||||
_ensure_browser()
|
||||
|
||||
cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
|
||||
cookie_cache.parent.mkdir(parents=True, exist_ok=True)
|
||||
saved_cookies: list[dict[str, Any]] = []
|
||||
if cookie_cache.exists():
|
||||
try:
|
||||
saved_cookies = json.loads(cookie_cache.read_text())
|
||||
except Exception:
|
||||
pass
|
||||
if not _retried:
|
||||
saved_cookies = load_platform_cookies("codeforces") or []
|
||||
|
||||
logged_in = cookie_cache.exists() and not _retried
|
||||
logged_in = bool(saved_cookies) and not _retried
|
||||
login_error: str | None = None
|
||||
submit_error: str | None = None
|
||||
needs_relogin = False
|
||||
|
|
@ -520,9 +511,9 @@ def _submit_headless(
|
|||
headless=True,
|
||||
timeout=BROWSER_SESSION_TIMEOUT,
|
||||
google_search=False,
|
||||
cookies=saved_cookies if (cookie_cache.exists() and not _retried) else [],
|
||||
cookies=saved_cookies if (saved_cookies and not _retried) else [],
|
||||
) as session:
|
||||
if not (cookie_cache.exists() and not _retried):
|
||||
if not (saved_cookies and not _retried):
|
||||
print(json.dumps({"status": "checking_login"}), flush=True)
|
||||
session.fetch(
|
||||
f"{BASE_URL}/",
|
||||
|
|
@ -552,12 +543,12 @@ def _submit_headless(
|
|||
try:
|
||||
browser_cookies = session.context.cookies()
|
||||
if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
|
||||
cookie_cache.write_text(json.dumps(browser_cookies))
|
||||
save_platform_cookies("codeforces", browser_cookies)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if needs_relogin and not _retried:
|
||||
cookie_cache.unlink(missing_ok=True)
|
||||
clear_platform_cookies("codeforces")
|
||||
return _submit_headless(
|
||||
contest_id,
|
||||
problem_id,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from pathlib import Path
|
|||
|
||||
import httpx
|
||||
|
||||
from .base import BaseScraper, extract_precision
|
||||
from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
|
||||
from .timeouts import HTTP_TIMEOUT
|
||||
from .models import (
|
||||
ContestListResult,
|
||||
|
|
@ -28,8 +28,6 @@ HEADERS = {
|
|||
}
|
||||
CONNECTIONS = 8
|
||||
|
||||
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "kattis-cookies.json"
|
||||
|
||||
TIME_RE = re.compile(
|
||||
r"CPU Time limit</span>\s*<span[^>]*>\s*(\d+)\s*seconds?\s*</span>",
|
||||
re.DOTALL,
|
||||
|
|
@ -209,20 +207,16 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None:
|
|||
|
||||
|
||||
async def _load_kattis_cookies(client: httpx.AsyncClient) -> None:
|
||||
if not _COOKIE_PATH.exists():
|
||||
return
|
||||
try:
|
||||
for k, v in json.loads(_COOKIE_PATH.read_text()).items():
|
||||
data = load_platform_cookies("kattis")
|
||||
if isinstance(data, dict):
|
||||
for k, v in data.items():
|
||||
client.cookies.set(k, v)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _save_kattis_cookies(client: httpx.AsyncClient) -> None:
|
||||
cookies = {k: v for k, v in client.cookies.items()}
|
||||
cookies = dict(client.cookies.items())
|
||||
if cookies:
|
||||
_COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
_COOKIE_PATH.write_text(json.dumps(cookies))
|
||||
save_platform_cookies("kattis", cookies)
|
||||
|
||||
|
||||
async def _do_kattis_login(
|
||||
|
|
@ -368,7 +362,7 @@ class KattisScraper(BaseScraper):
|
|||
return self._submit_error(f"Submit request failed: {e}")
|
||||
|
||||
if r.status_code in (400, 403) or r.text == "Request validation failed":
|
||||
_COOKIE_PATH.unlink(missing_ok=True)
|
||||
clear_platform_cookies("kattis")
|
||||
print(json.dumps({"status": "logging_in"}), flush=True)
|
||||
ok = await _do_kattis_login(client, username, password)
|
||||
if not ok:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Any, cast
|
|||
|
||||
import httpx
|
||||
|
||||
from .base import BaseScraper, extract_precision
|
||||
from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies
|
||||
from .timeouts import HTTP_TIMEOUT
|
||||
from .models import (
|
||||
ContestListResult,
|
||||
|
|
@ -27,7 +27,6 @@ HEADERS = {
|
|||
}
|
||||
CONNECTIONS = 4
|
||||
|
||||
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "usaco-cookies.json"
|
||||
_LOGIN_PATH = "/current/tpcm/login-session.php"
|
||||
_SUBMIT_PATH = "/current/tpcm/submit-solution.php"
|
||||
|
||||
|
|
@ -202,20 +201,16 @@ def _parse_submit_form(
|
|||
|
||||
|
||||
async def _load_usaco_cookies(client: httpx.AsyncClient) -> None:
|
||||
if not _COOKIE_PATH.exists():
|
||||
return
|
||||
try:
|
||||
for k, v in json.loads(_COOKIE_PATH.read_text()).items():
|
||||
data = load_platform_cookies("usaco")
|
||||
if isinstance(data, dict):
|
||||
for k, v in data.items():
|
||||
client.cookies.set(k, v)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _save_usaco_cookies(client: httpx.AsyncClient) -> None:
|
||||
cookies = {k: v for k, v in client.cookies.items()}
|
||||
cookies = dict(client.cookies.items())
|
||||
if cookies:
|
||||
_COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
_COOKIE_PATH.write_text(json.dumps(cookies))
|
||||
save_platform_cookies("usaco", cookies)
|
||||
|
||||
|
||||
async def _check_usaco_login(client: httpx.AsyncClient, username: str) -> bool:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue