refactor(scrapers): centralize cookie storage into shared file
Problem: each platform scraper managed its own cookie file path and load/save/clear logic, duplicating boilerplate across kattis, usaco, codeforces, and codechef.

Solution: add `load_platform_cookies`, `save_platform_cookies`, and `clear_platform_cookies` to `base.py`, backed by a single `~/.cache/cp-nvim/cookies.json` keyed by platform name. Update all scrapers to use these helpers.
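For reference, the intended call pattern from a scraper's point of view — a minimal sketch, not part of the diff; the `httpx` client, the wrapper function names, and the platform key are illustrative:

    # Sketch of how an httpx-based scraper would consume the new helpers.
    # restore_session / persist_session / drop_session are hypothetical names.
    import httpx

    from .base import clear_platform_cookies, load_platform_cookies, save_platform_cookies


    def restore_session(client: httpx.Client, platform: str) -> None:
        # load_platform_cookies returns whatever was stored for this platform,
        # or None if the cache file is missing or corrupt, so type-check it.
        data = load_platform_cookies(platform)
        if isinstance(data, dict):
            for name, value in data.items():
                client.cookies.set(name, value)


    def persist_session(client: httpx.Client, platform: str) -> None:
        cookies = dict(client.cookies.items())
        if cookies:
            save_platform_cookies(platform, cookies)


    def drop_session(platform: str) -> None:
        # Called when the server rejects the cached cookies (e.g. a 403),
        # forcing a fresh login on the next attempt.
        clear_platform_cookies(platform)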
parent eb0dea777e
commit cb58062464
4 changed files with 55 additions and 43 deletions
base.py
@@ -4,6 +4,38 @@ import os
 import re
 import sys
 from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any
+
+_COOKIE_FILE = Path.home() / ".cache" / "cp-nvim" / "cookies.json"
+
+
+def load_platform_cookies(platform: str) -> Any | None:
+    try:
+        data = json.loads(_COOKIE_FILE.read_text())
+        return data.get(platform)
+    except Exception:
+        return None
+
+
+def save_platform_cookies(platform: str, data: Any) -> None:
+    _COOKIE_FILE.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        existing = json.loads(_COOKIE_FILE.read_text())
+    except Exception:
+        existing = {}
+    existing[platform] = data
+    _COOKIE_FILE.write_text(json.dumps(existing))
+
+
+def clear_platform_cookies(platform: str) -> None:
+    try:
+        existing = json.loads(_COOKIE_FILE.read_text())
+        existing.pop(platform, None)
+        _COOKIE_FILE.write_text(json.dumps(existing))
+    except Exception:
+        pass
+
 
 from .language_ids import get_language_id
 from .models import (
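The on-disk layout this implies is a single JSON object with one top-level key per platform: the httpx scrapers (kattis, usaco) store name-to-value dicts, while the headless-browser codeforces path stores a list of full cookie dicts. An illustrative sketch — every key and value below is hypothetical; only the nesting is implied by the diff:

    # Illustrative shape of ~/.cache/cp-nvim/cookies.json after the refactor.
    {
        "kattis": {"session": "<cookie value>"},  # httpx scrapers: name -> value
        "usaco": {"PHPSESSID": "<cookie value>"},
        "codeforces": [  # headless-browser scraper: full cookie dicts
            {"name": "X-User-Handle", "value": "<handle>", "domain": ".codeforces.com"},
        ],
    }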
codeforces.py
@@ -8,7 +8,7 @@ from typing import Any
 import requests
 from bs4 import BeautifulSoup, Tag
 
-from .base import BaseScraper, extract_precision
+from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
 from .models import (
     ContestListResult,
     ContestSummary,
@@ -332,8 +332,6 @@ class CodeforcesScraper(BaseScraper):
 
 
 def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
-    from pathlib import Path
-
     try:
         from scrapling.fetchers import StealthySession  # type: ignore[import-untyped,unresolved-import]
     except ImportError:
@@ -346,9 +344,6 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
 
     _ensure_browser()
 
-    cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
-    cookie_cache.parent.mkdir(parents=True, exist_ok=True)
-
     logged_in = False
     login_error: str | None = None
 
@@ -405,7 +400,7 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult:
            try:
                browser_cookies = session.context.cookies()
                if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
-                    cookie_cache.write_text(json.dumps(browser_cookies))
+                    save_platform_cookies("codeforces", browser_cookies)
            except Exception:
                pass
 
@@ -426,6 +421,7 @@ def _submit_headless(
 
    source_code = Path(file_path).read_text()
 
+
    try:
        from scrapling.fetchers import StealthySession  # type: ignore[import-untyped,unresolved-import]
    except ImportError:
@@ -438,16 +434,11 @@ def _submit_headless(
 
    _ensure_browser()
 
-    cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json"
-    cookie_cache.parent.mkdir(parents=True, exist_ok=True)
    saved_cookies: list[dict[str, Any]] = []
-    if cookie_cache.exists():
-        try:
-            saved_cookies = json.loads(cookie_cache.read_text())
-        except Exception:
-            pass
+    if not _retried:
+        saved_cookies = load_platform_cookies("codeforces") or []
 
-    logged_in = cookie_cache.exists() and not _retried
+    logged_in = bool(saved_cookies) and not _retried
    login_error: str | None = None
    submit_error: str | None = None
    needs_relogin = False
@@ -520,9 +511,9 @@ def _submit_headless(
                headless=True,
                timeout=BROWSER_SESSION_TIMEOUT,
                google_search=False,
-                cookies=saved_cookies if (cookie_cache.exists() and not _retried) else [],
+                cookies=saved_cookies if (saved_cookies and not _retried) else [],
            ) as session:
-                if not (cookie_cache.exists() and not _retried):
+                if not (saved_cookies and not _retried):
                    print(json.dumps({"status": "checking_login"}), flush=True)
                    session.fetch(
                        f"{BASE_URL}/",
@@ -552,12 +543,12 @@ def _submit_headless(
            try:
                browser_cookies = session.context.cookies()
                if any(c.get("name") == "X-User-Handle" for c in browser_cookies):
-                    cookie_cache.write_text(json.dumps(browser_cookies))
+                    save_platform_cookies("codeforces", browser_cookies)
            except Exception:
                pass
 
    if needs_relogin and not _retried:
-        cookie_cache.unlink(missing_ok=True)
+        clear_platform_cookies("codeforces")
        return _submit_headless(
            contest_id,
            problem_id,
kattis.py
@@ -10,7 +10,7 @@ from pathlib import Path
 
 import httpx
 
-from .base import BaseScraper, extract_precision
+from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies
 from .timeouts import HTTP_TIMEOUT
 from .models import (
     ContestListResult,
@@ -28,8 +28,6 @@ HEADERS = {
 }
 CONNECTIONS = 8
 
-_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "kattis-cookies.json"
-
 TIME_RE = re.compile(
     r"CPU Time limit</span>\s*<span[^>]*>\s*(\d+)\s*seconds?\s*</span>",
     re.DOTALL,
@@ -209,20 +207,16 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None:
 
 
 async def _load_kattis_cookies(client: httpx.AsyncClient) -> None:
-    if not _COOKIE_PATH.exists():
-        return
-    try:
-        for k, v in json.loads(_COOKIE_PATH.read_text()).items():
+    data = load_platform_cookies("kattis")
+    if isinstance(data, dict):
+        for k, v in data.items():
             client.cookies.set(k, v)
-    except Exception:
-        pass
 
 
 async def _save_kattis_cookies(client: httpx.AsyncClient) -> None:
-    cookies = {k: v for k, v in client.cookies.items()}
+    cookies = dict(client.cookies.items())
     if cookies:
-        _COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
-        _COOKIE_PATH.write_text(json.dumps(cookies))
+        save_platform_cookies("kattis", cookies)
 
 
 async def _do_kattis_login(
@@ -368,7 +362,7 @@ class KattisScraper(BaseScraper):
             return self._submit_error(f"Submit request failed: {e}")
 
         if r.status_code in (400, 403) or r.text == "Request validation failed":
-            _COOKIE_PATH.unlink(missing_ok=True)
+            clear_platform_cookies("kattis")
             print(json.dumps({"status": "logging_in"}), flush=True)
             ok = await _do_kattis_login(client, username, password)
             if not ok:
usaco.py
@@ -8,7 +8,7 @@ from typing import Any, cast
 
 import httpx
 
-from .base import BaseScraper, extract_precision
+from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies
 from .timeouts import HTTP_TIMEOUT
 from .models import (
     ContestListResult,
@@ -27,7 +27,6 @@ HEADERS = {
 }
 CONNECTIONS = 4
 
-_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "usaco-cookies.json"
 _LOGIN_PATH = "/current/tpcm/login-session.php"
 _SUBMIT_PATH = "/current/tpcm/submit-solution.php"
 
@@ -202,20 +201,16 @@ def _parse_submit_form(
 
 
 async def _load_usaco_cookies(client: httpx.AsyncClient) -> None:
-    if not _COOKIE_PATH.exists():
-        return
-    try:
-        for k, v in json.loads(_COOKIE_PATH.read_text()).items():
+    data = load_platform_cookies("usaco")
+    if isinstance(data, dict):
+        for k, v in data.items():
             client.cookies.set(k, v)
-    except Exception:
-        pass
 
 
 async def _save_usaco_cookies(client: httpx.AsyncClient) -> None:
-    cookies = {k: v for k, v in client.cookies.items()}
+    cookies = dict(client.cookies.items())
     if cookies:
-        _COOKIE_PATH.parent.mkdir(parents=True, exist_ok=True)
-        _COOKIE_PATH.write_text(json.dumps(cookies))
+        save_platform_cookies("usaco", cookies)
 
 
 async def _check_usaco_login(client: httpx.AsyncClient, username: str) -> bool: