cp.nvim/tests/conftest.py
Barrett Ruth 3c11d609f5
feat(codechef): implement full CodeChef support (#354)
## Problem

CodeChef had no working login, submit, or contest list. The browser
selectors were wrong, the contest list was missing present/past
contests,
and problem/contest URLs were unset.

## Solution

Fix login and submit selectors for the Drupal-based site. Paginate
`/api/list/contests/past` to collect all 228 Starters, then expand each
parent contest into individual division entries (e.g. `START228 (Div.
4)`).
Add language IDs, correct `url`/`contest_url`/`standings_url` in
metadata,
and make `:CP <platform>` open the contest picker directly.
2026-03-06 23:10:44 -05:00

320 lines
12 KiB
Python

import asyncio
import importlib.util
import io
import json
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import re
import httpx
import pytest
import requests
ROOT = Path(__file__).resolve().parent.parent
FIX = Path(__file__).resolve().parent / "fixtures"
@pytest.fixture
def fixture_text():
def _load(name: str) -> str:
p = FIX / name
return p.read_text(encoding="utf-8")
return _load
def _load_scraper_module(module_path: Path, module_name: str):
spec = importlib.util.spec_from_file_location(
f"scrapers.{module_name}", module_path
)
if spec is None or spec.loader is None:
raise ImportError(f"Cannot load module {module_name}")
module = importlib.util.module_from_spec(spec)
sys.modules[f"scrapers.{module_name}"] = module
spec.loader.exec_module(module)
return module
def _capture_stdout(coro):
buf = io.StringIO()
old = sys.stdout
sys.stdout = buf
try:
rc = asyncio.run(coro)
out = buf.getvalue()
finally:
sys.stdout = old
return rc, out
@pytest.fixture
def run_scraper_offline(fixture_text):
def _router_cses(*, path: str | None = None, url: str | None = None) -> str:
if not path and not url:
raise AssertionError("CSES expects path or url")
target = path or url
if target is None:
raise AssertionError(f"No target for CSES (path={path!r}, url={url!r})")
if target.startswith("https://cses.fi"):
target = target.removeprefix("https://cses.fi")
if target.strip("/") == "problemset":
return fixture_text("cses/contests.html")
if target.startswith("/problemset/task/") or target.startswith(
"problemset/task/"
):
pid = target.rstrip("/").split("/")[-1]
return fixture_text(f"cses/task_{pid}.html")
raise AssertionError(f"No fixture for CSES path={path!r} url={url!r}")
def _router_atcoder(*, path: str | None = None, url: str | None = None) -> str:
if not url:
raise AssertionError("AtCoder expects url routing")
if "/contests/archive" in url:
return fixture_text("atcoder/contests.html")
if url.endswith("/tasks"):
return fixture_text("atcoder/abc100_tasks.html")
if "/tasks/" in url:
slug = url.rsplit("/", 1)[-1]
return fixture_text(f"atcoder/task_{slug}.html")
raise AssertionError(f"No fixture for AtCoder url={url!r}")
def _router_codeforces(*, path: str | None = None, url: str | None = None) -> str:
if not url:
raise AssertionError("Codeforces expects url routing")
if "/contest/" in url and url.endswith("/problems"):
contest_id = url.rstrip("/").split("/")[-2]
return fixture_text(f"codeforces/{contest_id}_problems.html")
if "/contests" in url and "/problem/" not in url:
return fixture_text("codeforces/contests.html")
if "/problem/" in url:
parts = url.rstrip("/").split("/")
contest_id, index = parts[-3], parts[-1]
return fixture_text(f"codeforces/{contest_id}_{index}.html")
if "/problemset/problem/" in url:
parts = url.rstrip("/").split("/")
contest_id, index = parts[-2], parts[-1]
return fixture_text(f"codeforces/{contest_id}_{index}.html")
raise AssertionError(f"No fixture for Codeforces url={url!r}")
def _router_kattis(*, url: str) -> str:
url = url.removeprefix("https://open.kattis.com")
if "/contests?" in url:
return fixture_text("kattis/contests.html")
m = re.search(r"/contests/([^/]+)/problems", url)
if m:
try:
return fixture_text(f"kattis/contest_{m.group(1)}_problems.html")
except FileNotFoundError:
return "<html></html>"
if "/problems/" in url and "/file/statement" not in url:
slug = url.rstrip("/").split("/")[-1]
return fixture_text(f"kattis/problem_{slug}.html")
raise AssertionError(f"No fixture for Kattis url={url!r}")
def _router_usaco(*, url: str) -> str:
if "page=contests" in url and "results" not in url:
return fixture_text("usaco/contests.html")
m = re.search(r"page=([a-z]+\d{2,4}results)", url)
if m:
try:
return fixture_text(f"usaco/{m.group(1)}.html")
except FileNotFoundError:
return "<html></html>"
m = re.search(r"page=viewproblem2&cpid=(\d+)", url)
if m:
return fixture_text(f"usaco/problem_{m.group(1)}.html")
raise AssertionError(f"No fixture for USACO url={url!r}")
def _make_offline_fetches(scraper_name: str):
match scraper_name:
case "cses":
async def __offline_fetch_text(client, path: str, **kwargs):
html = _router_cses(path=path)
return SimpleNamespace(
text=html,
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_fetch_text": __offline_fetch_text,
}
case "atcoder":
def __offline_fetch(url: str, *args, **kwargs):
html = _router_atcoder(url=url)
return html
async def __offline_get_async(client, url: str, **kwargs):
return _router_atcoder(url=url)
return {
"_fetch": __offline_fetch,
"_get_async": __offline_get_async,
}
case "codeforces":
def _mock_fetch_problems_html(cid: str) -> str:
return _router_codeforces(
url=f"https://codeforces.com/contest/{cid}/problems"
)
def _mock_requests_get(url: str, **kwargs):
if "api/contest.list" in url:
data = {
"status": "OK",
"result": [
{
"id": 1550,
"name": "Educational Codeforces Round 155 (Rated for Div. 2)",
"phase": "FINISHED",
},
{
"id": 1000,
"name": "Codeforces Round #1000",
"phase": "FINISHED",
},
],
}
class R:
def json(self_inner):
return data
def raise_for_status(self_inner):
return None
return R()
raise AssertionError(f"Unexpected requests.get call: {url}")
return {
"_fetch_problems_html": _mock_fetch_problems_html,
"requests.get": _mock_requests_get,
}
case "codechef":
class MockResponse:
def __init__(self, json_data):
self._json_data = json_data
self.status_code = 200
def json(self):
return self._json_data
def raise_for_status(self):
pass
async def __offline_get_async(client, url: str, **kwargs):
if "/api/list/contests/all" in url:
data = json.loads(fixture_text("codechef/contests.json"))
return MockResponse(data)
if "/api/list/contests/past" in url:
data = json.loads(fixture_text("codechef/contests_past.json"))
return MockResponse(data)
if "/api/contests/START" in url and "/problems/" not in url:
contest_id = url.rstrip("/").split("/")[-1]
try:
data = json.loads(
fixture_text(f"codechef/{contest_id}.json")
)
return MockResponse(data)
except FileNotFoundError:
raise AssertionError(f"No fixture for CodeChef url={url!r}")
if "/api/contests/START" in url and "/problems/" in url:
parts = url.rstrip("/").split("/")
contest_id = parts[-3]
problem_id = parts[-1]
data = json.loads(
fixture_text(f"codechef/{contest_id}_{problem_id}.json")
)
return MockResponse(data)
raise AssertionError(f"No fixture for CodeChef url={url!r}")
return {
"__offline_get_async": __offline_get_async,
}
case "kattis":
async def __offline_get_kattis(client, url: str, **kwargs):
if "/file/statement/samples.zip" in url:
raise httpx.HTTPError("not found")
html = _router_kattis(url=url)
return SimpleNamespace(
text=html,
content=html.encode(),
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_kattis,
}
case "usaco":
async def __offline_get_usaco(client, url: str, **kwargs):
html = _router_usaco(url=url)
return SimpleNamespace(
text=html,
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_usaco,
}
case _:
raise AssertionError(f"Unknown scraper: {scraper_name}")
scraper_classes = {
"cses": "CSESScraper",
"atcoder": "AtcoderScraper",
"codeforces": "CodeforcesScraper",
"codechef": "CodeChefScraper",
"kattis": "KattisScraper",
"usaco": "USACOScraper",
}
def _run(scraper_name: str, mode: str, *args: str):
mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
ns = _load_scraper_module(mod_path, scraper_name)
offline_fetches = _make_offline_fetches(scraper_name)
if scraper_name == "codeforces":
ns._fetch_problems_html = offline_fetches["_fetch_problems_html"]
requests.get = offline_fetches["requests.get"]
elif scraper_name == "atcoder":
ns._fetch = offline_fetches["_fetch"]
ns._get_async = offline_fetches["_get_async"]
elif scraper_name == "cses":
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]
elif scraper_name in ("codechef", "kattis", "usaco"):
httpx.AsyncClient.get = offline_fetches["__offline_get_async"]
scraper_class = getattr(ns, scraper_classes[scraper_name])
scraper = scraper_class()
argv = [str(mod_path), mode, *args]
rc, out = _capture_stdout(scraper._run_cli_async(argv))
json_lines: list[Any] = []
for line in (_line for _line in out.splitlines() if _line.strip()):
json_lines.append(json.loads(line))
return rc, json_lines
return _run