From e89c2e1cf575c19a86f6db958d0b3ad73f6dfbfe Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Sat, 25 Oct 2025 01:41:55 -0400 Subject: [PATCH] feat(codechef): finalize codechef impl --- lua/cp/config.lua | 4 + scrapers/codechef.py | 133 ++++++++++++++++---------- tests/conftest.py | 20 ++-- tests/fixtures/codechef/START209.json | 1 + tests/test_scrapers.py | 4 +- 5 files changed, 106 insertions(+), 56 deletions(-) create mode 100644 tests/fixtures/codechef/START209.json diff --git a/lua/cp/config.lua b/lua/cp/config.lua index 5b3b584..78f321f 100644 --- a/lua/cp/config.lua +++ b/lua/cp/config.lua @@ -139,6 +139,10 @@ M.defaults = { enabled_languages = { 'cpp', 'python' }, default_language = 'cpp', }, + codechef = { + enabled_languages = { 'cpp', 'python' }, + default_language = 'cpp', + }, cses = { enabled_languages = { 'cpp', 'python' }, default_language = 'cpp', diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 96d4cac..0f5636f 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -31,7 +31,9 @@ HEADERS = { TIMEOUT_S = 15.0 CONNECTIONS = 8 -MEMORY_LIMIT_RE = re.compile(r"Memory\s+[Ll]imit[:\s]+([0-9.]+)\s*MB", re.IGNORECASE) +MEMORY_LIMIT_RE = re.compile( + r"Memory\s+[Ll]imit.*?([0-9.]+)\s*(MB|GB)", re.IGNORECASE | re.DOTALL +) async def fetch_json(client: httpx.AsyncClient, path: str) -> dict: @@ -42,7 +44,13 @@ async def fetch_json(client: httpx.AsyncClient, path: str) -> dict: def _extract_memory_limit(html: str) -> float: m = MEMORY_LIMIT_RE.search(html) - return float(m.group(1)) if m else 256.0 + if not m: + return 256.0 + value = float(m.group(1)) + unit = m.group(2).upper() + if unit == "GB": + return value * 1024.0 + return value def _fetch_html_sync(url: str) -> str: @@ -50,20 +58,17 @@ def _fetch_html_sync(url: str) -> str: return str(response.body) -def get_div4_contest_id(contest_id: str) -> str: - return f"{contest_id}D" - - class CodeChefScraper(BaseScraper): @property def platform_name(self) -> str: return "codechef" async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: - div4_id = get_div4_contest_id(contest_id) async with httpx.AsyncClient() as client: try: - data = await fetch_json(client, API_CONTEST.format(contest_id=div4_id)) + data = await fetch_json( + client, API_CONTEST.format(contest_id=contest_id) + ) except httpx.HTTPStatusError as e: return self._create_metadata_error( f"Failed to fetch contest {contest_id}: {e}", contest_id @@ -76,12 +81,13 @@ class CodeChefScraper(BaseScraper): problems = [] for problem_code, problem_data in data["problems"].items(): - problems.append( - ProblemSummary( - id=problem_code, - name=problem_data.get("name", problem_code), + if problem_data.get("category_name") == "main": + problems.append( + ProblemSummary( + id=problem_code, + name=problem_data.get("name", problem_code), + ) ) - ) return MetadataResult( success=True, @@ -98,56 +104,87 @@ class CodeChefScraper(BaseScraper): except httpx.HTTPStatusError as e: return self._create_contests_error(f"Failed to fetch contests: {e}") - all_contests = data.get("future_contests", []) + data.get("past_contests", []) - - max_num = 0 - contest_names = {} - - for contest in all_contests: - contest_code = contest.get("contest_code", "") - if contest_code.startswith("START"): - match = re.match(r"START(\d+)", contest_code) - if match: - num = int(match.group(1)) - max_num = max(max_num, num) - contest_names[contest_code] = contest.get( - "contest_name", contest_code - ) - - if max_num == 0: - return self._create_contests_error("No Starters contests found") - - contests = [] - for i in range(1, max_num + 1): - contest_id = f"START{i}" - name = contest_names.get(contest_id, f"Starters {i}") - contests.append( - ContestSummary( - id=contest_id, - name=name, - display_name=name, - ) + all_contests = data.get("future_contests", []) + data.get( + "past_contests", [] ) + max_num = 0 + for contest in all_contests: + contest_code = contest.get("contest_code", "") + if contest_code.startswith("START"): + match = re.match(r"START(\d+)", contest_code) + if match: + num = int(match.group(1)) + max_num = max(max_num, num) + + if max_num == 0: + return self._create_contests_error("No Starters contests found") + + contests = [] + sem = asyncio.Semaphore(CONNECTIONS) + + async def fetch_divisions(i: int) -> list[ContestSummary]: + parent_id = f"START{i}" + async with sem: + try: + parent_data = await fetch_json( + client, API_CONTEST.format(contest_id=parent_id) + ) + except Exception as e: + import sys + + print(f"Error fetching {parent_id}: {e}", file=sys.stderr) + return [] + + child_contests = parent_data.get("child_contests", {}) + if not child_contests: + return [] + + base_name = f"Starters {i}" + divisions = [] + + for div_key, div_data in child_contests.items(): + div_code = div_data.get("contest_code", "") + div_num = div_data.get("div", {}).get("div_number", "") + if div_code and div_num: + divisions.append( + ContestSummary( + id=div_code, + name=base_name, + display_name=f"{base_name} (Div. {div_num})", + ) + ) + + return divisions + + tasks = [fetch_divisions(i) for i in range(1, max_num + 1)] + for coro in asyncio.as_completed(tasks): + divisions = await coro + contests.extend(divisions) + return ContestListResult(success=True, error="", contests=contests) async def stream_tests_for_category_async(self, contest_id: str) -> None: - div4_id = get_div4_contest_id(contest_id) - async with httpx.AsyncClient( limits=httpx.Limits(max_connections=CONNECTIONS) ) as client: try: contest_data = await fetch_json( - client, API_CONTEST.format(contest_id=div4_id) + client, API_CONTEST.format(contest_id=contest_id) ) except Exception: return - problems = contest_data.get("problems", {}) - if not problems: + all_problems = contest_data.get("problems", {}) + if not all_problems: return + problems = { + code: data + for code, data in all_problems.items() + if data.get("category_name") == "main" + } + sem = asyncio.Semaphore(CONNECTIONS) async def run_one(problem_code: str) -> dict[str, Any]: @@ -156,7 +193,7 @@ class CodeChefScraper(BaseScraper): problem_data = await fetch_json( client, API_PROBLEM.format( - contest_id=div4_id, problem_id=problem_code + contest_id=contest_id, problem_id=problem_code ), ) diff --git a/tests/conftest.py b/tests/conftest.py index b3ea40a..fd856bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -193,13 +193,21 @@ def run_scraper_offline(fixture_text): if "/api/list/contests/all" in url: data = json.loads(fixture_text("codechef/contests.json")) return MockResponse(data) - if "/api/contests/START209D" in url and "/problems/" not in url: - data = json.loads(fixture_text("codechef/START209D.json")) - return MockResponse(data) - if "/api/contests/START209D/problems/" in url: - problem_id = url.rstrip("/").split("/")[-1] + if "/api/contests/START" in url and "/problems/" not in url: + contest_id = url.rstrip("/").split("/")[-1] + try: + data = json.loads( + fixture_text(f"codechef/{contest_id}.json") + ) + return MockResponse(data) + except FileNotFoundError: + raise AssertionError(f"No fixture for CodeChef url={url!r}") + if "/api/contests/START" in url and "/problems/" in url: + parts = url.rstrip("/").split("/") + contest_id = parts[-3] + problem_id = parts[-1] data = json.loads( - fixture_text(f"codechef/START209D_{problem_id}.json") + fixture_text(f"codechef/{contest_id}_{problem_id}.json") ) return MockResponse(data) raise AssertionError(f"No fixture for CodeChef url={url!r}") diff --git a/tests/fixtures/codechef/START209.json b/tests/fixtures/codechef/START209.json new file mode 100644 index 0000000..7ef37a0 --- /dev/null +++ b/tests/fixtures/codechef/START209.json @@ -0,0 +1 @@ +{"status":"success","user":{"username":null},"code":"START209","isRatedContest":"1","isParentContestRated":"0","name":"Starters 209 (Rated till 5 star)","problems":[],"banner":"https:\/\/cdn.codechef.com\/download\/small-banner\/START209\/1760933061.png","rules":"

CodeChef: A Platform for Aspiring Programmers<\/h4>\n

CodeChef was created as a platform to help programmers make it big in the world of algorithms, computer programming, and programming contests. At CodeChef, our dedicated efforts are aimed at reviving the inner geek within you, as we proudly host a thrilling programming (coding) contest every Wednesday.<\/p>\n

About CodeChef Starters:<\/h4>\n

CodeChef Starters is a short programming contest which takes place on every Wednesday\u00a0<\/p>\n

Contest Details:<\/h4>\n