feat(codechef): finalize codechef impl

This commit is contained in:
Barrett Ruth 2025-10-25 01:41:55 -04:00
parent f78e43bdd4
commit e89c2e1cf5
5 changed files with 106 additions and 56 deletions

View file

@@ -139,6 +139,10 @@ M.defaults = {
enabled_languages = { 'cpp', 'python' }, enabled_languages = { 'cpp', 'python' },
default_language = 'cpp', default_language = 'cpp',
}, },
codechef = {
enabled_languages = { 'cpp', 'python' },
default_language = 'cpp',
},
cses = { cses = {
enabled_languages = { 'cpp', 'python' }, enabled_languages = { 'cpp', 'python' },
default_language = 'cpp', default_language = 'cpp',

View file

@@ -31,7 +31,9 @@ HEADERS = {
TIMEOUT_S = 15.0 TIMEOUT_S = 15.0
CONNECTIONS = 8 CONNECTIONS = 8
MEMORY_LIMIT_RE = re.compile(r"Memory\s+[Ll]imit[:\s]+([0-9.]+)\s*MB", re.IGNORECASE) MEMORY_LIMIT_RE = re.compile(
r"Memory\s+[Ll]imit.*?([0-9.]+)\s*(MB|GB)", re.IGNORECASE | re.DOTALL
)
async def fetch_json(client: httpx.AsyncClient, path: str) -> dict: async def fetch_json(client: httpx.AsyncClient, path: str) -> dict:
@@ -42,7 +44,13 @@ async def fetch_json(client: httpx.AsyncClient, path: str) -> dict:
def _extract_memory_limit(html: str) -> float:
    """Return the memory limit found in ``html``, expressed in megabytes.

    ``MEMORY_LIMIT_RE`` captures a number followed by an ``MB`` or ``GB``
    unit; gigabyte values are converted with a factor of 1024. When the
    pattern does not match, or the captured text is not a valid number,
    the default of 256.0 is returned.
    """
    m = MEMORY_LIMIT_RE.search(html)
    if not m:
        return 256.0
    try:
        value = float(m.group(1))
    except ValueError:
        # The pattern's ``[0-9.]+`` group also accepts strings such as "."
        # or "1.2.3"; treat those like a missing limit instead of raising.
        return 256.0
    if m.group(2).upper() == "GB":
        return value * 1024.0
    return value
def _fetch_html_sync(url: str) -> str: def _fetch_html_sync(url: str) -> str:
@@ -50,20 +58,17 @@ def _fetch_html_sync(url: str) -> str:
return str(response.body) return str(response.body)
def get_div4_contest_id(contest_id: str) -> str:
    """Return ``contest_id`` with the suffix ``"D"`` appended.

    NOTE(review): going by the function name, the suffixed code presumably
    identifies the Division 4 variant of the contest -- confirm.
    """
    return "{}D".format(contest_id)
class CodeChefScraper(BaseScraper): class CodeChefScraper(BaseScraper):
@property @property
def platform_name(self) -> str: def platform_name(self) -> str:
return "codechef" return "codechef"
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
div4_id = get_div4_contest_id(contest_id)
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
data = await fetch_json(client, API_CONTEST.format(contest_id=div4_id)) data = await fetch_json(
client, API_CONTEST.format(contest_id=contest_id)
)
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
return self._create_metadata_error( return self._create_metadata_error(
f"Failed to fetch contest {contest_id}: {e}", contest_id f"Failed to fetch contest {contest_id}: {e}", contest_id
@@ -76,12 +81,13 @@ class CodeChefScraper(BaseScraper):
problems = [] problems = []
for problem_code, problem_data in data["problems"].items(): for problem_code, problem_data in data["problems"].items():
problems.append( if problem_data.get("category_name") == "main":
ProblemSummary( problems.append(
id=problem_code, ProblemSummary(
name=problem_data.get("name", problem_code), id=problem_code,
name=problem_data.get("name", problem_code),
)
) )
)
return MetadataResult( return MetadataResult(
success=True, success=True,
@@ -98,56 +104,87 @@ class CodeChefScraper(BaseScraper):
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
return self._create_contests_error(f"Failed to fetch contests: {e}") return self._create_contests_error(f"Failed to fetch contests: {e}")
all_contests = data.get("future_contests", []) + data.get("past_contests", []) all_contests = data.get("future_contests", []) + data.get(
"past_contests", []
max_num = 0
contest_names = {}
for contest in all_contests:
contest_code = contest.get("contest_code", "")
if contest_code.startswith("START"):
match = re.match(r"START(\d+)", contest_code)
if match:
num = int(match.group(1))
max_num = max(max_num, num)
contest_names[contest_code] = contest.get(
"contest_name", contest_code
)
if max_num == 0:
return self._create_contests_error("No Starters contests found")
contests = []
for i in range(1, max_num + 1):
contest_id = f"START{i}"
name = contest_names.get(contest_id, f"Starters {i}")
contests.append(
ContestSummary(
id=contest_id,
name=name,
display_name=name,
)
) )
max_num = 0
for contest in all_contests:
contest_code = contest.get("contest_code", "")
if contest_code.startswith("START"):
match = re.match(r"START(\d+)", contest_code)
if match:
num = int(match.group(1))
max_num = max(max_num, num)
if max_num == 0:
return self._create_contests_error("No Starters contests found")
contests = []
sem = asyncio.Semaphore(CONNECTIONS)
async def fetch_divisions(i: int) -> list[ContestSummary]:
    """Fetch the child (per-division) contests of parent contest ``START{i}``.

    Returns one ``ContestSummary`` for every child contest that carries both
    a contest code and a division number. Returns an empty list when the
    parent contest cannot be fetched or lists no child contests.
    """
    parent_id = f"START{i}"
    async with sem:  # the enclosing scope's semaphore bounds concurrent fetches
        try:
            parent_data = await fetch_json(
                client, API_CONTEST.format(contest_id=parent_id)
            )
        except Exception as e:
            # Best effort: one unreachable parent contest must not abort the
            # whole listing, so report it on stderr and contribute nothing.
            import sys

            print(f"Error fetching {parent_id}: {e}", file=sys.stderr)
            return []

    child_contests = parent_data.get("child_contests", {})
    if not child_contests:
        return []

    base_name = f"Starters {i}"
    divisions = []
    # Only the values are needed; the mapping keys are not used.
    for div_data in child_contests.values():
        if not isinstance(div_data, dict):
            # Skip malformed entries instead of raising AttributeError.
            continue
        div_code = div_data.get("contest_code", "")
        # ``or {}`` also covers an explicit ``"div": null`` in the payload,
        # which a plain ``.get("div", {})`` default would not.
        div_info = div_data.get("div") or {}
        div_num = div_info.get("div_number", "") if isinstance(div_info, dict) else ""
        if div_code and div_num:
            divisions.append(
                ContestSummary(
                    id=div_code,
                    name=base_name,
                    display_name=f"{base_name} (Div. {div_num})",
                )
            )
    return divisions
tasks = [fetch_divisions(i) for i in range(1, max_num + 1)]
for coro in asyncio.as_completed(tasks):
divisions = await coro
contests.extend(divisions)
return ContestListResult(success=True, error="", contests=contests) return ContestListResult(success=True, error="", contests=contests)
async def stream_tests_for_category_async(self, contest_id: str) -> None: async def stream_tests_for_category_async(self, contest_id: str) -> None:
div4_id = get_div4_contest_id(contest_id)
async with httpx.AsyncClient( async with httpx.AsyncClient(
limits=httpx.Limits(max_connections=CONNECTIONS) limits=httpx.Limits(max_connections=CONNECTIONS)
) as client: ) as client:
try: try:
contest_data = await fetch_json( contest_data = await fetch_json(
client, API_CONTEST.format(contest_id=div4_id) client, API_CONTEST.format(contest_id=contest_id)
) )
except Exception: except Exception:
return return
problems = contest_data.get("problems", {}) all_problems = contest_data.get("problems", {})
if not problems: if not all_problems:
return return
problems = {
code: data
for code, data in all_problems.items()
if data.get("category_name") == "main"
}
sem = asyncio.Semaphore(CONNECTIONS) sem = asyncio.Semaphore(CONNECTIONS)
async def run_one(problem_code: str) -> dict[str, Any]: async def run_one(problem_code: str) -> dict[str, Any]:
@@ -156,7 +193,7 @@ class CodeChefScraper(BaseScraper):
problem_data = await fetch_json( problem_data = await fetch_json(
client, client,
API_PROBLEM.format( API_PROBLEM.format(
contest_id=div4_id, problem_id=problem_code contest_id=contest_id, problem_id=problem_code
), ),
) )

View file

@@ -193,13 +193,21 @@ def run_scraper_offline(fixture_text):
if "/api/list/contests/all" in url: if "/api/list/contests/all" in url:
data = json.loads(fixture_text("codechef/contests.json")) data = json.loads(fixture_text("codechef/contests.json"))
return MockResponse(data) return MockResponse(data)
if "/api/contests/START209D" in url and "/problems/" not in url: if "/api/contests/START" in url and "/problems/" not in url:
data = json.loads(fixture_text("codechef/START209D.json")) contest_id = url.rstrip("/").split("/")[-1]
return MockResponse(data) try:
if "/api/contests/START209D/problems/" in url: data = json.loads(
problem_id = url.rstrip("/").split("/")[-1] fixture_text(f"codechef/{contest_id}.json")
)
return MockResponse(data)
except FileNotFoundError:
raise AssertionError(f"No fixture for CodeChef url={url!r}")
if "/api/contests/START" in url and "/problems/" in url:
parts = url.rstrip("/").split("/")
contest_id = parts[-3]
problem_id = parts[-1]
data = json.loads( data = json.loads(
fixture_text(f"codechef/START209D_{problem_id}.json") fixture_text(f"codechef/{contest_id}_{problem_id}.json")
) )
return MockResponse(data) return MockResponse(data)
raise AssertionError(f"No fixture for CodeChef url={url!r}") raise AssertionError(f"No fixture for CodeChef url={url!r}")

1
tests/fixtures/codechef/START209.json vendored Normal file

File diff suppressed because one or more lines are too long

View file

@@ -28,8 +28,8 @@ MATRIX = {
"contests": tuple(), "contests": tuple(),
}, },
"codechef": { "codechef": {
"metadata": ("START209",), "metadata": ("START209D",),
"tests": ("START209",), "tests": ("START209D",),
"contests": tuple(), "contests": tuple(),
}, },
} }