From 82640709d6dcc884c5c0312070a1c457cd835a0d Mon Sep 17 00:00:00 2001 From: Barrett Ruth <62671086+barrettruth@users.noreply.github.com> Date: Fri, 6 Mar 2026 15:23:55 -0500 Subject: [PATCH] fix(kattis,usaco): precision, open URLs, and Kattis submit error surface (#335) ## Problem Kattis and USACO problem tests never extracted float precision, so epsilon problems got no tolerance. Kattis `scrape_contest_metadata` omitted `contest_url` and `standings_url`, breaking `:CP open contest/standings`. Kattis submit always returned success even when the server responded with an error (e.g. "You need to join the contest"). ## Solution Call `extract_precision` on problem HTML in both scrapers and emit it in the JSON payload. Set `contest_url` and `standings_url` on Kattis metadata paths. After Kattis submit, check for `Submission ID:` in the response and surface the error text if absent. --- scrapers/kattis.py | 18 +++++++++++++++--- scrapers/usaco.py | 6 +++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/scrapers/kattis.py b/scrapers/kattis.py index 7741847..2c5c1fc 100644 --- a/scrapers/kattis.py +++ b/scrapers/kattis.py @@ -10,7 +10,7 @@ from pathlib import Path import httpx -from .base import BaseScraper +from .base import BaseScraper, extract_precision from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, @@ -173,6 +173,7 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None: timeout_ms, memory_mb = _parse_limits(html) interactive = _is_interactive(html) + precision = extract_precision(html) tests: list[TestCase] = [] try: @@ -200,6 +201,7 @@ async def _stream_single_problem(client: httpx.AsyncClient, slug: str) -> None: "memory_mb": memory_mb, "interactive": interactive, "multi_test": False, + "precision": precision, } ), flush=True, @@ -254,6 +256,8 @@ class KattisScraper(BaseScraper): ProblemSummary(id=slug, name=name) for slug, name in slugs ], url=f"{BASE_URL}/problems/%s", + contest_url=f"{BASE_URL}/contests/{contest_id}", + standings_url=f"{BASE_URL}/contests/{contest_id}/standings", ) try: html = await _fetch_text( @@ -273,6 +277,8 @@ class KattisScraper(BaseScraper): contest_id=contest_id, problems=[ProblemSummary(id=contest_id, name=name)], url=f"{BASE_URL}/problems/%s", + contest_url=f"{BASE_URL}/problems/{contest_id}", + standings_url="", ) except Exception as e: return self._metadata_error(str(e)) @@ -373,9 +379,15 @@ class KattisScraper(BaseScraper): return self._submit_error(f"Submit request failed: {e}") sid_m = re.search(r"Submission ID:\s*(\d+)", r.text, re.IGNORECASE) - sid = sid_m.group(1) if sid_m else "" + if not sid_m: + return self._submit_error( + r.text.strip() or "Submit failed (no submission ID)" + ) return SubmitResult( - success=True, error="", submission_id=sid, verdict="submitted" + success=True, + error="", + submission_id=sid_m.group(1), + verdict="submitted", ) async def login(self, credentials: dict[str, str]) -> LoginResult: diff --git a/scrapers/usaco.py b/scrapers/usaco.py index 074cbf9..5ab89f7 100644 --- a/scrapers/usaco.py +++ b/scrapers/usaco.py @@ -8,7 +8,7 @@ from typing import Any, cast import httpx -from .base import BaseScraper +from .base import BaseScraper, extract_precision from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, @@ -130,12 +130,14 @@ def _parse_problem_page(html: str) -> dict[str, Any]: memory_mb = int(mm.group(1)) if mm else 256 interactive = "interactive problem" in html.lower() + precision = extract_precision(html) return { "tests": tests, "timeout_ms": timeout_ms, "memory_mb": memory_mb, "interactive": interactive, + "precision": precision, } @@ -375,6 +377,7 @@ class USACOScraper(BaseScraper): "timeout_ms": 4000, "memory_mb": 256, "interactive": False, + "precision": None, } tests = cast(list[TestCase], info["tests"]) @@ -396,6 +399,7 @@ class USACOScraper(BaseScraper): "memory_mb": info["memory_mb"], "interactive": info["interactive"], "multi_test": False, + "precision": info["precision"], } tasks = [run_one(cpid) for cpid, _ in problems_raw]