feat(scraper): add precision extraction, start_time, and submit support

Problem: problem pages contain floating-point precision requirements and
contest start timestamps that were not being extracted or stored. The
submit workflow also needed a foundation in the scraper layer.

Solution: add extract_precision() to base.py and propagate through all
scrapers into cache. Add start_time to ContestSummary and extract it
from AtCoder and Codeforces. Add SubmitResult model, abstract submit()
method, submit CLI case with get_language_id() resolution, stdin/env_extra
support in run_scraper, and a full AtCoder submit implementation; stub
the remaining platforms.
This commit is contained in:
Barrett Ruth 2026-03-03 14:51:42 -05:00 committed by Barrett Ruth
parent 865e3b5928
commit 90bd13580b
9 changed files with 245 additions and 20 deletions

View file

@ -7,12 +7,13 @@ from typing import Any
import httpx
from .base import BaseScraper
from .base import BaseScraper, extract_precision
from .models import (
ContestListResult,
ContestSummary,
MetadataResult,
ProblemSummary,
SubmitResult,
TestCase,
)
@ -129,17 +130,21 @@ def parse_category_problems(category_id: str, html: str) -> list[ProblemSummary]
return []
def _extract_problem_info(html: str) -> tuple[int, int, bool]:
def _extract_problem_info(html: str) -> tuple[int, int, bool, float | None]:
tm = TIME_RE.search(html)
mm = MEM_RE.search(html)
t = int(round(float(tm.group(1)) * 1000)) if tm else 0
m = int(mm.group(1)) if mm else 0
md = MD_BLOCK_RE.search(html)
interactive = False
precision = None
if md:
body = md.group(1)
interactive = "This is an interactive problem." in body
return t, m, interactive
from bs4 import BeautifulSoup
precision = extract_precision(BeautifulSoup(body, "html.parser").get_text(" "))
return t, m, interactive, precision
def parse_title(html: str) -> str:
@ -257,6 +262,9 @@ class CSESScraper(BaseScraper):
payload = await coro
print(json.dumps(payload), flush=True)
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult:
return SubmitResult(success=False, error="CSES submit not yet implemented", submission_id="", verdict="")
if __name__ == "__main__":
CSESScraper().run_cli()