feat(scraper): add precision extraction, start_time, and submit support

Problem: problem pages contain floating-point precision requirements and
contest start timestamps that were not being extracted or stored. The
submit workflow also needed a foundation in the scraper layer.

Solution: add extract_precision() to base.py and propagate through all
scrapers into cache. Add start_time to ContestSummary and extract it
from AtCoder and Codeforces. Add SubmitResult model, abstract submit()
method, submit CLI case with get_language_id() resolution, stdin/env_extra
support in run_scraper, and a full AtCoder submit implementation; stub
the remaining platforms.
This commit is contained in:
Barrett Ruth 2026-03-03 14:51:42 -05:00 committed by Barrett Ruth
parent 865e3b5928
commit 90bd13580b
9 changed files with 245 additions and 20 deletions

View file

@ -1,8 +1,31 @@
import asyncio
import json
import os
import re
import sys
from abc import ABC, abstractmethod
from .models import CombinedTest, ContestListResult, MetadataResult, TestsResult
from .language_ids import get_language_id
from .models import CombinedTest, ContestListResult, MetadataResult, SubmitResult, TestsResult
_PRECISION_ABS_REL_RE = re.compile(
r"(?:absolute|relative)\s+error[^.]*?10\s*[\^{]\s*\{?\s*[-\u2212]\s*(\d+)\s*\}?",
re.IGNORECASE,
)
_PRECISION_DECIMAL_RE = re.compile(
r"round(?:ed)?\s+to\s+(\d+)\s+decimal\s+place",
re.IGNORECASE,
)
def extract_precision(text: str) -> float | None:
m = _PRECISION_ABS_REL_RE.search(text)
if m:
return 10 ** -int(m.group(1))
m = _PRECISION_DECIMAL_RE.search(text)
if m:
return 10 ** -int(m.group(1))
return None
class BaseScraper(ABC):
@ -19,6 +42,9 @@ class BaseScraper(ABC):
@abstractmethod
async def stream_tests_for_category_async(
    self,
    category_id: str,
) -> None:
    """Abstract hook: stream the tests for the category ``category_id``.

    Concrete scrapers must override this coroutine; it returns nothing.
    """
@abstractmethod
async def submit(
    self,
    contest_id: str,
    problem_id: str,
    source_code: str,
    language_id: str,
    credentials: dict[str, str],
) -> SubmitResult:
    """Abstract hook: submit ``source_code`` for a problem of a contest.

    Args:
        contest_id: Identifier of the contest.
        problem_id: Identifier of the problem within the contest.
        source_code: Full text of the solution to submit.
        language_id: Language identifier to submit under.
        credentials: String-to-string mapping of login data.

    Returns:
        A ``SubmitResult`` describing the outcome. Concrete scrapers must
        override this coroutine.
    """
def _usage(self) -> str:
name = self.platform_name
return f"Usage: {name}.py metadata <id> | tests <id> | contests"
@ -40,6 +66,9 @@ class BaseScraper(ABC):
def _contests_error(self, msg: str) -> ContestListResult:
    """Wrap ``msg`` in an unsuccessful ``ContestListResult``."""
    failure = ContestListResult(success=False, error=msg)
    return failure
def _submit_error(self, msg: str) -> SubmitResult:
    """Wrap ``msg`` in an unsuccessful ``SubmitResult``."""
    failure = SubmitResult(success=False, error=msg)
    return failure
async def _run_cli_async(self, args: list[str]) -> int:
if len(args) < 2:
print(self._metadata_error(self._usage()).model_dump_json())
@ -71,6 +100,21 @@ class BaseScraper(ABC):
print(result.model_dump_json())
return 0 if result.success else 1
case "submit":
if len(args) != 5:
print(self._submit_error("Usage: <platform> submit <contest_id> <problem_id> <language_id>").model_dump_json())
return 1
source_code = sys.stdin.read()
creds_raw = os.environ.get("CP_CREDENTIALS", "{}")
try:
credentials = json.loads(creds_raw)
except json.JSONDecodeError:
credentials = {}
language_id = get_language_id(self.platform_name, args[4]) or args[4]
result = await self.submit(args[2], args[3], source_code, language_id, credentials)
print(result.model_dump_json())
return 0 if result.success else 1
case _:
print(
self._metadata_error(