fix: scrapers

This commit is contained in:
Barrett Ruth 2025-10-05 22:10:26 -04:00
parent c509102b37
commit 2426e1cbd4
3 changed files with 23 additions and 46 deletions

View file

@ -8,7 +8,7 @@
"singleQuote": true,
"overrides": [
{
"files": ["*.md", "docs/**/*.md"],
"files": ["**/*.md"],
"options": {
"parser": "markdown"
}

View file

@ -1,20 +1,18 @@
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
class TestCase(BaseModel):
    """One scraped test case: raw input text and the expected output text."""

    input: str
    expected: str

    # Pydantic v2 style; reject unknown fields so malformed payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
class ProblemSummary(BaseModel):
    """Identifying info for a single problem: its id and display name."""

    id: str
    name: str

    # Pydantic v2 style; reject unknown fields so malformed payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
class ContestSummary(BaseModel):
@ -22,31 +20,27 @@ class ContestSummary(BaseModel):
name: str
display_name: str | None = None
class Config:
extra = "forbid"
model_config = ConfigDict(extra="forbid")
class ScrapingResult(BaseModel):
    """Base result for scraper operations: success flag plus an error string."""

    success: bool
    error: str

    # Pydantic v2 style; reject unknown fields so malformed payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
class MetadataResult(ScrapingResult):
    """Scraping result carrying a contest id and its problem summaries."""

    contest_id: str = ""
    # default_factory avoids a shared mutable default across instances
    problems: list[ProblemSummary] = Field(default_factory=list)

    # Pydantic v2 style; reject unknown fields so malformed payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
class ContestListResult(ScrapingResult):
    """Scraping result carrying the list of discovered contests."""

    # default_factory avoids a shared mutable default across instances
    contests: list[ContestSummary] = Field(default_factory=list)

    # Pydantic v2 style; reject unknown fields so malformed payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
class TestsResult(ScrapingResult):
@ -57,8 +51,7 @@ class TestsResult(ScrapingResult):
memory_mb: float
interactive: bool = False
class Config:
extra = "forbid"
model_config = ConfigDict(extra="forbid")
class ScraperConfig(BaseModel):
@ -67,5 +60,4 @@ class ScraperConfig(BaseModel):
backoff_base: float = 2.0
rate_limit_delay: float = 1.0
class Config:
extra = "forbid"
model_config = ConfigDict(extra="forbid")

View file

@ -1,8 +1,9 @@
import importlib.util
import io
import json
import sys
from pathlib import Path
from typing import Callable
from types import ModuleType
import pytest
@ -19,32 +20,16 @@ def fixture_text():
return _load
def _compile_and_exec_module(
module_path: Path, offline_fetch_impls: dict[str, Callable]
):
src = module_path.read_text(encoding="utf-8")
replacements: list[tuple[str, str]] = [
("def _fetch(", "def _orig_fetch("),
("def fetch_text(", "def _orig_fetch_text("),
("async def _get_async(", "async def _orig_get_async("),
]
for old, new in replacements:
src = src.replace(old, new)
stub_lines = []
if " _orig_fetch(" in src or "def _orig_fetch(" in src:
stub_lines.append("_fetch = __offline_fetch_sync")
if " _orig_fetch_text(" in src or "def _orig_fetch_text(" in src:
stub_lines.append("fetch_text = __offline_fetch_text")
if " _orig_get_async(" in src or "async def _orig_get_async(" in src:
stub_lines.append("_get_async = __offline_fetch_async")
src += "\n" + "\n".join(stub_lines) + "\n"
ns = {}
ns.update(offline_fetch_impls)
exec(compile(src, str(module_path), "exec"), ns)
return ns
def _load_scraper_module(module_path: Path, module_name: str) -> ModuleType:
spec = importlib.util.spec_from_file_location(
f"scrapers.{module_name}", module_path
)
if spec is None or spec.loader is None:
raise ImportError(f"Could not load spec for {module_name} from {module_path}")
module = importlib.util.module_from_spec(spec)
sys.modules[f"scrapers.{module_name}"] = module
spec.loader.exec_module(module)
return module
def _capture_stdout(coro):
@ -146,7 +131,7 @@ def run_scraper_offline(fixture_text):
def _run(scraper_name: str, mode: str, *args: str):
mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
ns = _compile_and_exec_module(mod_path, _make_offline_fetches(scraper_name))
ns = _load_scraper_module(mod_path, scraper_name)
main_async = ns.get("main_async")
assert callable(main_async), f"main_async not found in {scraper_name}"