fix: scrapers

This commit is contained in:
Barrett Ruth 2025-10-05 22:10:26 -04:00
parent c509102b37
commit 2426e1cbd4
3 changed files with 23 additions and 46 deletions

View file

@@ -8,7 +8,7 @@
"singleQuote": true, "singleQuote": true,
"overrides": [ "overrides": [
{ {
"files": ["*.md", "docs/**/*.md"], "files": ["**/*.md"],
"options": { "options": {
"parser": "markdown" "parser": "markdown"
} }

View file

@@ -1,20 +1,18 @@
from pydantic import BaseModel, Field from pydantic import BaseModel, ConfigDict, Field
class TestCase(BaseModel): class TestCase(BaseModel):
input: str input: str
expected: str expected: str
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class ProblemSummary(BaseModel): class ProblemSummary(BaseModel):
id: str id: str
name: str name: str
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class ContestSummary(BaseModel): class ContestSummary(BaseModel):
@@ -22,31 +20,27 @@ class ContestSummary(BaseModel):
name: str name: str
display_name: str | None = None display_name: str | None = None
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class ScrapingResult(BaseModel): class ScrapingResult(BaseModel):
success: bool success: bool
error: str error: str
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class MetadataResult(ScrapingResult): class MetadataResult(ScrapingResult):
contest_id: str = "" contest_id: str = ""
problems: list[ProblemSummary] = Field(default_factory=list) problems: list[ProblemSummary] = Field(default_factory=list)
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class ContestListResult(ScrapingResult): class ContestListResult(ScrapingResult):
contests: list[ContestSummary] = Field(default_factory=list) contests: list[ContestSummary] = Field(default_factory=list)
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class TestsResult(ScrapingResult): class TestsResult(ScrapingResult):
@@ -57,8 +51,7 @@ class TestsResult(ScrapingResult):
memory_mb: float memory_mb: float
interactive: bool = False interactive: bool = False
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class ScraperConfig(BaseModel): class ScraperConfig(BaseModel):
@@ -67,5 +60,4 @@ class ScraperConfig(BaseModel):
backoff_base: float = 2.0 backoff_base: float = 2.0
rate_limit_delay: float = 1.0 rate_limit_delay: float = 1.0
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"

View file

@@ -1,8 +1,9 @@
import importlib.util
import io import io
import json import json
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Callable from types import ModuleType
import pytest import pytest
@@ -19,32 +20,16 @@ def fixture_text():
return _load return _load
def _compile_and_exec_module( def _load_scraper_module(module_path: Path, module_name: str) -> ModuleType:
module_path: Path, offline_fetch_impls: dict[str, Callable] spec = importlib.util.spec_from_file_location(
): f"scrapers.{module_name}", module_path
src = module_path.read_text(encoding="utf-8") )
if spec is None or spec.loader is None:
replacements: list[tuple[str, str]] = [ raise ImportError(f"Could not load spec for {module_name} from {module_path}")
("def _fetch(", "def _orig_fetch("), module = importlib.util.module_from_spec(spec)
("def fetch_text(", "def _orig_fetch_text("), sys.modules[f"scrapers.{module_name}"] = module
("async def _get_async(", "async def _orig_get_async("), spec.loader.exec_module(module)
] return module
for old, new in replacements:
src = src.replace(old, new)
stub_lines = []
if " _orig_fetch(" in src or "def _orig_fetch(" in src:
stub_lines.append("_fetch = __offline_fetch_sync")
if " _orig_fetch_text(" in src or "def _orig_fetch_text(" in src:
stub_lines.append("fetch_text = __offline_fetch_text")
if " _orig_get_async(" in src or "async def _orig_get_async(" in src:
stub_lines.append("_get_async = __offline_fetch_async")
src += "\n" + "\n".join(stub_lines) + "\n"
ns = {}
ns.update(offline_fetch_impls)
exec(compile(src, str(module_path), "exec"), ns)
return ns
def _capture_stdout(coro): def _capture_stdout(coro):
@@ -146,7 +131,7 @@ def run_scraper_offline(fixture_text):
def _run(scraper_name: str, mode: str, *args: str): def _run(scraper_name: str, mode: str, *args: str):
mod_path = ROOT / "scrapers" / f"{scraper_name}.py" mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
ns = _compile_and_exec_module(mod_path, _make_offline_fetches(scraper_name)) ns = _load_scraper_module(mod_path, scraper_name)
main_async = ns.get("main_async") main_async = ns.get("main_async")
assert callable(main_async), f"main_async not found in {scraper_name}" assert callable(main_async), f"main_async not found in {scraper_name}"