fix: scrapers
This commit is contained in:
parent
c509102b37
commit
2426e1cbd4
3 changed files with 23 additions and 46 deletions
|
|
@ -8,7 +8,7 @@
|
|||
"singleQuote": true,
|
||||
"overrides": [
|
||||
{
|
||||
"files": ["*.md", "docs/**/*.md"],
|
||||
"files": ["**/*.md"],
|
||||
"options": {
|
||||
"parser": "markdown"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,18 @@
|
|||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class TestCase(BaseModel):
|
||||
input: str
|
||||
expected: str
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class ProblemSummary(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class ContestSummary(BaseModel):
|
||||
|
|
@ -22,31 +20,27 @@ class ContestSummary(BaseModel):
|
|||
name: str
|
||||
display_name: str | None = None
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class ScrapingResult(BaseModel):
|
||||
success: bool
|
||||
error: str
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class MetadataResult(ScrapingResult):
|
||||
contest_id: str = ""
|
||||
problems: list[ProblemSummary] = Field(default_factory=list)
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class ContestListResult(ScrapingResult):
|
||||
contests: list[ContestSummary] = Field(default_factory=list)
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class TestsResult(ScrapingResult):
|
||||
|
|
@ -57,8 +51,7 @@ class TestsResult(ScrapingResult):
|
|||
memory_mb: float
|
||||
interactive: bool = False
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class ScraperConfig(BaseModel):
|
||||
|
|
@ -67,5 +60,4 @@ class ScraperConfig(BaseModel):
|
|||
backoff_base: float = 2.0
|
||||
rate_limit_delay: float = 1.0
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import importlib.util
|
||||
import io
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
from types import ModuleType
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -19,32 +20,16 @@ def fixture_text():
|
|||
return _load
|
||||
|
||||
|
||||
def _compile_and_exec_module(
|
||||
module_path: Path, offline_fetch_impls: dict[str, Callable]
|
||||
):
|
||||
src = module_path.read_text(encoding="utf-8")
|
||||
|
||||
replacements: list[tuple[str, str]] = [
|
||||
("def _fetch(", "def _orig_fetch("),
|
||||
("def fetch_text(", "def _orig_fetch_text("),
|
||||
("async def _get_async(", "async def _orig_get_async("),
|
||||
]
|
||||
for old, new in replacements:
|
||||
src = src.replace(old, new)
|
||||
|
||||
stub_lines = []
|
||||
if " _orig_fetch(" in src or "def _orig_fetch(" in src:
|
||||
stub_lines.append("_fetch = __offline_fetch_sync")
|
||||
if " _orig_fetch_text(" in src or "def _orig_fetch_text(" in src:
|
||||
stub_lines.append("fetch_text = __offline_fetch_text")
|
||||
if " _orig_get_async(" in src or "async def _orig_get_async(" in src:
|
||||
stub_lines.append("_get_async = __offline_fetch_async")
|
||||
src += "\n" + "\n".join(stub_lines) + "\n"
|
||||
|
||||
ns = {}
|
||||
ns.update(offline_fetch_impls)
|
||||
exec(compile(src, str(module_path), "exec"), ns)
|
||||
return ns
|
||||
def _load_scraper_module(module_path: Path, module_name: str) -> ModuleType:
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"scrapers.{module_name}", module_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
raise ImportError(f"Could not load spec for {module_name} from {module_path}")
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[f"scrapers.{module_name}"] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def _capture_stdout(coro):
|
||||
|
|
@ -146,7 +131,7 @@ def run_scraper_offline(fixture_text):
|
|||
|
||||
def _run(scraper_name: str, mode: str, *args: str):
|
||||
mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
|
||||
ns = _compile_and_exec_module(mod_path, _make_offline_fetches(scraper_name))
|
||||
ns = _load_scraper_module(mod_path, scraper_name)
|
||||
main_async = ns.get("main_async")
|
||||
assert callable(main_async), f"main_async not found in {scraper_name}"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue