fix: scrapers
This commit is contained in:
parent
c509102b37
commit
2426e1cbd4
3 changed files with 23 additions and 46 deletions
|
|
@@ -8,7 +8,7 @@
|
||||||
"singleQuote": true,
|
"singleQuote": true,
|
||||||
"overrides": [
|
"overrides": [
|
||||||
{
|
{
|
||||||
"files": ["*.md", "docs/**/*.md"],
|
"files": ["**/*.md"],
|
||||||
"options": {
|
"options": {
|
||||||
"parser": "markdown"
|
"parser": "markdown"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -1,20 +1,18 @@
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
|
|
||||||
class TestCase(BaseModel):
|
class TestCase(BaseModel):
|
||||||
input: str
|
input: str
|
||||||
expected: str
|
expected: str
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class ProblemSummary(BaseModel):
|
class ProblemSummary(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class ContestSummary(BaseModel):
|
class ContestSummary(BaseModel):
|
||||||
|
|
@@ -22,31 +20,27 @@ class ContestSummary(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
display_name: str | None = None
|
display_name: str | None = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class ScrapingResult(BaseModel):
|
class ScrapingResult(BaseModel):
|
||||||
success: bool
|
success: bool
|
||||||
error: str
|
error: str
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class MetadataResult(ScrapingResult):
|
class MetadataResult(ScrapingResult):
|
||||||
contest_id: str = ""
|
contest_id: str = ""
|
||||||
problems: list[ProblemSummary] = Field(default_factory=list)
|
problems: list[ProblemSummary] = Field(default_factory=list)
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class ContestListResult(ScrapingResult):
|
class ContestListResult(ScrapingResult):
|
||||||
contests: list[ContestSummary] = Field(default_factory=list)
|
contests: list[ContestSummary] = Field(default_factory=list)
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class TestsResult(ScrapingResult):
|
class TestsResult(ScrapingResult):
|
||||||
|
|
@@ -57,8 +51,7 @@ class TestsResult(ScrapingResult):
|
||||||
memory_mb: float
|
memory_mb: float
|
||||||
interactive: bool = False
|
interactive: bool = False
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
||||||
|
|
||||||
class ScraperConfig(BaseModel):
|
class ScraperConfig(BaseModel):
|
||||||
|
|
@@ -67,5 +60,4 @@ class ScraperConfig(BaseModel):
|
||||||
backoff_base: float = 2.0
|
backoff_base: float = 2.0
|
||||||
rate_limit_delay: float = 1.0
|
rate_limit_delay: float = 1.0
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="forbid")
|
||||||
extra = "forbid"
|
|
||||||
|
|
|
||||||
|
|
@@ -1,8 +1,9 @@
|
||||||
|
import importlib.util
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable
|
from types import ModuleType
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
@@ -19,32 +20,16 @@ def fixture_text():
|
||||||
return _load
|
return _load
|
||||||
|
|
||||||
|
|
||||||
def _compile_and_exec_module(
|
def _load_scraper_module(module_path: Path, module_name: str) -> ModuleType:
|
||||||
module_path: Path, offline_fetch_impls: dict[str, Callable]
|
spec = importlib.util.spec_from_file_location(
|
||||||
):
|
f"scrapers.{module_name}", module_path
|
||||||
src = module_path.read_text(encoding="utf-8")
|
)
|
||||||
|
if spec is None or spec.loader is None:
|
||||||
replacements: list[tuple[str, str]] = [
|
raise ImportError(f"Could not load spec for {module_name} from {module_path}")
|
||||||
("def _fetch(", "def _orig_fetch("),
|
module = importlib.util.module_from_spec(spec)
|
||||||
("def fetch_text(", "def _orig_fetch_text("),
|
sys.modules[f"scrapers.{module_name}"] = module
|
||||||
("async def _get_async(", "async def _orig_get_async("),
|
spec.loader.exec_module(module)
|
||||||
]
|
return module
|
||||||
for old, new in replacements:
|
|
||||||
src = src.replace(old, new)
|
|
||||||
|
|
||||||
stub_lines = []
|
|
||||||
if " _orig_fetch(" in src or "def _orig_fetch(" in src:
|
|
||||||
stub_lines.append("_fetch = __offline_fetch_sync")
|
|
||||||
if " _orig_fetch_text(" in src or "def _orig_fetch_text(" in src:
|
|
||||||
stub_lines.append("fetch_text = __offline_fetch_text")
|
|
||||||
if " _orig_get_async(" in src or "async def _orig_get_async(" in src:
|
|
||||||
stub_lines.append("_get_async = __offline_fetch_async")
|
|
||||||
src += "\n" + "\n".join(stub_lines) + "\n"
|
|
||||||
|
|
||||||
ns = {}
|
|
||||||
ns.update(offline_fetch_impls)
|
|
||||||
exec(compile(src, str(module_path), "exec"), ns)
|
|
||||||
return ns
|
|
||||||
|
|
||||||
|
|
||||||
def _capture_stdout(coro):
|
def _capture_stdout(coro):
|
||||||
|
|
@@ -146,7 +131,7 @@ def run_scraper_offline(fixture_text):
|
||||||
|
|
||||||
def _run(scraper_name: str, mode: str, *args: str):
|
def _run(scraper_name: str, mode: str, *args: str):
|
||||||
mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
|
mod_path = ROOT / "scrapers" / f"{scraper_name}.py"
|
||||||
ns = _compile_and_exec_module(mod_path, _make_offline_fetches(scraper_name))
|
ns = _load_scraper_module(mod_path, scraper_name)
|
||||||
main_async = ns.get("main_async")
|
main_async = ns.get("main_async")
|
||||||
assert callable(main_async), f"main_async not found in {scraper_name}"
|
assert callable(main_async), f"main_async not found in {scraper_name}"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue