diff --git a/tests/conftest.py b/tests/conftest.py
index b6ff810..f1248a5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,6 +7,8 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
 
+import re
+
 import httpx
 import pytest
 import requests
@@ -103,6 +105,35 @@ def run_scraper_offline(fixture_text):
 
         raise AssertionError(f"No fixture for Codeforces url={url!r}")
 
+    def _router_kattis(*, url: str) -> str:
+        url = url.removeprefix("https://open.kattis.com")
+        if "/contests?" in url:
+            return fixture_text("kattis/contests.html")
+        m = re.search(r"/contests/([^/]+)/problems", url)
+        if m:
+            try:
+                return fixture_text(f"kattis/contest_{m.group(1)}_problems.html")
+            except FileNotFoundError:
+                return ""
+        if "/problems/" in url and "/file/statement" not in url:
+            slug = url.rstrip("/").split("/")[-1]
+            return fixture_text(f"kattis/problem_{slug}.html")
+        raise AssertionError(f"No fixture for Kattis url={url!r}")
+
+    def _router_usaco(*, url: str) -> str:
+        if "page=contests" in url and "results" not in url:
+            return fixture_text("usaco/contests.html")
+        m = re.search(r"page=([a-z]+\d{2,4}results)", url)
+        if m:
+            try:
+                return fixture_text(f"usaco/{m.group(1)}.html")
+            except FileNotFoundError:
+                return ""
+        m = re.search(r"page=viewproblem2&cpid=(\d+)", url)
+        if m:
+            return fixture_text(f"usaco/problem_{m.group(1)}.html")
+        raise AssertionError(f"No fixture for USACO url={url!r}")
+
     def _make_offline_fetches(scraper_name: str):
         match scraper_name:
             case "cses":
@@ -213,6 +244,37 @@ def run_scraper_offline(fixture_text):
                     "__offline_get_async": __offline_get_async,
                 }
 
+            case "kattis":
+
+                async def __offline_get_kattis(client, url: str, **kwargs):
+                    if "/file/statement/samples.zip" in url:
+                        raise httpx.HTTPError("not found")
+                    html = _router_kattis(url=url)
+                    return SimpleNamespace(
+                        text=html,
+                        content=html.encode(),
+                        status_code=200,
+                        raise_for_status=lambda: None,
+                    )
+
+                return {
+                    "__offline_get_async": __offline_get_kattis,
+                }
+
+            case "usaco":
+
+                async def __offline_get_usaco(client, url: str, **kwargs):
+                    html = _router_usaco(url=url)
+                    return SimpleNamespace(
+                        text=html,
+                        status_code=200,
+                        raise_for_status=lambda: None,
+                    )
+
+                return {
+                    "__offline_get_async": __offline_get_usaco,
+                }
+
             case _:
                 raise AssertionError(f"Unknown scraper: {scraper_name}")
 
@@ -221,6 +283,8 @@ def run_scraper_offline(fixture_text):
         "atcoder": "AtcoderScraper",
         "codeforces": "CodeforcesScraper",
        "codechef": "CodeChefScraper",
+        "kattis": "KattisScraper",
+        "usaco": "USACOScraper",
     }
 
     def _run(scraper_name: str, mode: str, *args: str):
@@ -236,7 +300,7 @@ def run_scraper_offline(fixture_text):
             ns._get_async = offline_fetches["_get_async"]
         elif scraper_name == "cses":
             httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]
-        elif scraper_name == "codechef":
+        elif scraper_name in ("codechef", "kattis", "usaco"):
             httpx.AsyncClient.get = offline_fetches["__offline_get_async"]
 
         scraper_class = getattr(ns, scraper_classes[scraper_name])
diff --git a/tests/fixtures/kattis/contest_open2024_problems.html b/tests/fixtures/kattis/contest_open2024_problems.html
new file mode 100644
index 0000000..4f6cda2
--- /dev/null
+++ b/tests/fixtures/kattis/contest_open2024_problems.html
@@ -0,0 +1,10 @@
+
| A | +Arithmetic Sequence | +
| B | +Binary Tree | +
| Open 2024 | +2024-03-30 | +
| ICPC 2023 | +2023-10-31 | +
Time limit: 4s. Memory limit: 256 MB.
+Given N cows, find the answer.
+3 +1 2 3+
6+
1 +5+
5+ diff --git a/tests/fixtures/usaco/problem_1470.html b/tests/fixtures/usaco/problem_1470.html new file mode 100644 index 0000000..bd88503 --- /dev/null +++ b/tests/fixtures/usaco/problem_1470.html @@ -0,0 +1,7 @@ + +
Time limit: 2s. Memory limit: 512 MB.
+Build a binary indexed tree.
+4 +1 3 2 4+
10+ diff --git a/tests/fixtures/usaco/problem_1471.html b/tests/fixtures/usaco/problem_1471.html new file mode 100644 index 0000000..0c6fdd0 --- /dev/null +++ b/tests/fixtures/usaco/problem_1471.html @@ -0,0 +1,7 @@ + +
Time limit: 4s. Memory limit: 256 MB.
+Output the answer with absolute error at most 10^{-6}.
+2 +1 2+
1.500000+
diff --git a/tests/test_scrapers.py b/tests/test_scrapers.py
index 8ce468f..c4fa6d5 100644
--- a/tests/test_scrapers.py
+++ b/tests/test_scrapers.py
@@ -27,6 +27,16 @@ MATRIX = {
         "tests": ("START209D",),
         "contests": tuple(),
     },
+    "kattis": {
+        "metadata": ("hello",),
+        "tests": ("hello",),
+        "contests": tuple(),
+    },
+    "usaco": {
+        "metadata": ("dec24_gold",),
+        "tests": ("dec24_gold",),
+        "contests": tuple(),
+    },
 }
 
 
@@ -85,5 +95,45 @@ def test_scraper_offline_fixture_matrix(run_scraper_offline, scraper, mode):
             )
             assert "multi_test" in obj, "Missing multi_test field in raw JSON"
             assert isinstance(obj["multi_test"], bool), "multi_test not boolean"
+            assert "precision" in obj, "Missing precision field in raw JSON"
+            assert obj["precision"] is None or isinstance(
+                obj["precision"], float
+            ), "precision must be None or float"
             validated_any = True
     assert validated_any, "No valid tests payloads validated"
+
+
+def test_kattis_contest_metadata(run_scraper_offline):
+    rc, objs = run_scraper_offline("kattis", "metadata", "open2024")
+    assert rc == 0
+    assert objs
+    model = MetadataResult.model_validate(objs[-1])
+    assert model.success is True
+    assert len(model.problems) == 2
+    assert model.contest_url != ""
+    assert model.standings_url != ""
+
+
+def test_usaco_precision_extracted(run_scraper_offline):
+    rc, objs = run_scraper_offline("usaco", "tests", "dec24_gold")
+    assert rc == 0
+    precisions = [obj["precision"] for obj in objs if "problem_id" in obj]
+    assert any(p is not None for p in precisions), (
+        "Expected at least one problem with precision"
+    )
+
+
+@pytest.mark.parametrize(
+    "scraper,contest_id",
+    [
+        ("cses", "nonexistent_category_xyz"),
+        ("usaco", "badformat"),
+        ("kattis", "nonexistent_problem_xyz"),
+    ],
+)
+def test_scraper_metadata_error(run_scraper_offline, scraper, contest_id):
+    rc, objs = run_scraper_offline(scraper, "metadata", contest_id)
+    assert rc == 1
+    assert objs
+    assert objs[-1].get("success") is False
+    assert objs[-1].get("error")