test: add offline fixture coverage for Kattis and USACO

Problem: Kattis and USACO had zero offline test coverage — no fixtures,
no conftest routers, and no entries in the test matrix. Precision, error
cases, and the Kattis contest-vs-slug fallback were also untested.

Solution: Add HTML fixtures for both platforms covering metadata, tests,
and contest list modes. Wire up conftest routers that patch
`httpx.AsyncClient.get` using the same pattern as CSES/CodeChef. Extend
the test matrix to include Kattis and USACO (18 parametrized cases, up
from 12). Add a dedicated test for the Kattis contest-path (verifies
`contest_url`/`standings_url` are set). Add parametrized metadata error
tests for CSES, USACO, and Kattis. Assert `precision` field type in all
tests-mode payloads; `usaco/problem_1471.html` includes an absolute-error
hint to exercise `extract_precision`.
This commit is contained in:
Barrett Ruth 2026-03-06 16:46:03 -05:00
parent 9727dccc6f
commit 37ad92432e
Signed by: barrett
GPG key ID: A6C96C9349D2FC81
12 changed files with 207 additions and 1 deletions

View file

@ -7,6 +7,8 @@ from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
from typing import Any from typing import Any
import re
import httpx import httpx
import pytest import pytest
import requests import requests
@ -103,6 +105,35 @@ def run_scraper_offline(fixture_text):
raise AssertionError(f"No fixture for Codeforces url={url!r}") raise AssertionError(f"No fixture for Codeforces url={url!r}")
def _router_kattis(*, url: str) -> str:
    """Resolve a Kattis URL to the offline HTML fixture that serves it.

    Raises AssertionError for any URL shape the fixtures do not cover,
    so an unexpected request fails the test loudly.
    """
    path = url.removeprefix("https://open.kattis.com")

    # Contest listing page.
    if "/contests?" in path:
        return fixture_text("kattis/contests.html")

    # Contest problem table; a missing fixture degrades to an empty page
    # so the scraper's contest-vs-slug fallback can be exercised.
    contest = re.search(r"/contests/([^/]+)/problems", path)
    if contest is not None:
        try:
            return fixture_text(f"kattis/contest_{contest.group(1)}_problems.html")
        except FileNotFoundError:
            return "<html></html>"

    # Individual problem statement (sample-zip downloads are handled upstream).
    if "/problems/" in path and "/file/statement" not in path:
        slug = path.rstrip("/").split("/")[-1]
        return fixture_text(f"kattis/problem_{slug}.html")

    raise AssertionError(f"No fixture for Kattis url={path!r}")
def _router_usaco(*, url: str) -> str:
    """Resolve a USACO URL to the offline HTML fixture that serves it.

    Raises AssertionError for any URL shape the fixtures do not cover.
    """
    # Contest index page (but not an individual results page).
    if "page=contests" in url and "results" not in url:
        return fixture_text("usaco/contests.html")

    # Per-contest results page, e.g. page=dec24results; unknown ones
    # degrade to an empty document instead of failing.
    results = re.search(r"page=([a-z]+\d{2,4}results)", url)
    if results is not None:
        try:
            return fixture_text(f"usaco/{results.group(1)}.html")
        except FileNotFoundError:
            return "<html></html>"

    # Individual problem page, keyed by numeric cpid.
    problem = re.search(r"page=viewproblem2&cpid=(\d+)", url)
    if problem is not None:
        return fixture_text(f"usaco/problem_{problem.group(1)}.html")

    raise AssertionError(f"No fixture for USACO url={url!r}")
def _make_offline_fetches(scraper_name: str): def _make_offline_fetches(scraper_name: str):
match scraper_name: match scraper_name:
case "cses": case "cses":
@ -213,6 +244,37 @@ def run_scraper_offline(fixture_text):
"__offline_get_async": __offline_get_async, "__offline_get_async": __offline_get_async,
} }
case "kattis":
async def __offline_get_kattis(client, url: str, **kwargs):
if "/file/statement/samples.zip" in url:
raise httpx.HTTPError("not found")
html = _router_kattis(url=url)
return SimpleNamespace(
text=html,
content=html.encode(),
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_kattis,
}
case "usaco":
async def __offline_get_usaco(client, url: str, **kwargs):
html = _router_usaco(url=url)
return SimpleNamespace(
text=html,
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_usaco,
}
case _: case _:
raise AssertionError(f"Unknown scraper: {scraper_name}") raise AssertionError(f"Unknown scraper: {scraper_name}")
@ -221,6 +283,8 @@ def run_scraper_offline(fixture_text):
"atcoder": "AtcoderScraper", "atcoder": "AtcoderScraper",
"codeforces": "CodeforcesScraper", "codeforces": "CodeforcesScraper",
"codechef": "CodeChefScraper", "codechef": "CodeChefScraper",
"kattis": "KattisScraper",
"usaco": "USACOScraper",
} }
def _run(scraper_name: str, mode: str, *args: str): def _run(scraper_name: str, mode: str, *args: str):
@ -236,7 +300,7 @@ def run_scraper_offline(fixture_text):
ns._get_async = offline_fetches["_get_async"] ns._get_async = offline_fetches["_get_async"]
elif scraper_name == "cses": elif scraper_name == "cses":
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]
elif scraper_name == "codechef": elif scraper_name in ("codechef", "kattis", "usaco"):
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] httpx.AsyncClient.get = offline_fetches["__offline_get_async"]
scraper_class = getattr(ns, scraper_classes[scraper_name]) scraper_class = getattr(ns, scraper_classes[scraper_name])

View file

@ -0,0 +1,10 @@
<html><body><table>
<tr>
<td>A</td>
<td><a href="/contests/open2024/problems/kth2024a">Arithmetic Sequence</a></td>
</tr>
<tr>
<td>B</td>
<td><a href="/contests/open2024/problems/kth2024b">Binary Tree</a></td>
</tr>
</table></body></html>

10
tests/fixtures/kattis/contests.html vendored Normal file
View file

@ -0,0 +1,10 @@
<html><body><table>
<tr>
<td><a href="/contests/open2024">Open 2024</a></td>
<td data-timestamp="1711800000">2024-03-30</td>
</tr>
<tr>
<td><a href="/contests/icpc2023">ICPC 2023</a></td>
<td data-timestamp="1698768000">2023-10-31</td>
</tr>
</table></body></html>

View file

@ -0,0 +1,11 @@
<html>
<head><title>Hello World</title></head>
<body>
<span>CPU Time limit</span><span class="num">1 second</span>
<span>Memory limit</span><span class="num">256 MB</span>
<table class="sample">
<pre>Hello World</pre>
<pre>Hello World</pre>
</table>
</body>
</html>

View file

@ -0,0 +1,17 @@
<html>
<head><title>Arithmetic Sequence</title></head>
<body>
<span>CPU Time limit</span><span class="num">2 seconds</span>
<span>Memory limit</span><span class="num">512 MB</span>
<table class="sample">
<pre>3
1 2 3</pre>
<pre>YES</pre>
</table>
<table class="sample">
<pre>2
1 3</pre>
<pre>NO</pre>
</table>
</body>
</html>

View file

@ -0,0 +1,12 @@
<html>
<head><title>Binary Tree</title></head>
<body>
<span>CPU Time limit</span><span class="num">1 second</span>
<span>Memory limit</span><span class="num">256 MB</span>
<table class="sample">
<pre>5
1 2 3 4 5</pre>
<pre>3</pre>
</table>
</body>
</html>

3
tests/fixtures/usaco/contests.html vendored Normal file
View file

@ -0,0 +1,3 @@
<html><body>
<a href="index.php?page=dec24results">December 2024 Results</a>
</body></html>

14
tests/fixtures/usaco/dec24results.html vendored Normal file
View file

@ -0,0 +1,14 @@
<html><body>
<h2>USACO 2024 December Contest, Gold</h2>
<b>Farmer John's Favorite Problem</b><br/>
<a href="index.php?page=viewproblem2&cpid=1469">View Problem</a>
<b>Binary Indexed Tree</b><br/>
<a href="index.php?page=viewproblem2&cpid=1470">View Problem</a>
<b>Counting Subsequences</b><br/>
<a href="index.php?page=viewproblem2&cpid=1471">View Problem</a>
</body></html>

10
tests/fixtures/usaco/problem_1469.html vendored Normal file
View file

@ -0,0 +1,10 @@
<html><body>
<p>Time limit: 4s. Memory limit: 256 MB.</p>
<p>Given N cows, find the answer.</p>
<pre class="in">3
1 2 3</pre>
<pre class="out">6</pre>
<pre class="in">1
5</pre>
<pre class="out">5</pre>
</body></html>

View file

@ -0,0 +1,7 @@
<html><body>
<p>Time limit: 2s. Memory limit: 512 MB.</p>
<p>Build a binary indexed tree.</p>
<pre class="in">4
1 3 2 4</pre>
<pre class="out">10</pre>
</body></html>

View file

@ -0,0 +1,7 @@
<html><body>
<p>Time limit: 4s. Memory limit: 256 MB.</p>
<p>Output the answer with absolute error at most 10^{-6}.</p>
<pre class="in">2
1 2</pre>
<pre class="out">1.500000</pre>
</body></html>

View file

@ -27,6 +27,16 @@ MATRIX = {
"tests": ("START209D",), "tests": ("START209D",),
"contests": tuple(), "contests": tuple(),
}, },
"kattis": {
"metadata": ("hello",),
"tests": ("hello",),
"contests": tuple(),
},
"usaco": {
"metadata": ("dec24_gold",),
"tests": ("dec24_gold",),
"contests": tuple(),
},
} }
@ -85,5 +95,36 @@ def test_scraper_offline_fixture_matrix(run_scraper_offline, scraper, mode):
) )
assert "multi_test" in obj, "Missing multi_test field in raw JSON" assert "multi_test" in obj, "Missing multi_test field in raw JSON"
assert isinstance(obj["multi_test"], bool), "multi_test not boolean" assert isinstance(obj["multi_test"], bool), "multi_test not boolean"
assert "precision" in obj, "Missing precision field in raw JSON"
assert obj["precision"] is None or isinstance(
obj["precision"], float
), "precision must be None or float"
validated_any = True validated_any = True
assert validated_any, "No valid tests payloads validated" assert validated_any, "No valid tests payloads validated"
def test_kattis_contest_metadata(run_scraper_offline):
    """Kattis contest path resolves problems and populates contest URLs."""
    exit_code, payloads = run_scraper_offline("kattis", "metadata", "open2024")
    assert exit_code == 0
    assert payloads

    # Validate only the final payload: earlier lines may be progress output.
    result = MetadataResult.model_validate(payloads[-1])
    assert result.success is True
    assert len(result.problems) == 2
    # Contest mode (unlike single-problem mode) must set both URLs.
    assert result.contest_url != ""
    assert result.standings_url != ""
@pytest.mark.parametrize(
    "scraper,contest_id",
    [
        ("cses", "nonexistent_category_xyz"),
        ("usaco", "badformat"),
        ("kattis", "nonexistent_problem_xyz"),
    ],
)
def test_scraper_metadata_error(run_scraper_offline, scraper, contest_id):
    """Unknown contest ids exit non-zero and report a structured error."""
    exit_code, payloads = run_scraper_offline(scraper, "metadata", contest_id)
    assert exit_code == 1
    assert payloads

    # The last JSON object carries the failure report.
    final = payloads[-1]
    assert final.get("success") is False
    assert final.get("error")