test: add offline fixture coverage for Kattis and USACO (#342)

## Problem

Kattis and USACO had zero offline test coverage — no fixtures, no
conftest routers, and no entries in the test matrix. The `precision`
field and error paths were also unverified across all platforms.

## Solution

Add HTML fixtures for both platforms and wire up `httpx.AsyncClient.get`
routers in `conftest.py` following the existing CSES/CodeChef pattern.
Extend the test matrix from 12 to 23 parametrized cases. Add a dedicated
test for the Kattis contest-vs-slug fallback path (verifying
`contest_url` and `standings_url`), three parametrized metadata error
cases, and a targeted assertion that `extract_precision` returns a
non-`None` float for problems with floating-point tolerance hints.

Closes #281.
This commit is contained in:
Barrett Ruth 2026-03-06 16:49:49 -05:00 committed by GitHub
parent 9727dccc6f
commit 8465e70772
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 216 additions and 1 deletion

View file

@ -7,6 +7,8 @@ from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
from typing import Any from typing import Any
import re
import httpx import httpx
import pytest import pytest
import requests import requests
@ -103,6 +105,35 @@ def run_scraper_offline(fixture_text):
raise AssertionError(f"No fixture for Codeforces url={url!r}") raise AssertionError(f"No fixture for Codeforces url={url!r}")
def _router_kattis(*, url: str) -> str:
    """Map a Kattis URL to the text of the matching offline fixture.

    The ``https://open.kattis.com`` prefix is stripped first, so both
    absolute and host-relative URLs are routed the same way.

    Routing rules, checked in order:

    * a URL containing ``/contests?`` -> ``kattis/contests.html``
    * ``/contests/<id>/problems`` -> ``kattis/contest_<id>_problems.html``;
      if that fixture file is missing, an empty ``<html></html>`` document
      is returned instead of raising
    * a URL containing ``/problems/`` but not ``/file/statement`` ->
      ``kattis/problem_<slug>.html``, where ``<slug>`` is the last path
      segment (trailing slashes ignored)

    Raises:
        AssertionError: if none of the rules match the URL.
    """
    path = url.removeprefix("https://open.kattis.com")

    if "/contests?" in path:
        return fixture_text("kattis/contests.html")

    contest_match = re.search(r"/contests/([^/]+)/problems", path)
    if contest_match is not None:
        contest_id = contest_match.group(1)
        try:
            return fixture_text(f"kattis/contest_{contest_id}_problems.html")
        except FileNotFoundError:
            # Unknown contest id: hand back an empty page rather than failing here.
            return "<html></html>"

    is_problem_page = "/problems/" in path and "/file/statement" not in path
    if not is_problem_page:
        raise AssertionError(f"No fixture for Kattis url={path!r}")

    slug = path.rstrip("/").rsplit("/", 1)[-1]
    return fixture_text(f"kattis/problem_{slug}.html")
def _router_usaco(*, url: str) -> str:
    """Map a USACO URL to the text of the matching offline fixture.

    Routing rules, checked in order:

    * a URL containing ``page=contests`` and not containing ``results`` ->
      ``usaco/contests.html``
    * ``page=<name>`` where ``<name>`` is lowercase letters, 2-4 digits and
      the word ``results`` (e.g. ``dec24results``) -> ``usaco/<name>.html``;
      if that fixture file is missing, an empty ``<html></html>`` document
      is returned instead of raising
    * ``page=viewproblem2&cpid=<digits>`` -> ``usaco/problem_<digits>.html``

    Raises:
        AssertionError: if none of the rules match the URL.
    """
    wants_contest_index = "page=contests" in url and "results" not in url
    if wants_contest_index:
        return fixture_text("usaco/contests.html")

    results_match = re.search(r"page=([a-z]+\d{2,4}results)", url)
    if results_match is not None:
        page_name = results_match.group(1)
        try:
            return fixture_text(f"usaco/{page_name}.html")
        except FileNotFoundError:
            # No fixture for this results page: return an empty document.
            return "<html></html>"

    problem_match = re.search(r"page=viewproblem2&cpid=(\d+)", url)
    if problem_match is None:
        raise AssertionError(f"No fixture for USACO url={url!r}")

    cpid = problem_match.group(1)
    return fixture_text(f"usaco/problem_{cpid}.html")
def _make_offline_fetches(scraper_name: str): def _make_offline_fetches(scraper_name: str):
match scraper_name: match scraper_name:
case "cses": case "cses":
@ -213,6 +244,37 @@ def run_scraper_offline(fixture_text):
"__offline_get_async": __offline_get_async, "__offline_get_async": __offline_get_async,
} }
case "kattis":
async def __offline_get_kattis(client, url: str, **kwargs):
if "/file/statement/samples.zip" in url:
raise httpx.HTTPError("not found")
html = _router_kattis(url=url)
return SimpleNamespace(
text=html,
content=html.encode(),
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_kattis,
}
case "usaco":
async def __offline_get_usaco(client, url: str, **kwargs):
html = _router_usaco(url=url)
return SimpleNamespace(
text=html,
status_code=200,
raise_for_status=lambda: None,
)
return {
"__offline_get_async": __offline_get_usaco,
}
case _: case _:
raise AssertionError(f"Unknown scraper: {scraper_name}") raise AssertionError(f"Unknown scraper: {scraper_name}")
@ -221,6 +283,8 @@ def run_scraper_offline(fixture_text):
"atcoder": "AtcoderScraper", "atcoder": "AtcoderScraper",
"codeforces": "CodeforcesScraper", "codeforces": "CodeforcesScraper",
"codechef": "CodeChefScraper", "codechef": "CodeChefScraper",
"kattis": "KattisScraper",
"usaco": "USACOScraper",
} }
def _run(scraper_name: str, mode: str, *args: str): def _run(scraper_name: str, mode: str, *args: str):
@ -236,7 +300,7 @@ def run_scraper_offline(fixture_text):
ns._get_async = offline_fetches["_get_async"] ns._get_async = offline_fetches["_get_async"]
elif scraper_name == "cses": elif scraper_name == "cses":
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]
elif scraper_name == "codechef": elif scraper_name in ("codechef", "kattis", "usaco"):
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] httpx.AsyncClient.get = offline_fetches["__offline_get_async"]
scraper_class = getattr(ns, scraper_classes[scraper_name]) scraper_class = getattr(ns, scraper_classes[scraper_name])

View file

@ -0,0 +1,10 @@
<html><body><table>
<tr>
<td>A</td>
<td><a href="/contests/open2024/problems/kth2024a">Arithmetic Sequence</a></td>
</tr>
<tr>
<td>B</td>
<td><a href="/contests/open2024/problems/kth2024b">Binary Tree</a></td>
</tr>
</table></body></html>

10
tests/fixtures/kattis/contests.html vendored Normal file
View file

@ -0,0 +1,10 @@
<html><body><table>
<tr>
<td><a href="/contests/open2024">Open 2024</a></td>
<td data-timestamp="1711800000">2024-03-30</td>
</tr>
<tr>
<td><a href="/contests/icpc2023">ICPC 2023</a></td>
<td data-timestamp="1698768000">2023-10-31</td>
</tr>
</table></body></html>

View file

@ -0,0 +1,11 @@
<html>
<head><title>Hello World</title></head>
<body>
<span>CPU Time limit</span><span class="num">1 second</span>
<span>Memory limit</span><span class="num">256 MB</span>
<table class="sample">
<pre>Hello World</pre>
<pre>Hello World</pre>
</table>
</body>
</html>

View file

@ -0,0 +1,17 @@
<html>
<head><title>Arithmetic Sequence</title></head>
<body>
<span>CPU Time limit</span><span class="num">2 seconds</span>
<span>Memory limit</span><span class="num">512 MB</span>
<table class="sample">
<pre>3
1 2 3</pre>
<pre>YES</pre>
</table>
<table class="sample">
<pre>2
1 3</pre>
<pre>NO</pre>
</table>
</body>
</html>

View file

@ -0,0 +1,12 @@
<html>
<head><title>Binary Tree</title></head>
<body>
<span>CPU Time limit</span><span class="num">1 second</span>
<span>Memory limit</span><span class="num">256 MB</span>
<table class="sample">
<pre>5
1 2 3 4 5</pre>
<pre>3</pre>
</table>
</body>
</html>

3
tests/fixtures/usaco/contests.html vendored Normal file
View file

@ -0,0 +1,3 @@
<html><body>
<a href="index.php?page=dec24results">December 2024 Results</a>
</body></html>

14
tests/fixtures/usaco/dec24results.html vendored Normal file
View file

@ -0,0 +1,14 @@
<html><body>
<h2>USACO 2024 December Contest, Gold</h2>
<b>Farmer John's Favorite Problem</b><br/>
<a href="index.php?page=viewproblem2&cpid=1469">View Problem</a>
<b>Binary Indexed Tree</b><br/>
<a href="index.php?page=viewproblem2&cpid=1470">View Problem</a>
<b>Counting Subsequences</b><br/>
<a href="index.php?page=viewproblem2&cpid=1471">View Problem</a>
</body></html>

10
tests/fixtures/usaco/problem_1469.html vendored Normal file
View file

@ -0,0 +1,10 @@
<html><body>
<p>Time limit: 4s. Memory limit: 256 MB.</p>
<p>Given N cows, find the answer.</p>
<pre class="in">3
1 2 3</pre>
<pre class="out">6</pre>
<pre class="in">1
5</pre>
<pre class="out">5</pre>
</body></html>

View file

@ -0,0 +1,7 @@
<html><body>
<p>Time limit: 2s. Memory limit: 512 MB.</p>
<p>Build a binary indexed tree.</p>
<pre class="in">4
1 3 2 4</pre>
<pre class="out">10</pre>
</body></html>

View file

@ -0,0 +1,7 @@
<html><body>
<p>Time limit: 4s. Memory limit: 256 MB.</p>
<p>Output the answer with absolute error at most 10^{-6}.</p>
<pre class="in">2
1 2</pre>
<pre class="out">1.500000</pre>
</body></html>

View file

@ -27,6 +27,16 @@ MATRIX = {
"tests": ("START209D",), "tests": ("START209D",),
"contests": tuple(), "contests": tuple(),
}, },
"kattis": {
"metadata": ("hello",),
"tests": ("hello",),
"contests": tuple(),
},
"usaco": {
"metadata": ("dec24_gold",),
"tests": ("dec24_gold",),
"contests": tuple(),
},
} }
@ -85,5 +95,45 @@ def test_scraper_offline_fixture_matrix(run_scraper_offline, scraper, mode):
) )
assert "multi_test" in obj, "Missing multi_test field in raw JSON" assert "multi_test" in obj, "Missing multi_test field in raw JSON"
assert isinstance(obj["multi_test"], bool), "multi_test not boolean" assert isinstance(obj["multi_test"], bool), "multi_test not boolean"
assert "precision" in obj, "Missing precision field in raw JSON"
assert obj["precision"] is None or isinstance(
obj["precision"], float
), "precision must be None or float"
validated_any = True validated_any = True
assert validated_any, "No valid tests payloads validated" assert validated_any, "No valid tests payloads validated"
def test_kattis_contest_metadata(run_scraper_offline):
    """Kattis metadata for contest ``open2024`` validates and is populated.

    Checks that the scraper exits with code 0, that its last emitted object
    validates as a ``MetadataResult`` reporting success with exactly two
    problems, and that ``contest_url`` and ``standings_url`` are not the
    empty string.
    """
    exit_code, payloads = run_scraper_offline("kattis", "metadata", "open2024")
    assert exit_code == 0
    assert payloads

    # Only the final emitted object is validated as the metadata result.
    result = MetadataResult.model_validate(payloads[-1])
    assert result.success is True
    assert len(result.problems) == 2
    assert result.contest_url != ""
    assert result.standings_url != ""
def test_usaco_precision_extracted(run_scraper_offline):
    """At least one USACO ``dec24_gold`` problem reports a precision value.

    Runs the scraper in ``tests`` mode, expects exit code 0, and requires
    that some emitted object carrying a ``problem_id`` key has a
    ``precision`` that is not ``None``.
    """
    exit_code, payloads = run_scraper_offline("usaco", "tests", "dec24_gold")
    assert exit_code == 0

    # Built eagerly so a per-problem payload missing "precision" fails loudly.
    reported = [entry["precision"] for entry in payloads if "problem_id" in entry]
    assert any(value is not None for value in reported), (
        "Expected at least one problem with precision"
    )
@pytest.mark.parametrize(
    "scraper,contest_id",
    [
        ("cses", "nonexistent_category_xyz"),
        ("usaco", "badformat"),
        ("kattis", "nonexistent_problem_xyz"),
    ],
)
def test_scraper_metadata_error(run_scraper_offline, scraper, contest_id):
    """Metadata mode reports failure for an unknown or malformed contest id.

    For each parametrized scraper/contest pair, the run must exit with
    code 1 and its last emitted object must have ``success`` set to
    ``False`` together with a truthy ``error`` value.
    """
    exit_code, payloads = run_scraper_offline(scraper, "metadata", contest_id)
    assert exit_code == 1
    assert payloads

    final = payloads[-1]
    assert final.get("success") is False
    assert final.get("error")