test: add offline fixture coverage for Kattis and USACO
Problem: Kattis and USACO had zero offline test coverage — no fixtures, no conftest routers, and no entries in the test matrix. Precision, error cases, and the Kattis contest-vs-slug fallback were also untested. Solution: Add HTML fixtures for both platforms covering metadata, tests, and contest list modes. Wire up conftest routers that patch `httpx.AsyncClient.get` using the same pattern as CSES/CodeChef. Extend the test matrix to include Kattis and USACO (18 parametrized cases, up from 12). Add a dedicated test for the Kattis contest-path (verifies `contest_url`/`standings_url` are set). Add parametrized metadata error tests for CSES, USACO, and Kattis. Assert `precision` field type in all tests-mode payloads; `usaco/problem_1471.html` includes an absolute-error hint to exercise `extract_precision`.
This commit is contained in:
parent
9727dccc6f
commit
37ad92432e
12 changed files with 207 additions and 1 deletion
|
|
@ -7,6 +7,8 @@ from pathlib import Path
|
|||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import re
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import requests
|
||||
|
|
@ -103,6 +105,35 @@ def run_scraper_offline(fixture_text):
|
|||
|
||||
raise AssertionError(f"No fixture for Codeforces url={url!r}")
|
||||
|
||||
def _router_kattis(*, url: str) -> str:
|
||||
url = url.removeprefix("https://open.kattis.com")
|
||||
if "/contests?" in url:
|
||||
return fixture_text("kattis/contests.html")
|
||||
m = re.search(r"/contests/([^/]+)/problems", url)
|
||||
if m:
|
||||
try:
|
||||
return fixture_text(f"kattis/contest_{m.group(1)}_problems.html")
|
||||
except FileNotFoundError:
|
||||
return "<html></html>"
|
||||
if "/problems/" in url and "/file/statement" not in url:
|
||||
slug = url.rstrip("/").split("/")[-1]
|
||||
return fixture_text(f"kattis/problem_{slug}.html")
|
||||
raise AssertionError(f"No fixture for Kattis url={url!r}")
|
||||
|
||||
def _router_usaco(*, url: str) -> str:
|
||||
if "page=contests" in url and "results" not in url:
|
||||
return fixture_text("usaco/contests.html")
|
||||
m = re.search(r"page=([a-z]+\d{2,4}results)", url)
|
||||
if m:
|
||||
try:
|
||||
return fixture_text(f"usaco/{m.group(1)}.html")
|
||||
except FileNotFoundError:
|
||||
return "<html></html>"
|
||||
m = re.search(r"page=viewproblem2&cpid=(\d+)", url)
|
||||
if m:
|
||||
return fixture_text(f"usaco/problem_{m.group(1)}.html")
|
||||
raise AssertionError(f"No fixture for USACO url={url!r}")
|
||||
|
||||
def _make_offline_fetches(scraper_name: str):
|
||||
match scraper_name:
|
||||
case "cses":
|
||||
|
|
@ -213,6 +244,37 @@ def run_scraper_offline(fixture_text):
|
|||
"__offline_get_async": __offline_get_async,
|
||||
}
|
||||
|
||||
case "kattis":
|
||||
|
||||
async def __offline_get_kattis(client, url: str, **kwargs):
|
||||
if "/file/statement/samples.zip" in url:
|
||||
raise httpx.HTTPError("not found")
|
||||
html = _router_kattis(url=url)
|
||||
return SimpleNamespace(
|
||||
text=html,
|
||||
content=html.encode(),
|
||||
status_code=200,
|
||||
raise_for_status=lambda: None,
|
||||
)
|
||||
|
||||
return {
|
||||
"__offline_get_async": __offline_get_kattis,
|
||||
}
|
||||
|
||||
case "usaco":
|
||||
|
||||
async def __offline_get_usaco(client, url: str, **kwargs):
|
||||
html = _router_usaco(url=url)
|
||||
return SimpleNamespace(
|
||||
text=html,
|
||||
status_code=200,
|
||||
raise_for_status=lambda: None,
|
||||
)
|
||||
|
||||
return {
|
||||
"__offline_get_async": __offline_get_usaco,
|
||||
}
|
||||
|
||||
case _:
|
||||
raise AssertionError(f"Unknown scraper: {scraper_name}")
|
||||
|
||||
|
|
@ -221,6 +283,8 @@ def run_scraper_offline(fixture_text):
|
|||
"atcoder": "AtcoderScraper",
|
||||
"codeforces": "CodeforcesScraper",
|
||||
"codechef": "CodeChefScraper",
|
||||
"kattis": "KattisScraper",
|
||||
"usaco": "USACOScraper",
|
||||
}
|
||||
|
||||
def _run(scraper_name: str, mode: str, *args: str):
|
||||
|
|
@ -236,7 +300,7 @@ def run_scraper_offline(fixture_text):
|
|||
ns._get_async = offline_fetches["_get_async"]
|
||||
elif scraper_name == "cses":
|
||||
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]
|
||||
elif scraper_name == "codechef":
|
||||
elif scraper_name in ("codechef", "kattis", "usaco"):
|
||||
httpx.AsyncClient.get = offline_fetches["__offline_get_async"]
|
||||
|
||||
scraper_class = getattr(ns, scraper_classes[scraper_name])
|
||||
|
|
|
|||
10
tests/fixtures/kattis/contest_open2024_problems.html
vendored
Normal file
10
tests/fixtures/kattis/contest_open2024_problems.html
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<html><body><table>
|
||||
<tr>
|
||||
<td>A</td>
|
||||
<td><a href="/contests/open2024/problems/kth2024a">Arithmetic Sequence</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>B</td>
|
||||
<td><a href="/contests/open2024/problems/kth2024b">Binary Tree</a></td>
|
||||
</tr>
|
||||
</table></body></html>
|
||||
10
tests/fixtures/kattis/contests.html
vendored
Normal file
10
tests/fixtures/kattis/contests.html
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<html><body><table>
|
||||
<tr>
|
||||
<td><a href="/contests/open2024">Open 2024</a></td>
|
||||
<td data-timestamp="1711800000">2024-03-30</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="/contests/icpc2023">ICPC 2023</a></td>
|
||||
<td data-timestamp="1698768000">2023-10-31</td>
|
||||
</tr>
|
||||
</table></body></html>
|
||||
11
tests/fixtures/kattis/problem_hello.html
vendored
Normal file
11
tests/fixtures/kattis/problem_hello.html
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
<html>
|
||||
<head><title>Hello World</title></head>
|
||||
<body>
|
||||
<span>CPU Time limit</span><span class="num">1 second</span>
|
||||
<span>Memory limit</span><span class="num">256 MB</span>
|
||||
<table class="sample">
|
||||
<pre>Hello World</pre>
|
||||
<pre>Hello World</pre>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
17
tests/fixtures/kattis/problem_kth2024a.html
vendored
Normal file
17
tests/fixtures/kattis/problem_kth2024a.html
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
<html>
|
||||
<head><title>Arithmetic Sequence</title></head>
|
||||
<body>
|
||||
<span>CPU Time limit</span><span class="num">2 seconds</span>
|
||||
<span>Memory limit</span><span class="num">512 MB</span>
|
||||
<table class="sample">
|
||||
<pre>3
|
||||
1 2 3</pre>
|
||||
<pre>YES</pre>
|
||||
</table>
|
||||
<table class="sample">
|
||||
<pre>2
|
||||
1 3</pre>
|
||||
<pre>NO</pre>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
12
tests/fixtures/kattis/problem_kth2024b.html
vendored
Normal file
12
tests/fixtures/kattis/problem_kth2024b.html
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
<html>
|
||||
<head><title>Binary Tree</title></head>
|
||||
<body>
|
||||
<span>CPU Time limit</span><span class="num">1 second</span>
|
||||
<span>Memory limit</span><span class="num">256 MB</span>
|
||||
<table class="sample">
|
||||
<pre>5
|
||||
1 2 3 4 5</pre>
|
||||
<pre>3</pre>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
3
tests/fixtures/usaco/contests.html
vendored
Normal file
3
tests/fixtures/usaco/contests.html
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
<html><body>
|
||||
<a href="index.php?page=dec24results">December 2024 Results</a>
|
||||
</body></html>
|
||||
14
tests/fixtures/usaco/dec24results.html
vendored
Normal file
14
tests/fixtures/usaco/dec24results.html
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
<html><body>
|
||||
|
||||
<h2>USACO 2024 December Contest, Gold</h2>
|
||||
|
||||
<b>Farmer John's Favorite Problem</b><br/>
|
||||
<a href="index.php?page=viewproblem2&cpid=1469">View Problem</a>
|
||||
|
||||
<b>Binary Indexed Tree</b><br/>
|
||||
<a href="index.php?page=viewproblem2&cpid=1470">View Problem</a>
|
||||
|
||||
<b>Counting Subsequences</b><br/>
|
||||
<a href="index.php?page=viewproblem2&cpid=1471">View Problem</a>
|
||||
|
||||
</body></html>
|
||||
10
tests/fixtures/usaco/problem_1469.html
vendored
Normal file
10
tests/fixtures/usaco/problem_1469.html
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<html><body>
|
||||
<p>Time limit: 4s. Memory limit: 256 MB.</p>
|
||||
<p>Given N cows, find the answer.</p>
|
||||
<pre class="in">3
|
||||
1 2 3</pre>
|
||||
<pre class="out">6</pre>
|
||||
<pre class="in">1
|
||||
5</pre>
|
||||
<pre class="out">5</pre>
|
||||
</body></html>
|
||||
7
tests/fixtures/usaco/problem_1470.html
vendored
Normal file
7
tests/fixtures/usaco/problem_1470.html
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
<html><body>
|
||||
<p>Time limit: 2s. Memory limit: 512 MB.</p>
|
||||
<p>Build a binary indexed tree.</p>
|
||||
<pre class="in">4
|
||||
1 3 2 4</pre>
|
||||
<pre class="out">10</pre>
|
||||
</body></html>
|
||||
7
tests/fixtures/usaco/problem_1471.html
vendored
Normal file
7
tests/fixtures/usaco/problem_1471.html
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
<html><body>
|
||||
<p>Time limit: 4s. Memory limit: 256 MB.</p>
|
||||
<p>Output the answer with absolute error at most 10^{-6}.</p>
|
||||
<pre class="in">2
|
||||
1 2</pre>
|
||||
<pre class="out">1.500000</pre>
|
||||
</body></html>
|
||||
|
|
@ -27,6 +27,16 @@ MATRIX = {
|
|||
"tests": ("START209D",),
|
||||
"contests": tuple(),
|
||||
},
|
||||
"kattis": {
|
||||
"metadata": ("hello",),
|
||||
"tests": ("hello",),
|
||||
"contests": tuple(),
|
||||
},
|
||||
"usaco": {
|
||||
"metadata": ("dec24_gold",),
|
||||
"tests": ("dec24_gold",),
|
||||
"contests": tuple(),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -85,5 +95,36 @@ def test_scraper_offline_fixture_matrix(run_scraper_offline, scraper, mode):
|
|||
)
|
||||
assert "multi_test" in obj, "Missing multi_test field in raw JSON"
|
||||
assert isinstance(obj["multi_test"], bool), "multi_test not boolean"
|
||||
assert "precision" in obj, "Missing precision field in raw JSON"
|
||||
assert obj["precision"] is None or isinstance(
|
||||
obj["precision"], float
|
||||
), "precision must be None or float"
|
||||
validated_any = True
|
||||
assert validated_any, "No valid tests payloads validated"
|
||||
|
||||
|
||||
def test_kattis_contest_metadata(run_scraper_offline):
|
||||
rc, objs = run_scraper_offline("kattis", "metadata", "open2024")
|
||||
assert rc == 0
|
||||
assert objs
|
||||
model = MetadataResult.model_validate(objs[-1])
|
||||
assert model.success is True
|
||||
assert len(model.problems) == 2
|
||||
assert model.contest_url != ""
|
||||
assert model.standings_url != ""
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"scraper,contest_id",
|
||||
[
|
||||
("cses", "nonexistent_category_xyz"),
|
||||
("usaco", "badformat"),
|
||||
("kattis", "nonexistent_problem_xyz"),
|
||||
],
|
||||
)
|
||||
def test_scraper_metadata_error(run_scraper_offline, scraper, contest_id):
|
||||
rc, objs = run_scraper_offline(scraper, "metadata", contest_id)
|
||||
assert rc == 1
|
||||
assert objs
|
||||
assert objs[-1].get("success") is False
|
||||
assert objs[-1].get("error")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue