diff --git a/scrapers/atcoder.py b/scrapers/atcoder.py index d46159c..5a82439 100644 --- a/scrapers/atcoder.py +++ b/scrapers/atcoder.py @@ -314,16 +314,23 @@ class AtcoderScraper(BaseScraper): return data = await asyncio.to_thread(_scrape_problem_page_sync, category_id, slug) tests: list[TestCase] = data.get("tests", []) + combined_input = "\n".join(t.input for t in tests) if tests else "" + combined_expected = "\n".join(t.expected for t in tests) if tests else "" print( json.dumps( { "problem_id": letter, + "combined": { + "input": combined_input, + "expected": combined_expected, + }, "tests": [ {"input": t.input, "expected": t.expected} for t in tests ], "timeout_ms": data.get("timeout_ms", 0), "memory_mb": data.get("memory_mb", 0), "interactive": bool(data.get("interactive")), + "multi_test": False, } ), flush=True, diff --git a/scrapers/codechef.py b/scrapers/codechef.py index c2c768c..37fd9b5 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -231,14 +231,24 @@ class CodeChefScraper(BaseScraper): memory_mb = 256.0 interactive = False + combined_input = "\n".join(t.input for t in tests) if tests else "" + combined_expected = ( + "\n".join(t.expected for t in tests) if tests else "" + ) + return { "problem_id": problem_code, + "combined": { + "input": combined_input, + "expected": combined_expected, + }, "tests": [ {"input": t.input, "expected": t.expected} for t in tests ], "timeout_ms": timeout_ms, "memory_mb": memory_mb, "interactive": interactive, + "multi_test": False, } tasks = [run_one(problem_code) for problem_code in problems.keys()] diff --git a/scrapers/cses.py b/scrapers/cses.py index d7b40f9..620cb7f 100644 --- a/scrapers/cses.py +++ b/scrapers/cses.py @@ -235,8 +235,10 @@ class CSESScraper(BaseScraper): tests = [] timeout_ms, memory_mb, interactive = 0, 0, False - combined_input = "\n".join(t.input for t in tests) - combined_expected = "\n".join(t.expected for t in tests) + combined_input = "\n".join(t.input for t in tests) if tests else "" + combined_expected = ( + "\n".join(t.expected for t in tests) if tests else "" + ) return { "problem_id": pid, @@ -250,6 +252,7 @@ class CSESScraper(BaseScraper): "timeout_ms": timeout_ms, "memory_mb": memory_mb, "interactive": interactive, + "multi_test": False, } tasks = [run_one(p.id) for p in problems] diff --git a/tests/test_scrapers.py b/tests/test_scrapers.py index 85b793a..d22579f 100644 --- a/tests/test_scrapers.py +++ b/tests/test_scrapers.py @@ -55,18 +55,21 @@ def test_scraper_offline_fixture_matrix(run_scraper_offline, scraper, mode): else: assert len(model.contests) >= 1 else: + assert len(objs) >= 1, "No test objects returned" validated_any = False for obj in objs: - if "success" in obj and "tests" in obj and "problem_id" in obj: - tr = TestsResult.model_validate(obj) - assert tr.problem_id != "" - assert isinstance(tr.tests, list) - validated_any = True - else: - assert "problem_id" in obj - assert "tests" in obj and isinstance(obj["tests"], list) - assert ( - "timeout_ms" in obj and "memory_mb" in obj and "interactive" in obj - ) - validated_any = True + assert "problem_id" in obj, "Missing problem_id" + assert obj["problem_id"] != "", "Empty problem_id" + assert "combined" in obj, "Missing combined field" + assert isinstance(obj["combined"], dict), "combined must be a dict" + assert "input" in obj["combined"], "Missing combined.input" + assert "expected" in obj["combined"], "Missing combined.expected" + assert "tests" in obj, "Missing tests field" + assert isinstance(obj["tests"], list), "tests must be a list" + assert "timeout_ms" in obj, "Missing timeout_ms" + assert "memory_mb" in obj, "Missing memory_mb" + assert "interactive" in obj, "Missing interactive" + assert "multi_test" in obj, "Missing multi_test field" + assert isinstance(obj["multi_test"], bool), "multi_test must be bool" + validated_any = True assert validated_any, "No valid tests payloads validated"