diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 1680e83..c9e402c 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -10,13 +10,11 @@ from scrapling.fetchers import Fetcher from .base import BaseScraper from .models import ( - CombinedTest, ContestListResult, ContestSummary, MetadataResult, ProblemSummary, TestCase, - TestsResult, ) BASE_URL = "https://www.codechef.com" @@ -62,42 +60,40 @@ class CodeChefScraper(BaseScraper): return "codechef" async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: - async with httpx.AsyncClient() as client: - try: + try: + async with httpx.AsyncClient() as client: data = await fetch_json( client, API_CONTEST.format(contest_id=contest_id) ) - except httpx.HTTPStatusError as e: - return self._create_metadata_error( - f"Failed to fetch contest {contest_id}: {e}", contest_id + if not data.get("problems"): + return self._metadata_error( + f"No problems found for contest {contest_id}" ) - if not data.get("problems"): - return self._create_metadata_error( - f"No problems found for contest {contest_id}", contest_id - ) - problems = [] - for problem_code, problem_data in data["problems"].items(): - if problem_data.get("category_name") == "main": - problems.append( - ProblemSummary( - id=problem_code, - name=problem_data.get("name", problem_code), + problems = [] + for problem_code, problem_data in data["problems"].items(): + if problem_data.get("category_name") == "main": + problems.append( + ProblemSummary( + id=problem_code, + name=problem_data.get("name", problem_code), + ) ) - ) - return MetadataResult( - success=True, - error="", - contest_id=contest_id, - problems=problems, - url=f"{BASE_URL}/{contest_id}", - ) + return MetadataResult( + success=True, + error="", + contest_id=contest_id, + problems=problems, + url=f"{BASE_URL}/{contest_id}", + ) + except Exception as e: + return self._metadata_error(f"Failed to fetch contest {contest_id}: {e}") async def scrape_contest_list(self) -> ContestListResult: async with httpx.AsyncClient() as client: try: data = await fetch_json(client, API_CONTESTS_ALL) except httpx.HTTPStatusError as e: - return self._create_contests_error(f"Failed to fetch contests: {e}") + return self._contests_error(f"Failed to fetch contests: {e}") all_contests = data.get("future_contests", []) + data.get( "past_contests", [] ) @@ -110,7 +106,7 @@ class CodeChefScraper(BaseScraper): num = int(match.group(1)) max_num = max(max_num, num) if max_num == 0: - return self._create_contests_error("No Starters contests found") + return self._contests_error("No Starters contests found") contests = [] sem = asyncio.Semaphore(CONNECTIONS) @@ -252,68 +248,5 @@ class CodeChefScraper(BaseScraper): print(json.dumps(payload), flush=True) -async def main_async() -> int: - if len(sys.argv) < 2: - result = MetadataResult( - success=False, - error="Usage: codechef.py metadata OR codechef.py tests OR codechef.py contests", - url="", - ) - print(result.model_dump_json()) - return 1 - mode: str = sys.argv[1] - scraper = CodeChefScraper() - if mode == "metadata": - if len(sys.argv) != 3: - result = MetadataResult( - success=False, - error="Usage: codechef.py metadata ", - url="", - ) - print(result.model_dump_json()) - return 1 - contest_id = sys.argv[2] - result = await scraper.scrape_contest_metadata(contest_id) - print(result.model_dump_json()) - return 0 if result.success else 1 - if mode == "tests": - if len(sys.argv) != 3: - tests_result = TestsResult( - success=False, - error="Usage: codechef.py tests ", - problem_id="", - combined=CombinedTest(input="", expected=""), - tests=[], - timeout_ms=0, - memory_mb=0, - ) - print(tests_result.model_dump_json()) - return 1 - contest_id = sys.argv[2] - await scraper.stream_tests_for_category_async(contest_id) - return 0 - if mode == "contests": - if len(sys.argv) != 2: - contest_result = ContestListResult( - success=False, error="Usage: codechef.py contests" - ) - print(contest_result.model_dump_json()) - return 1 - contest_result = await scraper.scrape_contest_list() - print(contest_result.model_dump_json()) - return 0 if contest_result.success else 1 - result = MetadataResult( - success=False, - error=f"Unknown mode: {mode}. Use 'metadata ', 'tests ', or 'contests'", - url="", - ) - print(result.model_dump_json()) - return 1 - - -def main() -> None: - sys.exit(asyncio.run(main_async())) - - if __name__ == "__main__": - main() + CodeChefScraper().run_cli() diff --git a/tests/conftest.py b/tests/conftest.py index 63e6108..bd84941 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -232,6 +232,13 @@ def run_scraper_offline(fixture_text): case _: raise AssertionError(f"Unknown scraper: {scraper_name}") + scraper_classes = { + "cses": "CSESScraper", + "atcoder": "AtcoderScraper", + "codeforces": "CodeforcesScraper", + "codechef": "CodeChefScraper", + } + def _run(scraper_name: str, mode: str, *args: str): mod_path = ROOT / "scrapers" / f"{scraper_name}.py" ns = _load_scraper_module(mod_path, scraper_name) @@ -249,16 +256,11 @@ def run_scraper_offline(fixture_text): httpx.AsyncClient.get = offline_fetches["__offline_get_async"] # type: ignore[assignment] fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment] - main_async = getattr(ns, "main_async") - assert callable(main_async), f"main_async not found in {scraper_name}" + scraper_class = getattr(ns, scraper_classes[scraper_name]) + scraper = scraper_class() argv = [str(mod_path), mode, *args] - old_argv = sys.argv - sys.argv = argv - try: - rc, out = _capture_stdout(main_async()) - finally: - sys.argv = old_argv + rc, out = _capture_stdout(scraper._run_cli_async(argv)) json_lines: list[Any] = [] for line in (_line for _line in out.splitlines() if _line.strip()):