diff --git a/scrapers/base.py b/scrapers/base.py index 6409c9a..4b685d0 100644 --- a/scrapers/base.py +++ b/scrapers/base.py @@ -1,9 +1,8 @@ +import asyncio +import sys from abc import ABC, abstractmethod -from typing import Any, Awaitable, Callable, ParamSpec, cast -from .models import ContestListResult, MetadataResult, TestsResult - -P = ParamSpec("P") +from .models import CombinedTest, ContestListResult, MetadataResult, TestsResult class BaseScraper(ABC): @@ -20,57 +19,65 @@ class BaseScraper(ABC): @abstractmethod async def stream_tests_for_category_async(self, category_id: str) -> None: ... - def _create_metadata_error( - self, error_msg: str, contest_id: str = "" - ) -> MetadataResult: - return MetadataResult( - success=False, - error=f"{self.platform_name}: {error_msg}", - contest_id=contest_id, - problems=[], - url="", - ) + def _usage(self) -> str: + name = self.platform_name + return f"Usage: {name}.py metadata | tests | contests" - def _create_tests_error( - self, error_msg: str, problem_id: str = "", url: str = "" - ) -> TestsResult: - from .models import CombinedTest + def _metadata_error(self, msg: str) -> MetadataResult: + return MetadataResult(success=False, error=msg, url="") + def _tests_error(self, msg: str) -> TestsResult: return TestsResult( success=False, - error=f"{self.platform_name}: {error_msg}", - problem_id=problem_id, + error=msg, + problem_id="", combined=CombinedTest(input="", expected=""), tests=[], timeout_ms=0, memory_mb=0, - interactive=False, ) - def _create_contests_error(self, error_msg: str) -> ContestListResult: - return ContestListResult( - success=False, - error=f"{self.platform_name}: {error_msg}", - contests=[], - ) + def _contests_error(self, msg: str) -> ContestListResult: + return ContestListResult(success=False, error=msg) - async def _safe_execute( - self, - operation: str, - func: Callable[P, Awaitable[Any]], - *args: P.args, - **kwargs: P.kwargs, - ): - try: - return await func(*args, **kwargs) - except Exception as e: - if operation == "metadata": - contest_id = cast(str, args[0]) if args else "" - return self._create_metadata_error(str(e), contest_id) - elif operation == "tests": - problem_id = cast(str, args[1]) if len(args) > 1 else "" - return self._create_tests_error(str(e), problem_id) - elif operation == "contests": - return self._create_contests_error(str(e)) - else: - raise + async def _run_cli_async(self, args: list[str]) -> int: + if len(args) < 2: + print(self._metadata_error(self._usage()).model_dump_json()) + return 1 + + mode = args[1] + + match mode: + case "metadata": + if len(args) != 3: + print(self._metadata_error(self._usage()).model_dump_json()) + return 1 + result = await self.scrape_contest_metadata(args[2]) + print(result.model_dump_json()) + return 0 if result.success else 1 + + case "tests": + if len(args) != 3: + print(self._tests_error(self._usage()).model_dump_json()) + return 1 + await self.stream_tests_for_category_async(args[2]) + return 0 + + case "contests": + if len(args) != 2: + print(self._contests_error(self._usage()).model_dump_json()) + return 1 + result = await self.scrape_contest_list() + print(result.model_dump_json()) + return 0 if result.success else 1 + + case _: + print( + self._metadata_error( + f"Unknown mode: {mode}. {self._usage()}" + ).model_dump_json() + ) + return 1 + + def run_cli(self) -> None: + sys.exit(asyncio.run(self._run_cli_async(sys.argv))) diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index 24f55f6..840616f 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -4,7 +4,6 @@ import asyncio import json import logging import re -import sys from typing import Any import requests @@ -13,13 +12,11 @@ from scrapling.fetchers import Fetcher from .base import BaseScraper from .models import ( - CombinedTest, ContestListResult, ContestSummary, MetadataResult, ProblemSummary, TestCase, - TestsResult, ) # suppress scrapling logging - https://github.com/D4Vinci/Scrapling/issues/31) @@ -209,49 +206,46 @@ class CodeforcesScraper(BaseScraper): return "codeforces" async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: - async def impl(cid: str) -> MetadataResult: - problems = await asyncio.to_thread(_scrape_contest_problems_sync, cid) + try: + problems = await asyncio.to_thread( + _scrape_contest_problems_sync, contest_id + ) if not problems: - return self._create_metadata_error( - f"No problems found for contest {cid}", cid + return self._metadata_error( + f"No problems found for contest {contest_id}" ) return MetadataResult( success=True, error="", - contest_id=cid, + contest_id=contest_id, problems=problems, url=f"https://codeforces.com/contest/{contest_id}/problem/%s", ) - - return await self._safe_execute("metadata", impl, contest_id) + except Exception as e: + return self._metadata_error(str(e)) async def scrape_contest_list(self) -> ContestListResult: - async def impl() -> ContestListResult: - try: - r = requests.get(API_CONTEST_LIST_URL, timeout=TIMEOUT_SECONDS) - r.raise_for_status() - data = r.json() - if data.get("status") != "OK": - return self._create_contests_error("Invalid API response") + try: + r = requests.get(API_CONTEST_LIST_URL, timeout=TIMEOUT_SECONDS) + r.raise_for_status() + data = r.json() + if data.get("status") != "OK": + return self._contests_error("Invalid API response") - contests: list[ContestSummary] = [] - for c in data["result"]: - if c.get("phase") != "FINISHED": - continue - cid = str(c["id"]) - name = c["name"] - contests.append( - ContestSummary(id=cid, name=name, display_name=name) - ) + contests: list[ContestSummary] = [] + for c in data["result"]: + if c.get("phase") != "FINISHED": + continue + cid = str(c["id"]) + name = c["name"] + contests.append(ContestSummary(id=cid, name=name, display_name=name)) - if not contests: - return self._create_contests_error("No contests found") + if not contests: + return self._contests_error("No contests found") - return ContestListResult(success=True, error="", contests=contests) - except Exception as e: - return self._create_contests_error(str(e)) - - return await self._safe_execute("contests", impl) + return ContestListResult(success=True, error="", contests=contests) + except Exception as e: + return self._contests_error(str(e)) async def stream_tests_for_category_async(self, category_id: str) -> None: html = await asyncio.to_thread(_fetch_problems_html, category_id) @@ -281,73 +275,5 @@ class CodeforcesScraper(BaseScraper): ) -async def main_async() -> int: - if len(sys.argv) < 2: - result = MetadataResult( - success=False, - error="Usage: codeforces.py metadata OR codeforces.py tests OR codeforces.py contests", - url="", - ) - print(result.model_dump_json()) - return 1 - - mode: str = sys.argv[1] - scraper = CodeforcesScraper() - - if mode == "metadata": - if len(sys.argv) != 3: - result = MetadataResult( - success=False, - error="Usage: codeforces.py metadata ", - url="", - ) - print(result.model_dump_json()) - return 1 - contest_id = sys.argv[2] - result = await scraper.scrape_contest_metadata(contest_id) - print(result.model_dump_json()) - return 0 if result.success else 1 - - if mode == "tests": - if len(sys.argv) != 3: - tests_result = TestsResult( - success=False, - error="Usage: codeforces.py tests ", - problem_id="", - combined=CombinedTest(input="", expected=""), - tests=[], - timeout_ms=0, - memory_mb=0, - ) - print(tests_result.model_dump_json()) - return 1 - contest_id = sys.argv[2] - await scraper.stream_tests_for_category_async(contest_id) - return 0 - - if mode == "contests": - if len(sys.argv) != 2: - contest_result = ContestListResult( - success=False, error="Usage: codeforces.py contests" - ) - print(contest_result.model_dump_json()) - return 1 - contest_result = await scraper.scrape_contest_list() - print(contest_result.model_dump_json()) - return 0 if contest_result.success else 1 - - result = MetadataResult( - success=False, - error="Unknown mode. Use 'metadata ', 'tests ', or 'contests'", - url="", - ) - print(result.model_dump_json()) - return 1 - - -def main() -> None: - sys.exit(asyncio.run(main_async())) - - if __name__ == "__main__": - main() + CodeforcesScraper().run_cli() diff --git a/scrapers/cses.py b/scrapers/cses.py index 620cb7f..5440b34 100644 --- a/scrapers/cses.py +++ b/scrapers/cses.py @@ -3,20 +3,17 @@ import asyncio import json import re -import sys from typing import Any import httpx from .base import BaseScraper from .models import ( - CombinedTest, ContestListResult, ContestSummary, MetadataResult, ProblemSummary, TestCase, - TestsResult, ) BASE_URL = "https://cses.fi" @@ -261,73 +258,5 @@ class CSESScraper(BaseScraper): print(json.dumps(payload), flush=True) -async def main_async() -> int: - if len(sys.argv) < 2: - result = MetadataResult( - success=False, - error="Usage: cses.py metadata OR cses.py tests OR cses.py contests", - url="", - ) - print(result.model_dump_json()) - return 1 - - mode: str = sys.argv[1] - scraper = CSESScraper() - - if mode == "metadata": - if len(sys.argv) != 3: - result = MetadataResult( - success=False, - error="Usage: cses.py metadata ", - url="", - ) - print(result.model_dump_json()) - return 1 - category_id = sys.argv[2] - result = await scraper.scrape_contest_metadata(category_id) - print(result.model_dump_json()) - return 0 if result.success else 1 - - if mode == "tests": - if len(sys.argv) != 3: - tests_result = TestsResult( - success=False, - error="Usage: cses.py tests ", - problem_id="", - combined=CombinedTest(input="", expected=""), - tests=[], - timeout_ms=0, - memory_mb=0, - ) - print(tests_result.model_dump_json()) - return 1 - category = sys.argv[2] - await scraper.stream_tests_for_category_async(category) - return 0 - - if mode == "contests": - if len(sys.argv) != 2: - contest_result = ContestListResult( - success=False, error="Usage: cses.py contests" - ) - print(contest_result.model_dump_json()) - return 1 - contest_result = await scraper.scrape_contest_list() - print(contest_result.model_dump_json()) - return 0 if contest_result.success else 1 - - result = MetadataResult( - success=False, - error=f"Unknown mode: {mode}. Use 'metadata ', 'tests ', or 'contests'", - url="", - ) - print(result.model_dump_json()) - return 1 - - -def main() -> None: - sys.exit(asyncio.run(main_async())) - - if __name__ == "__main__": - main() + CSESScraper().run_cli()