From 89440e5d1491142490a16d9a9a1afe57a9f05312 Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Mon, 22 Sep 2025 22:44:08 -0400 Subject: [PATCH] feat(scrapers): simplify structure --- scrapers/__init__.py | 44 +--------------- tests/scrapers/test_interface_compliance.py | 21 ++++---- tests/scrapers/test_registry.py | 58 --------------------- 3 files changed, 14 insertions(+), 109 deletions(-) delete mode 100644 tests/scrapers/test_registry.py diff --git a/scrapers/__init__.py b/scrapers/__init__.py index 8de8c42..f0cfd45 100644 --- a/scrapers/__init__.py +++ b/scrapers/__init__.py @@ -11,20 +11,11 @@ from .models import ( TestsResult, ) -ALL_SCRAPERS: dict[str, type[BaseScraper]] = { - "atcoder": AtCoderScraper, - "codeforces": CodeforcesScraper, - "cses": CSESScraper, -} - -_SCRAPER_CLASSES = [ +__all__ = [ "AtCoderScraper", + "BaseScraper", "CodeforcesScraper", "CSESScraper", -] - -_BASE_EXPORTS = [ - "BaseScraper", "ScraperConfig", "ContestListResult", "ContestSummary", @@ -33,34 +24,3 @@ _BASE_EXPORTS = [ "TestCase", "TestsResult", ] - -_REGISTRY_FUNCTIONS = [ - "get_scraper", - "list_platforms", - "ALL_SCRAPERS", -] - -__all__ = _BASE_EXPORTS + _SCRAPER_CLASSES + _REGISTRY_FUNCTIONS - -_exported_types = ( - ScraperConfig, - ContestListResult, - ContestSummary, - MetadataResult, - ProblemSummary, - TestCase, - TestsResult, -) - - -def get_scraper(platform: str) -> type[BaseScraper]: - if platform not in ALL_SCRAPERS: - available = ", ".join(ALL_SCRAPERS.keys()) - raise KeyError( - f"Unknown platform '{platform}'. Available platforms: {available}" - ) - return ALL_SCRAPERS[platform] - - -def list_platforms() -> list[str]: - return list(ALL_SCRAPERS.keys()) diff --git a/tests/scrapers/test_interface_compliance.py b/tests/scrapers/test_interface_compliance.py index 753e0de..8bfb185 100644 --- a/tests/scrapers/test_interface_compliance.py +++ b/tests/scrapers/test_interface_compliance.py @@ -2,14 +2,17 @@ from unittest.mock import Mock import pytest -from scrapers import ALL_SCRAPERS, BaseScraper +from scrapers.atcoder import AtCoderScraper +from scrapers.base import BaseScraper +from scrapers.codeforces import CodeforcesScraper +from scrapers.cses import CSESScraper from scrapers.models import ContestListResult, MetadataResult, TestsResult -ALL_SCRAPER_CLASSES = list(ALL_SCRAPERS.values()) +SCRAPERS = [AtCoderScraper, CodeforcesScraper, CSESScraper] class TestScraperInterfaceCompliance: - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_implements_base_interface(self, scraper_class): scraper = scraper_class() @@ -19,7 +22,7 @@ class TestScraperInterfaceCompliance: assert hasattr(scraper, "scrape_problem_tests") assert hasattr(scraper, "scrape_contest_list") - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_platform_name_is_string(self, scraper_class): scraper = scraper_class() platform_name = scraper.platform_name @@ -28,7 +31,7 @@ class TestScraperInterfaceCompliance: assert len(platform_name) > 0 assert platform_name.islower() # Convention: lowercase platform names - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_metadata_method_signature(self, scraper_class, mocker): scraper = scraper_class() @@ -53,7 +56,7 @@ class TestScraperInterfaceCompliance: assert isinstance(result.success, bool) assert isinstance(result.error, str) - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_problem_tests_method_signature(self, scraper_class, mocker): scraper = scraper_class() @@ -85,7 +88,7 @@ class TestScraperInterfaceCompliance: assert isinstance(result.success, bool) assert isinstance(result.error, str) - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_contest_list_method_signature(self, scraper_class, mocker): scraper = scraper_class() @@ -111,7 +114,7 @@ class TestScraperInterfaceCompliance: assert isinstance(result.success, bool) assert isinstance(result.error, str) - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_error_message_format(self, scraper_class, mocker): scraper = scraper_class() platform_name = scraper.platform_name @@ -148,7 +151,7 @@ class TestScraperInterfaceCompliance: assert not result.success assert result.error.startswith(f"{platform_name}: ") - @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES) + @pytest.mark.parametrize("scraper_class", SCRAPERS) def test_scraper_instantiation(self, scraper_class): scraper1 = scraper_class() assert isinstance(scraper1, BaseScraper) diff --git a/tests/scrapers/test_registry.py b/tests/scrapers/test_registry.py deleted file mode 100644 index a656d1e..0000000 --- a/tests/scrapers/test_registry.py +++ /dev/null @@ -1,58 +0,0 @@ -import pytest - -from scrapers import ALL_SCRAPERS, get_scraper, list_platforms -from scrapers.base import BaseScraper -from scrapers.codeforces import CodeforcesScraper - - -class TestScraperRegistry: - def test_get_scraper_valid_platform(self): - scraper_class = get_scraper("codeforces") - assert scraper_class == CodeforcesScraper - assert issubclass(scraper_class, BaseScraper) - - scraper = scraper_class() - assert isinstance(scraper, BaseScraper) - assert scraper.platform_name == "codeforces" - - def test_get_scraper_invalid_platform(self): - with pytest.raises(KeyError) as exc_info: - get_scraper("nonexistent") - - error_msg = str(exc_info.value) - assert "nonexistent" in error_msg - assert "Available platforms" in error_msg - - def test_list_platforms(self): - platforms = list_platforms() - - assert isinstance(platforms, list) - assert len(platforms) > 0 - assert "codeforces" in platforms - - assert set(platforms) == set(ALL_SCRAPERS.keys()) - - def test_all_scrapers_registry(self): - assert isinstance(ALL_SCRAPERS, dict) - assert len(ALL_SCRAPERS) > 0 - - for platform_name, scraper_class in ALL_SCRAPERS.items(): - assert isinstance(platform_name, str) - assert platform_name.islower() - - assert issubclass(scraper_class, BaseScraper) - - scraper = scraper_class() - assert scraper.platform_name == platform_name - - def test_registry_import_consistency(self): - from scrapers.codeforces import CodeforcesScraper as DirectImport - - registry_class = get_scraper("codeforces") - assert registry_class == DirectImport - - def test_all_scrapers_can_be_instantiated(self): - for platform_name, scraper_class in ALL_SCRAPERS.items(): - scraper = scraper_class() - assert isinstance(scraper, BaseScraper) - assert scraper.platform_name == platform_name