feat(scrapers): total refactor
This commit is contained in:
parent
eb3f7762de
commit
db391da52c
9 changed files with 559 additions and 307 deletions
|
|
@ -5,14 +5,16 @@ from scrapers.models import ContestSummary, ProblemSummary
|
|||
|
||||
|
||||
def test_scrape_success(mocker, mock_codeforces_html):
|
||||
mock_client = Mock()
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.text = mock_codeforces_html
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
||||
assert result.success == True
|
||||
|
|
@ -22,17 +24,19 @@ def test_scrape_success(mocker, mock_codeforces_html):
|
|||
|
||||
|
||||
def test_scrape_contest_problems(mocker):
|
||||
mock_client = Mock()
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<a href="/contest/1900/problem/A">A. Problem A</a>
|
||||
<a href="/contest/1900/problem/B">B. Problem B</a>
|
||||
"""
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_contest_metadata("1900")
|
||||
|
||||
assert result.success == True
|
||||
|
|
@ -42,12 +46,14 @@ def test_scrape_contest_problems(mocker):
|
|||
|
||||
|
||||
def test_scrape_network_error(mocker):
|
||||
mock_client = Mock()
|
||||
mock_client.get.side_effect = Exception("Network error")
|
||||
mock_scraper = Mock()
|
||||
mock_scraper.get.side_effect = Exception("Network error")
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
||||
assert result.success == False
|
||||
|
|
@ -55,7 +61,7 @@ def test_scrape_network_error(mocker):
|
|||
|
||||
|
||||
def test_scrape_contests_success(mocker):
|
||||
mock_client = Mock()
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {
|
||||
"status": "OK",
|
||||
|
|
@ -65,11 +71,13 @@ def test_scrape_contests_success(mocker):
|
|||
{"id": 1949, "name": "Codeforces Global Round 26"},
|
||||
],
|
||||
}
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert result.success == True
|
||||
|
|
@ -92,14 +100,16 @@ def test_scrape_contests_success(mocker):
|
|||
|
||||
|
||||
def test_scrape_contests_api_error(mocker):
|
||||
mock_client = Mock()
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {"status": "FAILED", "result": []}
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert result.success == False
|
||||
|
|
@ -107,12 +117,14 @@ def test_scrape_contests_api_error(mocker):
|
|||
|
||||
|
||||
def test_scrape_contests_network_error(mocker):
|
||||
mock_client = Mock()
|
||||
mock_client.get.side_effect = Exception("Network error")
|
||||
mock_scraper = Mock()
|
||||
mock_scraper.get.side_effect = Exception("Network error")
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert result.success == False
|
||||
|
|
|
|||
162
tests/scrapers/test_interface_compliance.py
Normal file
162
tests/scrapers/test_interface_compliance.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from scrapers import ALL_SCRAPERS, BaseScraper
|
||||
from scrapers.models import ContestListResult, MetadataResult, TestsResult
|
||||
|
||||
ALL_SCRAPER_CLASSES = list(ALL_SCRAPERS.values())
|
||||
|
||||
|
||||
def _patch_codeforces_session(mocker, *, text=None, json_data=None, error=None):
    """Swap the cloudscraper session used by the Codeforces scraper for a mock.

    Args:
        mocker: The pytest-mock ``mocker`` fixture.
        text: Value exposed as ``response.text`` on the mocked GET response.
        json_data: Value returned by ``response.json()`` on the mocked response.
        error: Exception raised by ``session.get`` instead of returning a
            response; when given, ``text`` and ``json_data`` are ignored.

    Returns:
        The mock session that ``cloudscraper.create_scraper`` now returns.
    """
    session = Mock()
    if error is not None:
        session.get.side_effect = error
    else:
        response = Mock()
        if text is not None:
            response.text = text
        if json_data is not None:
            response.json.return_value = json_data
        session.get.return_value = response

    mocker.patch(
        "scrapers.codeforces.cloudscraper.create_scraper",
        return_value=session,
    )
    return session


class TestScraperInterfaceCompliance:
    """Contract tests run against every scraper class registered in ALL_SCRAPERS."""

    @staticmethod
    def _assert_prefixed_failure(result, platform_name):
        """Assert that ``result`` failed and its error starts with ``"<platform>: "``."""
        assert result.success is False
        assert result.error.startswith(f"{platform_name}: ")

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_implements_base_interface(self, scraper_class):
        """Each scraper is a BaseScraper exposing the expected attributes."""
        scraper = scraper_class()

        assert isinstance(scraper, BaseScraper)
        assert hasattr(scraper, "platform_name")
        assert hasattr(scraper, "scrape_contest_metadata")
        assert hasattr(scraper, "scrape_problem_tests")
        assert hasattr(scraper, "scrape_contest_list")

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_platform_name_is_string(self, scraper_class):
        """``platform_name`` is a non-empty, lowercase string."""
        scraper = scraper_class()
        platform_name = scraper.platform_name

        assert isinstance(platform_name, str)
        assert len(platform_name) > 0
        assert platform_name.islower()  # Convention: lowercase platform names

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_metadata_method_signature(self, scraper_class, mocker):
        """``scrape_contest_metadata`` returns a MetadataResult with typed fields."""
        scraper = scraper_class()

        # Mock the underlying HTTP calls to avoid network requests.
        # NOTE(review): only Codeforces is mocked here; the other registered
        # scrapers appear to run unmocked in this test - confirm whether they
        # should be patched as in test_error_message_format.
        if scraper.platform_name == "codeforces":
            _patch_codeforces_session(
                mocker, text="<a href='/contest/1900/problem/A'>A. Test</a>"
            )

        result = scraper.scrape_contest_metadata("test_contest")

        assert isinstance(result, MetadataResult)
        assert hasattr(result, "success")
        assert hasattr(result, "error")
        assert hasattr(result, "problems")
        assert hasattr(result, "contest_id")
        assert isinstance(result.success, bool)
        assert isinstance(result.error, str)

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_problem_tests_method_signature(self, scraper_class, mocker):
        """``scrape_problem_tests`` returns a TestsResult with typed fields."""
        scraper = scraper_class()

        # NOTE(review): only Codeforces is mocked; see the note in
        # test_metadata_method_signature.
        if scraper.platform_name == "codeforces":
            _patch_codeforces_session(
                mocker,
                text="""
                <div class="time-limit">Time limit: 1 seconds</div>
                <div class="memory-limit">Memory limit: 256 megabytes</div>
                <div class="input"><pre><div class="test-example-line-1">3</div></pre></div>
                <div class="output"><pre><div class="test-example-line-1">6</div></pre></div>
                """,
            )

        result = scraper.scrape_problem_tests("test_contest", "A")

        assert isinstance(result, TestsResult)
        assert hasattr(result, "success")
        assert hasattr(result, "error")
        assert hasattr(result, "tests")
        assert hasattr(result, "problem_id")
        assert hasattr(result, "url")
        assert hasattr(result, "timeout_ms")
        assert hasattr(result, "memory_mb")
        assert isinstance(result.success, bool)
        assert isinstance(result.error, str)

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_contest_list_method_signature(self, scraper_class, mocker):
        """``scrape_contest_list`` returns a ContestListResult with typed fields."""
        scraper = scraper_class()

        # NOTE(review): only Codeforces is mocked; see the note in
        # test_metadata_method_signature.
        if scraper.platform_name == "codeforces":
            _patch_codeforces_session(
                mocker,
                json_data={
                    "status": "OK",
                    "result": [{"id": 1900, "name": "Test Contest"}],
                },
            )

        result = scraper.scrape_contest_list()

        assert isinstance(result, ContestListResult)
        assert hasattr(result, "success")
        assert hasattr(result, "error")
        assert hasattr(result, "contests")
        assert isinstance(result.success, bool)
        assert isinstance(result.error, str)

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_error_message_format(self, scraper_class, mocker):
        """Failures from every scrape method are reported as ``"<platform>: ..."``."""
        scraper = scraper_class()
        platform_name = scraper.platform_name

        # Force an error by mocking HTTP failure
        if platform_name == "codeforces":
            _patch_codeforces_session(mocker, error=Exception("Network error"))
        elif platform_name == "atcoder":
            mocker.patch(
                "scrapers.atcoder.requests.get", side_effect=Exception("Network error")
            )
        elif platform_name == "cses":
            mocker.patch(
                "scrapers.cses.make_request", side_effect=Exception("Network error")
            )

        # Test metadata error format
        self._assert_prefixed_failure(
            scraper.scrape_contest_metadata("test"), platform_name
        )

        # Test problem tests error format
        self._assert_prefixed_failure(
            scraper.scrape_problem_tests("test", "A"), platform_name
        )

        # Test contest list error format
        self._assert_prefixed_failure(scraper.scrape_contest_list(), platform_name)

    @pytest.mark.parametrize("scraper_class", ALL_SCRAPER_CLASSES)
    def test_scraper_instantiation(self, scraper_class):
        """Scrapers build with a default config and accept a custom ScraperConfig."""
        # Kept function-local so this block does not depend on the module's
        # import section being changed.
        from scrapers.base import ScraperConfig

        scraper1 = scraper_class()
        assert isinstance(scraper1, BaseScraper)
        assert scraper1.config is not None

        custom_config = ScraperConfig(timeout_seconds=60)
        scraper2 = scraper_class(custom_config)
        assert isinstance(scraper2, BaseScraper)
        assert scraper2.config.timeout_seconds == 60
|
||||
58
tests/scrapers/test_registry.py
Normal file
58
tests/scrapers/test_registry.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import pytest
|
||||
|
||||
from scrapers import ALL_SCRAPERS, get_scraper, list_platforms
|
||||
from scrapers.base import BaseScraper
|
||||
from scrapers.codeforces import CodeforcesScraper
|
||||
|
||||
|
||||
class TestScraperRegistry:
    """Tests for the name-to-class scraper registry exported by ``scrapers``."""

    def test_get_scraper_valid_platform(self):
        """Looking up "codeforces" yields the Codeforces scraper class."""
        resolved = get_scraper("codeforces")
        assert resolved == CodeforcesScraper
        assert issubclass(resolved, BaseScraper)

        instance = resolved()
        assert isinstance(instance, BaseScraper)
        assert instance.platform_name == "codeforces"

    def test_get_scraper_invalid_platform(self):
        """An unknown platform raises KeyError naming it and the alternatives."""
        with pytest.raises(KeyError) as caught:
            get_scraper("nonexistent")

        message = str(caught.value)
        assert "nonexistent" in message
        assert "Available platforms" in message

    def test_list_platforms(self):
        """``list_platforms`` returns a non-empty list matching the registry keys."""
        names = list_platforms()

        assert isinstance(names, list)
        assert len(names) > 0
        assert "codeforces" in names

        assert set(names) == set(ALL_SCRAPERS.keys())

    def test_all_scrapers_registry(self):
        """Every registry entry maps a lowercase name to a matching scraper class."""
        assert isinstance(ALL_SCRAPERS, dict)
        assert len(ALL_SCRAPERS) > 0

        for name, cls in ALL_SCRAPERS.items():
            assert isinstance(name, str)
            assert name.islower()

            assert issubclass(cls, BaseScraper)

            instance = cls()
            assert instance.platform_name == name

    def test_registry_import_consistency(self):
        """The registry hands back the same class as a direct module import."""
        from scrapers.codeforces import CodeforcesScraper as ImportedDirectly

        from_registry = get_scraper("codeforces")
        assert from_registry == ImportedDirectly

    def test_all_scrapers_can_be_instantiated(self):
        """Each registered class constructs with no arguments."""
        for name, cls in ALL_SCRAPERS.items():
            instance = cls()
            assert isinstance(instance, BaseScraper)
            assert instance.platform_name == name
|
||||
Loading…
Add table
Add a link
Reference in a new issue