def scrape_categories() -> list[ContestSummary]:
    """Fetch the CSES problem-set index page and list its categories.

    Every ``<h2>`` heading on the page, except the one whose text is
    ``"General"``, yields one ``ContestSummary``:

    * ``id`` is the heading text passed through ``normalize_category_name``;
    * ``name`` is the stripped heading text;
    * ``display_name`` is the name followed by the number of ``li.task``
      items inside the first following sibling ``ul.task-list`` (0 when no
      such list is found).

    Returns:
        The summaries in page order. If anything raises (request failure,
        HTTP error status, parsing problem), the error is reported on
        stderr and an empty list is returned instead.
    """
    # Browser-like User-Agent sent with the index-page request.
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    try:
        page = requests.get(
            "https://cses.fi/problemset/",
            headers={"User-Agent": user_agent},
            timeout=10,
        )
        page.raise_for_status()

        document = BeautifulSoup(page.text, "html.parser")
        summaries = []

        for heading in document.find_all("h2"):
            title = heading.get_text().strip()
            # "General" is excluded from the category list.
            if title == "General":
                continue

            slug = normalize_category_name(title)

            task_list = heading.find_next_sibling("ul", class_="task-list")
            task_total = (
                len(task_list.find_all("li", class_="task")) if task_list else 0
            )

            summaries.append(
                ContestSummary(
                    id=slug,
                    name=title,
                    display_name=f"{title} ({task_total} problems)",
                )
            )

        return summaries

    except Exception as exc:
        # Best-effort scraper: callers treat an empty list as failure.
        print(f"Failed to scrape CSES categories: {exc}", file=sys.stderr)
        return []
-> None: ) print(json.dumps(asdict(tests_result))) + elif mode == "categories": + if len(sys.argv) != 2: + contest_result = ContestListResult( + success=False, error="Usage: cses.py categories" + ) + print(json.dumps(asdict(contest_result))) + sys.exit(1) + + categories = scrape_categories() + if not categories: + contest_result = ContestListResult( + success=False, error="No categories found" + ) + print(json.dumps(asdict(contest_result))) + sys.exit(1) + + contest_result = ContestListResult(success=True, error="", contests=categories) + print(json.dumps(asdict(contest_result))) + else: result = MetadataResult( success=False, - error=f"Unknown mode: {mode}. Use 'metadata' or 'tests'", + error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'categories'", ) print(json.dumps(asdict(result))) sys.exit(1) diff --git a/tests/scrapers/test_cses.py b/tests/scrapers/test_cses.py index 68af557..a1e84a2 100644 --- a/tests/scrapers/test_cses.py +++ b/tests/scrapers/test_cses.py @@ -5,9 +5,10 @@ from scrapers.cses import ( normalize_category_name, scrape, scrape_all_problems, + scrape_categories, scrape_category_problems, ) -from scrapers.models import ProblemSummary +from scrapers.models import ContestSummary, ProblemSummary def test_scrape_success(mocker, mock_cses_html): @@ -130,3 +131,55 @@ def test_scrape_category_problems_network_error(mocker): result = scrape_category_problems("sorting_and_searching") assert result == [] + + +def test_scrape_categories_success(mocker): + mock_response = Mock() + mock_response.text = """ + + +

General

+ + +

Introductory Problems

+ + +

Sorting and Searching

+ + + + """ + mock_response.raise_for_status = Mock() + + mocker.patch("scrapers.cses.requests.get", return_value=mock_response) + + result = scrape_categories() + + assert len(result) == 2 + assert result[0] == ContestSummary( + id="introductory_problems", + name="Introductory Problems", + display_name="Introductory Problems (2 problems)", + ) + assert result[1] == ContestSummary( + id="sorting_and_searching", + name="Sorting and Searching", + display_name="Sorting and Searching (3 problems)", + ) + + +def test_scrape_categories_network_error(mocker): + mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error")) + + result = scrape_categories() + + assert result == []