# Words kept lowercase in a title unless they are the first or last word.
# Defined once at module level so the set is not rebuilt on every call.
_SMALL_WORDS = frozenset(
    {
        "a",
        "an",
        "the",
        "and",
        "but",
        "or",
        "nor",
        "for",
        "so",
        "yet",
        "at",
        "by",
        "in",
        "of",
        "on",
        "per",
        "to",
        "vs",
        "via",
    }
)


def snake_to_title(name: str) -> str:
    """Convert a snake_case identifier into a title-cased, space-separated string.

    The input is split on underscores. Every word is lowercased and then
    capitalized, except that small words (articles, conjunctions, short
    prepositions) stay lowercase unless they are the first or last word.

    Empty segments (from leading, trailing, or doubled underscores) are kept,
    so they show up as extra spaces in the result.

    Args:
        name: Underscore-separated identifier, e.g. ``"sorting_and_searching"``.

    Returns:
        The title-cased form, e.g. ``"Sorting and Searching"``.
    """
    words = name.lower().split("_")
    last = len(words) - 1
    return " ".join(
        # First and last words are always capitalized, even if "small".
        word if 0 < i < last and word in _SMALL_WORDS else word.capitalize()
        for i, word in enumerate(words)
    )
def test_snake_to_title():
    """Check that category ids are rendered as their expected display titles."""
    expected_titles = {
        "sorting_and_searching": "Sorting and Searching",
        "dynamic_programming": "Dynamic Programming",
        "graph_algorithms": "Graph Algorithms",
    }
    for category_id, title in expected_titles.items():
        assert snake_to_title(category_id) == title