Merge pull request #108 from barrett-ruth/fix/cses-titles
Don't Hard Code CSES Title Names
This commit is contained in:
commit
092b4de05f
2 changed files with 36 additions and 19 deletions
|
|
@@ -24,21 +24,38 @@ def normalize_category_name(category_name: str) -> str:
|
||||||
return category_name.lower().replace(" ", "_").replace("&", "and")
|
return category_name.lower().replace(" ", "_").replace("&", "and")
|
||||||
|
|
||||||
|
|
||||||
def denormalize_category_name(category_id: str) -> str:
|
def snake_to_title(name: str) -> str:
|
||||||
category_map = {
|
small_words = {
|
||||||
"introductory_problems": "Introductory Problems",
|
"a",
|
||||||
"sorting_and_searching": "Sorting and Searching",
|
"an",
|
||||||
"dynamic_programming": "Dynamic Programming",
|
"the",
|
||||||
"graph_algorithms": "Graph Algorithms",
|
"and",
|
||||||
"range_queries": "Range Queries",
|
"but",
|
||||||
"tree_algorithms": "Tree Algorithms",
|
"or",
|
||||||
"mathematics": "Mathematics",
|
"nor",
|
||||||
"string_algorithms": "String Algorithms",
|
"for",
|
||||||
"geometry": "Geometry",
|
"so",
|
||||||
"advanced_techniques": "Advanced Techniques",
|
"yet",
|
||||||
|
"at",
|
||||||
|
"by",
|
||||||
|
"in",
|
||||||
|
"of",
|
||||||
|
"on",
|
||||||
|
"per",
|
||||||
|
"to",
|
||||||
|
"vs",
|
||||||
|
"via",
|
||||||
}
|
}
|
||||||
|
|
||||||
return category_map.get(category_id, category_id.replace("_", " ").title())
|
words: list[str] = name.split("_")
|
||||||
|
n = len(words)
|
||||||
|
|
||||||
|
def fix_word(i_word):
|
||||||
|
i, word = i_word
|
||||||
|
lw = word.lower()
|
||||||
|
return lw.capitalize() if i == 0 or i == n - 1 or lw not in small_words else lw
|
||||||
|
|
||||||
|
return " ".join(map(fix_word, enumerate(words)))
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(
|
@backoff.on_exception(
|
||||||
|
|
@@ -67,7 +84,7 @@ def make_request(url: str, headers: dict) -> requests.Response:
|
||||||
|
|
||||||
|
|
||||||
def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
|
def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
|
||||||
category_name = denormalize_category_name(category_id)
|
category_name = snake_to_title(category_id)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
problemset_url = "https://cses.fi/problemset/"
|
problemset_url = "https://cses.fi/problemset/"
|
||||||
|
|
|
||||||
|
|
@@ -1,12 +1,12 @@
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from scrapers.cses import (
|
from scrapers.cses import (
|
||||||
denormalize_category_name,
|
|
||||||
normalize_category_name,
|
normalize_category_name,
|
||||||
scrape,
|
scrape,
|
||||||
scrape_all_problems,
|
scrape_all_problems,
|
||||||
scrape_categories,
|
scrape_categories,
|
||||||
scrape_category_problems,
|
scrape_category_problems,
|
||||||
|
snake_to_title,
|
||||||
)
|
)
|
||||||
from scrapers.models import ContestSummary, ProblemSummary
|
from scrapers.models import ContestSummary, ProblemSummary
|
||||||
|
|
||||||
|
|
@@ -68,10 +68,10 @@ def test_normalize_category_name():
|
||||||
assert normalize_category_name("Graph Algorithms") == "graph_algorithms"
|
assert normalize_category_name("Graph Algorithms") == "graph_algorithms"
|
||||||
|
|
||||||
|
|
||||||
def test_denormalize_category_name():
|
def test_snake_to_title():
|
||||||
assert denormalize_category_name("sorting_and_searching") == "Sorting and Searching"
|
assert snake_to_title("sorting_and_searching") == "Sorting and Searching"
|
||||||
assert denormalize_category_name("dynamic_programming") == "Dynamic Programming"
|
assert snake_to_title("dynamic_programming") == "Dynamic Programming"
|
||||||
assert denormalize_category_name("graph_algorithms") == "Graph Algorithms"
|
assert snake_to_title("graph_algorithms") == "Graph Algorithms"
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_category_problems_success(mocker):
|
def test_scrape_category_problems_success(mocker):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue