From 315e5a790c8bb99b479676565d677bb26f490dee Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Sat, 20 Sep 2025 14:13:25 -0400 Subject: [PATCH] fix(ci): guess im adding the atcoder scraper too --- doc/cp.txt | 14 ++--- lua/cp/scrape.lua | 2 +- scrapers/atcoder.py | 88 ++++++++++++++++++++++++++++++- spec/scraper_spec.lua | 6 +-- tests/scrapers/test_atcoder.py | 83 ++++++++++++++++++++++++++++- tests/scrapers/test_codeforces.py | 68 +++++++++++++++++++++++- 6 files changed, 244 insertions(+), 17 deletions(-) diff --git a/doc/cp.txt b/doc/cp.txt index 37b2de9..bb0f0f4 100644 --- a/doc/cp.txt +++ b/doc/cp.txt @@ -326,7 +326,7 @@ CSES ~ URL format: https://cses.fi/problemset/task/1068 CSES (Code Submission Evaluation System) is organized by problem categories -rather than traditional contests. All problems are accessible individually. +rather than traditional contests. Problems are grouped by topic and difficulty. Platform characteristics: • Organization: Category-based (Introductory, Sorting, Dynamic Programming) @@ -337,17 +337,17 @@ Platform characteristics: In terms of cp.nvim, this corresponds to: - Platform: cses -- Contest ID: Problem ID (1068) - used as both contest and problem identifier -- Problem ID: nil (not applicable for CSES structure) +- Contest ID: Category name (introductory_problems, sorting_and_searching) +- Problem ID: Problem number (1068, 1640) Usage examples: > - :CP cses 1068 " Set up problem 1068 from CSES - :CP 1070 " Switch to problem 1070 (if CSES context loaded) - :CP next " Navigate to next problem in CSES sequence + :CP cses dynamic_programming 1633 " Set up problem 1633 from DP category < - Note: CSES problems are treated as individual + Note: Both category and problem ID are required entities rather than contest problems. 
+ ============================================================================== + COMPLETE WORKFLOW EXAMPLE *cp-example* Example: Setting up and solving AtCoder contest ABC324 diff --git a/lua/cp/scrape.lua b/lua/cp/scrape.lua index ec5376e..d01bbb6 100644 --- a/lua/cp/scrape.lua +++ b/lua/cp/scrape.lua @@ -205,7 +205,7 @@ function M.scrape_problem(ctx) '-m', 'scrapers.' .. ctx.contest, 'tests', - ctx.contest_id, + ctx.problem_id, } else args = { diff --git a/scrapers/atcoder.py b/scrapers/atcoder.py index fbc1453..02beda8 100644 --- a/scrapers/atcoder.py +++ b/scrapers/atcoder.py @@ -167,11 +167,78 @@ def scrape(url: str) -> list[TestCase]: return [] +def scrape_contests() -> list[ContestSummary]: + contests = [] + max_pages = 15 + + for page in range(1, max_pages + 1): + try: + headers = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + } + url = f"https://atcoder.jp/contests/archive?page={page}" + response = requests.get(url, headers=headers, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + table = soup.find("table", class_="table") + if not table: + break + + tbody = table.find("tbody") + if not tbody or not isinstance(tbody, Tag): + break + + rows = tbody.find_all("tr") + if not rows: + break + + for row in rows: + cells = row.find_all("td") + if len(cells) < 2: + continue + + contest_cell = cells[1] + link = contest_cell.find("a") + if not link or not link.get("href"): + continue + + href = link.get("href") + contest_id = href.split("/")[-1] + name = link.get_text().strip() + + display_name = name + if "AtCoder Beginner Contest" in name: + match = re.search(r"AtCoder Beginner Contest (\d+)", name) + if match: + display_name = f"Beginner Contest {match.group(1)} (ABC)" + elif "AtCoder Regular Contest" in name: + match = re.search(r"AtCoder Regular Contest (\d+)", name) + if match: + display_name = f"Regular Contest {match.group(1)} 
(ARC)" + elif "AtCoder Grand Contest" in name: + match = re.search(r"AtCoder Grand Contest (\d+)", name) + if match: + display_name = f"Grand Contest {match.group(1)} (AGC)" + + contests.append( + ContestSummary(id=contest_id, name=name, display_name=display_name) + ) + + time.sleep(0.5) + + except Exception as e: + print(f"Failed to scrape page {page}: {e}", file=sys.stderr) + continue + + return contests + + def main() -> None: if len(sys.argv) < 2: result = MetadataResult( success=False, - error="Usage: atcoder.py metadata OR atcoder.py tests ", + error="Usage: atcoder.py metadata OR atcoder.py tests OR atcoder.py contests", ) print(json.dumps(asdict(result))) sys.exit(1) @@ -272,10 +339,27 @@ def main() -> None: ) print(json.dumps(asdict(tests_result))) + elif mode == "contests": + if len(sys.argv) != 2: + contest_result = ContestListResult( + success=False, error="Usage: atcoder.py contests" + ) + print(json.dumps(asdict(contest_result))) + sys.exit(1) + + contests = scrape_contests() + if not contests: + contest_result = ContestListResult(success=False, error="No contests found") + print(json.dumps(asdict(contest_result))) + sys.exit(1) + + contest_result = ContestListResult(success=True, error="", contests=contests) + print(json.dumps(asdict(contest_result))) + else: result = MetadataResult( success=False, - error=f"Unknown mode: {mode}. Use 'metadata' or 'tests'", + error=f"Unknown mode: {mode}. 
Use 'metadata', 'tests', or 'contests'", ) print(json.dumps(asdict(result))) sys.exit(1) diff --git a/spec/scraper_spec.lua b/spec/scraper_spec.lua index ff504aa..67c1daa 100644 --- a/spec/scraper_spec.lua +++ b/spec/scraper_spec.lua @@ -380,8 +380,8 @@ describe('cp.scrape', function() it('constructs correct command for cses problem tests', function() test_context.contest = 'cses' - test_context.contest_id = '1001' - test_context.problem_id = nil + test_context.contest_id = 'sorting_and_searching' + test_context.problem_id = '1001' scrape.scrape_problem(test_context) @@ -396,7 +396,7 @@ describe('cp.scrape', function() assert.is_not_nil(tests_call) assert.is_true(vim.tbl_contains(tests_call.cmd, 'tests')) assert.is_true(vim.tbl_contains(tests_call.cmd, '1001')) - assert.is_false(vim.tbl_contains(tests_call.cmd, 'a')) + assert.is_false(vim.tbl_contains(tests_call.cmd, 'sorting_and_searching')) end) end) diff --git a/tests/scrapers/test_atcoder.py b/tests/scrapers/test_atcoder.py index 95ff09d..0474c6a 100644 --- a/tests/scrapers/test_atcoder.py +++ b/tests/scrapers/test_atcoder.py @@ -1,6 +1,7 @@ from unittest.mock import Mock -from scrapers.atcoder import scrape, scrape_contest_problems -from scrapers.models import ProblemSummary + +from scrapers.atcoder import scrape, scrape_contest_problems, scrape_contests +from scrapers.models import ContestSummary, ProblemSummary def test_scrape_success(mocker, mock_atcoder_html): @@ -49,3 +50,81 @@ def test_scrape_network_error(mocker): result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a") assert result == [] + + +def test_scrape_contests_success(mocker): + def mock_get_side_effect(url, **kwargs): + if "page=1" in url: + mock_response = Mock() + mock_response.text = """ + + + + + + + + + + + + + + + + + + + + + + + +
+            <tr><th>Start Time</th><th>Contest Name</th><th>Duration</th><th>Rated Range</th></tr>
+            <tr><td>2025-01-15 21:00:00+0900</td><td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td><td>01:40</td><td> - 1999</td></tr>
+            <tr><td>2025-01-14 21:00:00+0900</td><td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td><td>02:00</td><td>1000 - 2799</td></tr>
+ """ + return mock_response + else: + # Return empty page for all other pages + mock_response = Mock() + mock_response.text = "No table found" + return mock_response + + mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect) + mocker.patch("scrapers.atcoder.time.sleep") + + result = scrape_contests() + + assert len(result) == 2 + assert result[0] == ContestSummary( + id="abc350", + name="AtCoder Beginner Contest 350", + display_name="Beginner Contest 350 (ABC)", + ) + assert result[1] == ContestSummary( + id="arc170", + name="AtCoder Regular Contest 170", + display_name="Regular Contest 170 (ARC)", + ) + + +def test_scrape_contests_no_table(mocker): + mock_response = Mock() + mock_response.text = "No table found" + + mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response) + mocker.patch("scrapers.atcoder.time.sleep") + + result = scrape_contests() + + assert result == [] + + +def test_scrape_contests_network_error(mocker): + mocker.patch( + "scrapers.atcoder.requests.get", side_effect=Exception("Network error") + ) + mocker.patch("scrapers.atcoder.time.sleep") + + result = scrape_contests() + + assert result == [] diff --git a/tests/scrapers/test_codeforces.py b/tests/scrapers/test_codeforces.py index 1fbfbd1..b95a489 100644 --- a/tests/scrapers/test_codeforces.py +++ b/tests/scrapers/test_codeforces.py @@ -1,6 +1,7 @@ from unittest.mock import Mock -from scrapers.codeforces import scrape, scrape_contest_problems -from scrapers.models import ProblemSummary + +from scrapers.codeforces import scrape, scrape_contest_problems, scrape_contests +from scrapers.models import ContestSummary, ProblemSummary def test_scrape_success(mocker, mock_codeforces_html): @@ -51,3 +52,66 @@ def test_scrape_network_error(mocker): result = scrape("https://codeforces.com/contest/1900/problem/A") assert result == [] + + +def test_scrape_contests_success(mocker): + mock_scraper = Mock() + mock_response = Mock() + mock_response.json.return_value = 
{ + "status": "OK", + "result": [ + {"id": 1951, "name": "Educational Codeforces Round 168 (Rated for Div. 2)"}, + {"id": 1950, "name": "Codeforces Round 936 (Div. 2)"}, + {"id": 1949, "name": "Codeforces Global Round 26"}, + ], + } + mock_scraper.get.return_value = mock_response + + mocker.patch( + "scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper + ) + + result = scrape_contests() + + assert len(result) == 3 + assert result[0] == ContestSummary( + id="1951", + name="Educational Codeforces Round 168 (Rated for Div. 2)", + display_name="Educational Round 168", + ) + assert result[1] == ContestSummary( + id="1950", + name="Codeforces Round 936 (Div. 2)", + display_name="Round 936 (Div. 2)", + ) + assert result[2] == ContestSummary( + id="1949", name="Codeforces Global Round 26", display_name="Global Round 26" + ) + + +def test_scrape_contests_api_error(mocker): + mock_scraper = Mock() + mock_response = Mock() + mock_response.json.return_value = {"status": "FAILED", "result": []} + mock_scraper.get.return_value = mock_response + + mocker.patch( + "scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper + ) + + result = scrape_contests() + + assert result == [] + + +def test_scrape_contests_network_error(mocker): + mock_scraper = Mock() + mock_scraper.get.side_effect = Exception("Network error") + + mocker.patch( + "scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper + ) + + result = scrape_contests() + + assert result == []