fix(ci): guess im adding the atcoder scraper too

This commit is contained in:
Barrett Ruth 2025-09-20 14:13:25 -04:00
parent 35545a1ad2
commit 315e5a790c
6 changed files with 244 additions and 17 deletions

View file

@ -326,7 +326,7 @@ CSES ~
URL format: https://cses.fi/problemset/task/1068
CSES (Code Submission Evaluation System) is organized by problem categories
rather than traditional contests. All problems are accessible individually.
rather than traditional contests. Problems are grouped by topic and difficulty.
Platform characteristics:
• Organization: Category-based (Introductory, Sorting, Dynamic Programming)
@ -337,17 +337,17 @@ Platform characteristics:
In terms of cp.nvim, this corresponds to:
- Platform: cses
- Contest ID: Problem ID (1068) - used as both contest and problem identifier
- Problem ID: nil (not applicable for CSES structure)
- Contest ID: Category name (introductory_problems, sorting_and_searching)
- Problem ID: Problem number (1068, 1640)
Usage examples: >
:CP cses 1068 " Set up problem 1068 from CSES
:CP 1070 " Switch to problem 1070 (if CSES context loaded)
:CP next " Navigate to next problem in CSES sequence
:CP cses dynamic_programming 1633 " Set up problem 1633 from DP category
<
Note: CSES problems are treated as individual
Note: Both category and problem ID are required
entities rather than contest problems.
==============================================================================
COMPLETE WORKFLOW EXAMPLE *cp-example*
Example: Setting up and solving AtCoder contest ABC324

View file

@ -205,7 +205,7 @@ function M.scrape_problem(ctx)
'-m',
'scrapers.' .. ctx.contest,
'tests',
ctx.contest_id,
ctx.problem_id,
}
else
args = {

View file

@ -167,11 +167,78 @@ def scrape(url: str) -> list[TestCase]:
return []
def scrape_contests() -> list[ContestSummary]:
    """Scrape the AtCoder contest archive and return contest summaries.

    Walks up to 15 pages of https://atcoder.jp/contests/archive, parsing the
    contest table on each page. Stops early when a page has no table, no
    body, or no rows (past the last archive page). Per-page failures are
    logged to stderr and skipped so one bad page does not abort the scrape.

    Returns:
        List of ContestSummary for every contest row found; empty on total
        failure.
    """
    contests: list[ContestSummary] = []
    max_pages = 15
    # Invariant across pages — build once, not per iteration.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # (full series name, short display prefix, abbreviation). Series names
    # contain no regex metacharacters, so they can be embedded in a pattern
    # verbatim to pull out the contest number.
    series = (
        ("AtCoder Beginner Contest", "Beginner Contest", "ABC"),
        ("AtCoder Regular Contest", "Regular Contest", "ARC"),
        ("AtCoder Grand Contest", "Grand Contest", "AGC"),
    )
    for page in range(1, max_pages + 1):
        try:
            url = f"https://atcoder.jp/contests/archive?page={page}"
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            table = soup.find("table", class_="table")
            if not table:
                break
            tbody = table.find("tbody")
            if not tbody or not isinstance(tbody, Tag):
                break
            rows = tbody.find_all("tr")
            if not rows:
                break

            for row in rows:
                cells = row.find_all("td")
                # Contest name lives in the second column; skip malformed rows.
                if len(cells) < 2:
                    continue
                link = cells[1].find("a")
                if not link or not link.get("href"):
                    continue
                # href is "/contests/<id>" — the last path segment is the id.
                contest_id = link.get("href").split("/")[-1]
                name = link.get_text().strip()

                # Shorten well-known series names; anything else keeps its
                # full name as the display name.
                display_name = name
                for full_name, short_name, abbrev in series:
                    if full_name in name:
                        match = re.search(rf"{full_name} (\d+)", name)
                        if match:
                            display_name = f"{short_name} {match.group(1)} ({abbrev})"
                        break

                contests.append(
                    ContestSummary(id=contest_id, name=name, display_name=display_name)
                )
            # Be polite to AtCoder between page fetches.
            time.sleep(0.5)
        except Exception as e:
            # Best-effort scrape: report and move on to the next page.
            print(f"Failed to scrape page {page}: {e}", file=sys.stderr)
            continue
    return contests
def main() -> None:
if len(sys.argv) < 2:
result = MetadataResult(
success=False,
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> <problem_letter>",
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> <problem_letter> OR atcoder.py contests",
)
print(json.dumps(asdict(result)))
sys.exit(1)
@ -272,10 +339,27 @@ def main() -> None:
)
print(json.dumps(asdict(tests_result)))
elif mode == "contests":
if len(sys.argv) != 2:
contest_result = ContestListResult(
success=False, error="Usage: atcoder.py contests"
)
print(json.dumps(asdict(contest_result)))
sys.exit(1)
contests = scrape_contests()
if not contests:
contest_result = ContestListResult(success=False, error="No contests found")
print(json.dumps(asdict(contest_result)))
sys.exit(1)
contest_result = ContestListResult(success=True, error="", contests=contests)
print(json.dumps(asdict(contest_result)))
else:
result = MetadataResult(
success=False,
error=f"Unknown mode: {mode}. Use 'metadata' or 'tests'",
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'",
)
print(json.dumps(asdict(result)))
sys.exit(1)

View file

@ -380,8 +380,8 @@ describe('cp.scrape', function()
it('constructs correct command for cses problem tests', function()
test_context.contest = 'cses'
test_context.contest_id = '1001'
test_context.problem_id = nil
test_context.contest_id = 'sorting_and_searching'
test_context.problem_id = '1001'
scrape.scrape_problem(test_context)
@ -396,7 +396,7 @@ describe('cp.scrape', function()
assert.is_not_nil(tests_call)
assert.is_true(vim.tbl_contains(tests_call.cmd, 'tests'))
assert.is_true(vim.tbl_contains(tests_call.cmd, '1001'))
assert.is_false(vim.tbl_contains(tests_call.cmd, 'a'))
assert.is_false(vim.tbl_contains(tests_call.cmd, 'sorting_and_searching'))
end)
end)

View file

@ -1,6 +1,7 @@
from unittest.mock import Mock
from scrapers.atcoder import scrape, scrape_contest_problems
from scrapers.models import ProblemSummary
from scrapers.atcoder import scrape, scrape_contest_problems, scrape_contests
from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_atcoder_html):
@ -49,3 +50,81 @@ def test_scrape_network_error(mocker):
result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")
assert result == []
def test_scrape_contests_success(mocker):
    """Contest rows from the archive table become ContestSummary entries.

    Serves a two-row archive table for page 1 and a table-less document for
    every later page, so scrape_contests' pagination loop stops after page 2.
    """

    def mock_get_side_effect(url, **kwargs):
        if "page=1" in url:
            mock_response = Mock()
            # Minimal copy of the AtCoder archive markup: the contest link
            # (second <td>) carries the id in its href and the full name as
            # its text.
            mock_response.text = """
            <table class="table table-default table-striped table-hover table-condensed table-bordered small">
                <thead>
                    <tr>
                        <th>Start Time</th>
                        <th>Contest Name</th>
                        <th>Duration</th>
                        <th>Rated Range</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>2025-01-15 21:00:00+0900</td>
                        <td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
                        <td>01:40</td>
                        <td> - 1999</td>
                    </tr>
                    <tr>
                        <td>2025-01-14 21:00:00+0900</td>
                        <td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
                        <td>02:00</td>
                        <td>1000 - 2799</td>
                    </tr>
                </tbody>
            </table>
            """
            return mock_response
        else:
            # Return empty page for all other pages
            mock_response = Mock()
            mock_response.text = "<html><body>No table found</body></html>"
            return mock_response

    mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
    # Neutralize the politeness delay so the test stays fast.
    mocker.patch("scrapers.atcoder.time.sleep")

    result = scrape_contests()

    assert len(result) == 2
    # ABC/ARC names are shortened into the display_name form.
    assert result[0] == ContestSummary(
        id="abc350",
        name="AtCoder Beginner Contest 350",
        display_name="Beginner Contest 350 (ABC)",
    )
    assert result[1] == ContestSummary(
        id="arc170",
        name="AtCoder Regular Contest 170",
        display_name="Regular Contest 170 (ARC)",
    )
def test_scrape_contests_no_table(mocker):
    """A page without the archive table yields an empty contest list."""
    tableless_page = Mock()
    tableless_page.text = "<html><body>No table found</body></html>"
    mocker.patch("scrapers.atcoder.requests.get", return_value=tableless_page)
    # Skip the real inter-page delay.
    mocker.patch("scrapers.atcoder.time.sleep")
    assert scrape_contests() == []
def test_scrape_contests_network_error(mocker):
    """Request failures are swallowed: no exception, just an empty list."""
    failing_get = mocker.patch("scrapers.atcoder.requests.get")
    failing_get.side_effect = Exception("Network error")
    mocker.patch("scrapers.atcoder.time.sleep")
    contests = scrape_contests()
    assert contests == []

View file

@ -1,6 +1,7 @@
from unittest.mock import Mock
from scrapers.codeforces import scrape, scrape_contest_problems
from scrapers.models import ProblemSummary
from scrapers.codeforces import scrape, scrape_contest_problems, scrape_contests
from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_codeforces_html):
@ -51,3 +52,66 @@ def test_scrape_network_error(mocker):
result = scrape("https://codeforces.com/contest/1900/problem/A")
assert result == []
def test_scrape_contests_success(mocker):
    """contest.list API rows map to ContestSummary with shortened display names."""
    api_payload = {
        "status": "OK",
        "result": [
            {"id": 1951, "name": "Educational Codeforces Round 168 (Rated for Div. 2)"},
            {"id": 1950, "name": "Codeforces Round 936 (Div. 2)"},
            {"id": 1949, "name": "Codeforces Global Round 26"},
        ],
    }
    fake_response = Mock()
    fake_response.json.return_value = api_payload
    fake_scraper = Mock()
    fake_scraper.get.return_value = fake_response
    mocker.patch(
        "scrapers.codeforces.cloudscraper.create_scraper", return_value=fake_scraper
    )

    contests = scrape_contests()

    # Numeric ids become strings; "Codeforces"/"Codeforces Round" prefixes
    # are trimmed in the display name.
    expected = [
        ContestSummary(
            id="1951",
            name="Educational Codeforces Round 168 (Rated for Div. 2)",
            display_name="Educational Round 168",
        ),
        ContestSummary(
            id="1950",
            name="Codeforces Round 936 (Div. 2)",
            display_name="Round 936 (Div. 2)",
        ),
        ContestSummary(
            id="1949", name="Codeforces Global Round 26", display_name="Global Round 26"
        ),
    ]
    assert contests == expected
def test_scrape_contests_api_error(mocker):
    """A non-OK API status produces an empty contest list."""
    failed_response = Mock()
    failed_response.json.return_value = {"status": "FAILED", "result": []}
    failed_scraper = Mock()
    failed_scraper.get.return_value = failed_response
    mocker.patch(
        "scrapers.codeforces.cloudscraper.create_scraper",
        return_value=failed_scraper,
    )
    assert scrape_contests() == []
def test_scrape_contests_network_error(mocker):
    """Transport-level failures are reported as an empty contest list."""
    broken_scraper = Mock()
    broken_scraper.get.side_effect = Exception("Network error")
    mocker.patch(
        "scrapers.codeforces.cloudscraper.create_scraper",
        return_value=broken_scraper,
    )
    assert scrape_contests() == []