fix(ci): move imports
This commit is contained in:
parent
847307bd1f
commit
7b8aae7921
5 changed files with 475 additions and 95 deletions
131
lua/cp/init.lua
131
lua/cp/init.lua
|
|
@ -145,6 +145,42 @@ local function setup_problem(contest_id, problem_id, language)
|
|||
logger.log(('switched to problem %s'):format(ctx.problem_name))
|
||||
end
|
||||
|
||||
---Create the `io` directory (relative to the current working directory).
---The 'p' flag makes mkdir create missing parents and tolerate an
---already-existing directory.
local function ensure_io_directory()
  local io_dir = 'io'
  vim.fn.mkdir(io_dir, 'p')
end
|
||||
|
||||
---Scrape test cases for every problem that has no cached tests and log a
---one-line summary of the outcome.
---@param contest_id string
---@param missing_problems table[] problem entries; each is expected to carry an `id` field
local function scrape_missing_problems(contest_id, missing_problems)
  ensure_io_directory()

  logger.log(('scraping %d uncached problems...'):format(#missing_problems))

  local results =
    scrape.scrape_problems_parallel(state.platform, contest_id, missing_problems, config)

  -- Tally against the list we asked for rather than the result map: the
  -- scraper may return an empty table when it bails out early, and a problem
  -- without a result entry has to count as failed instead of vanishing from
  -- the summary. Iterating the list also keeps the failure order stable.
  local success_count = 0
  local failed_problems = {}
  for _, missing in ipairs(missing_problems) do
    local result = results and results[missing.id]
    if result and result.success then
      success_count = success_count + 1
    else
      failed_problems[#failed_problems + 1] = tostring(missing.id)
    end
  end

  if #failed_problems == 0 then
    logger.log(('scraping complete: %d/%d successful'):format(success_count, #missing_problems))
    return
  end

  logger.log(
    ('scraping complete: %d/%d successful, failed: %s'):format(
      success_count,
      #missing_problems,
      table.concat(failed_problems, ', ')
    ),
    vim.log.levels.WARN
  )
end
|
||||
|
||||
local function get_current_problem()
|
||||
local filename = vim.fn.expand('%:t:r')
|
||||
if filename == '' then
|
||||
|
|
@ -557,6 +593,62 @@ end
|
|||
|
||||
---Set up a whole contest on the currently selected platform.
---
---Loads the contest's problem list, scrapes test cases for every problem that
---has no cached tests, stores the contest id in `state`, and finally sets up
---the first problem of the contest.
---@param contest_id string
---@param language? string
---@return boolean ok false when no platform is set, scraping is disabled, or the
---problem list could not be loaded; true once the first problem has been set up
local function setup_contest(contest_id, language)
  local platform = state.platform
  if not platform then
    logger.log('no platform set', vim.log.levels.ERROR)
    return false
  end

  if not vim.tbl_contains(config.scrapers, platform) then
    logger.log('scraping disabled for ' .. platform, vim.log.levels.WARN)
    return false
  end

  logger.log(('setting up contest %s %s'):format(platform, contest_id))

  local metadata_result = scrape.scrape_contest_metadata(platform, contest_id)
  if not metadata_result.success then
    logger.log(
      'failed to load contest metadata: ' .. (metadata_result.error or 'unknown error'),
      vim.log.levels.ERROR
    )
    return false
  end

  local problems = metadata_result.problems
  if not problems or #problems == 0 then
    logger.log('no problems found in contest', vim.log.levels.ERROR)
    return false
  end

  logger.log(('found %d problems, checking cache...'):format(#problems))

  cache.load()
  local missing_problems = {}
  for _, problem in ipairs(problems) do
    if not cache.get_test_cases(platform, contest_id, problem.id) then
      missing_problems[#missing_problems + 1] = problem
    end
  end

  if #missing_problems > 0 then
    -- scrape_missing_problems logs the "scraping N uncached problems..." line
    -- itself, so it is not repeated here.
    scrape_missing_problems(contest_id, missing_problems)
  else
    logger.log('all problems already cached')
  end

  state.contest_id = contest_id
  if platform == 'cses' then
    -- For CSES the problem id is passed in the first slot and the second is nil.
    setup_problem(problems[1].id, nil, language)
  else
    setup_problem(contest_id, problems[1].id, language)
  end

  return true
end

---@param delta number 1 for next, -1 for prev
---@param language? string
|
||||
|
||||
local function navigate_problem(delta, language)
|
||||
if not state.platform or not state.contest_id then
|
||||
logger.log('no contest set. run :CP <platform> <contest> first', vim.log.levels.ERROR)
|
||||
|
|
@ -701,20 +793,12 @@ local function parse_command(args)
|
|||
language = language,
|
||||
}
|
||||
elseif #filtered_args == 2 then
|
||||
if first == 'cses' then
|
||||
logger.log(
|
||||
'CSES requires both category and problem ID. Usage: :CP cses <category> <problem_id>',
|
||||
vim.log.levels.ERROR
|
||||
)
|
||||
return { type = 'error' }
|
||||
else
|
||||
return {
|
||||
type = 'contest_setup',
|
||||
platform = first,
|
||||
contest = filtered_args[2],
|
||||
language = language,
|
||||
}
|
||||
end
|
||||
return {
|
||||
type = 'contest_setup',
|
||||
platform = first,
|
||||
contest = filtered_args[2],
|
||||
language = language,
|
||||
}
|
||||
elseif #filtered_args == 3 then
|
||||
return {
|
||||
type = 'full_setup',
|
||||
|
|
@ -779,24 +863,7 @@ function M.handle_command(opts)
|
|||
|
||||
if cmd.type == 'contest_setup' then
|
||||
if set_platform(cmd.platform) then
|
||||
state.contest_id = cmd.contest
|
||||
if vim.tbl_contains(config.scrapers, cmd.platform) then
|
||||
local metadata_result = scrape.scrape_contest_metadata(cmd.platform, cmd.contest)
|
||||
if not metadata_result.success then
|
||||
logger.log(
|
||||
'failed to load contest metadata: ' .. (metadata_result.error or 'unknown error'),
|
||||
vim.log.levels.WARN
|
||||
)
|
||||
else
|
||||
logger.log(
|
||||
('loaded %d problems for %s %s'):format(
|
||||
#metadata_result.problems,
|
||||
cmd.platform,
|
||||
cmd.contest
|
||||
)
|
||||
)
|
||||
end
|
||||
end
|
||||
setup_contest(cmd.contest, cmd.language)
|
||||
end
|
||||
return
|
||||
end
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
local M = {}
|
||||
local cache = require('cp.cache')
|
||||
local logger = require('cp.log')
|
||||
local problem = require('cp.problem')
|
||||
|
||||
local function get_plugin_path()
|
||||
local plugin_path = debug.getinfo(1, 'S').source:sub(2)
|
||||
|
|
@ -294,4 +295,127 @@ function M.scrape_problem(ctx)
|
|||
}
|
||||
end
|
||||
|
||||
---Scrape the test cases of several problems concurrently.
---
---One `uv run -m scrapers.<platform> tests ...` process is spawned per problem
---and all of them are started before any is awaited. For each process that
---reports success with at least one test, the tests are written to
---`<input base>.<i>.cpin` / `.cpout` files and stored in the cache.
---@param platform string
---@param contest_id string
---@param problems table[] problem entries; each is expected to carry an `id` field
---@param config table
---@return table<string, table> results map from problem id to its scrape result
---(`success`, plus `error` on failure)
function M.scrape_problems_parallel(platform, contest_id, problems, config)
  vim.validate({
    platform = { platform, 'string' },
    contest_id = { contest_id, 'string' },
    problems = { problems, 'table' },
    config = { config, 'table' },
  })

  -- Build a failure entry for every requested problem, so callers can tell
  -- "nothing could be scraped" apart from "nothing was requested".
  local function fail_all(message)
    local failed = {}
    for _, entry in ipairs(problems) do
      failed[entry.id] = { success = false, problem_id = entry.id, error = message }
    end
    return failed
  end

  if not check_internet_connectivity() then
    return fail_all('No internet connection')
  end

  if not setup_python_env() then
    return fail_all('Python environment setup failed')
  end

  local plugin_path = get_plugin_path()
  local jobs = {}

  -- `entry` (not `problem`) so the loop variable does not shadow the
  -- `cp.problem` module that is used further down.
  for _, entry in ipairs(problems) do
    local args = {
      'uv',
      'run',
      '--directory',
      plugin_path,
      '-m',
      'scrapers.' .. platform,
      'tests',
    }
    -- The cses scraper takes only the problem id; every other platform takes
    -- the contest id followed by the problem id.
    if platform ~= 'cses' then
      args[#args + 1] = contest_id
    end
    args[#args + 1] = entry.id

    jobs[entry.id] = {
      job = vim.system(args, {
        cwd = plugin_path,
        text = true,
        timeout = 30000, -- milliseconds
      }),
      problem = entry,
    }
  end

  local results = {}
  for problem_id, job_data in pairs(jobs) do
    local result = job_data.job:wait()
    local scrape_result = {
      success = false,
      problem_id = problem_id,
      error = 'Unknown error',
    }

    if result.code ~= 0 then
      scrape_result.error = 'Scraper execution failed: ' .. (result.stderr or 'Unknown error')
    else
      local ok, data = pcall(vim.json.decode, result.stdout)
      if not ok or type(data) ~= 'table' then
        -- Covers invalid JSON as well as valid non-object payloads such as
        -- `null` (decoded to vim.NIL), which cannot be indexed.
        scrape_result.error = 'Failed to parse scraper output'
      elseif not data.success then
        scrape_result.error = data.error or 'Failed to parse scraper output'
      else
        scrape_result = data

        if type(data.tests) == 'table' and #data.tests > 0 then
          local ctx = problem.create_context(platform, contest_id, problem_id, config)
          local base_name = vim.fn.fnamemodify(ctx.input_file, ':r')
          local cached_test_cases = {}

          for i, test_case in ipairs(data.tests) do
            -- Strip carriage returns before writing; gsub's second return
            -- value (the count) is dropped by the single-name assignment.
            local input_content = test_case.input:gsub('\r', '')
            local expected_content = test_case.expected:gsub('\r', '')

            vim.fn.writefile(
              vim.split(input_content, '\n', { plain = true }),
              base_name .. '.' .. i .. '.cpin'
            )
            vim.fn.writefile(
              vim.split(expected_content, '\n', { plain = true }),
              base_name .. '.' .. i .. '.cpout'
            )

            -- The cache keeps the text exactly as the scraper returned it.
            cached_test_cases[#cached_test_cases + 1] = {
              index = i,
              input = test_case.input,
              expected = test_case.expected,
            }
          end

          cache.set_test_cases(
            platform,
            contest_id,
            problem_id,
            cached_test_cases,
            data.timeout_ms,
            data.memory_mb
          )
        end
      end
    end

    results[problem_id] = scrape_result
  end

  return results
end
|
||||
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -168,70 +168,210 @@ def scrape(url: str) -> list[TestCase]:
|
|||
|
||||
|
||||
def scrape_contests() -> list[ContestSummary]:
    """Scrape the AtCoder contest archive and return one summary per contest.

    The number of archive pages is read from the pagination widget of the
    first page (falling back to 15 when it cannot be determined). Pages are
    then fetched with up to five worker threads, each page being retried on
    rate limiting or request errors, and the per-page results are concatenated
    in ascending page order.

    Returns:
        The contests found, in archive page order. A page that could not be
        scraped contributes no entries.
    """
    import concurrent.futures
    import random

    default_max_pages = 15
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # (title as it appears in the contest name, shortened title, abbreviation)
    known_series = (
        ("AtCoder Beginner Contest", "Beginner Contest", "ABC"),
        ("AtCoder Regular Contest", "Regular Contest", "ARC"),
        ("AtCoder Grand Contest", "Grand Contest", "AGC"),
        ("AtCoder Heuristic Contest", "Heuristic Contest", "AHC"),
    )

    def get_max_pages() -> int:
        """Return the last page number shown in the archive's pagination."""
        try:
            response = requests.get(
                "https://atcoder.jp/contests/archive", headers=headers, timeout=10
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")
            pagination = soup.find("ul", class_="pagination")
            if not pagination or not isinstance(pagination, Tag):
                return default_max_pages

            lis = pagination.find_all("li")
            if lis and isinstance(lis[-1], Tag):
                try:
                    return int(lis[-1].get_text().strip())
                except ValueError:
                    return default_max_pages
            return default_max_pages
        except Exception:
            return default_max_pages

    def generate_display_name_from_id(contest_id: str) -> str:
        """Derive a readable name from a contest id such as ``abc300``."""
        parts = contest_id.replace("-", " ").replace("_", " ")

        substitutions = (
            (r"\b(jsc|JSC)\b", "Japanese Student Championship"),
            (r"\b(wtf|WTF)\b", "World Tour Finals"),
            (r"\b(ahc)(\d+)\b", r"Heuristic Contest \2 (AHC)"),
            (r"\b(arc)(\d+)\b", r"Regular Contest \2 (ARC)"),
            (r"\b(abc)(\d+)\b", r"Beginner Contest \2 (ABC)"),
            (r"\b(agc)(\d+)\b", r"Grand Contest \2 (AGC)"),
        )
        for pattern, replacement in substitutions:
            parts = re.sub(pattern, replacement, parts, flags=re.IGNORECASE)

        return parts.title()

    def clean_contest_name(name: str) -> str:
        """Decode literal backslash escapes and fold full-width characters."""
        try:
            # Encoding with ``backslashreplace`` first turns real non-ASCII
            # characters into escapes that ``unicode_escape`` decodes back to
            # the same characters. A plain ``name.encode()`` would instead have
            # its UTF-8 bytes read as Latin-1, mangling every non-ASCII name.
            name = name.encode("ascii", "backslashreplace").decode("unicode_escape")
        except Exception:
            # Best effort: keep the name as scraped if it cannot be decoded.
            pass

        name = (
            name.replace("\uff08", "(")
            .replace("\uff09", ")")
            .replace("\u3000", " ")
        )
        # Map the remaining full-width ASCII variants onto plain ASCII.
        return re.sub(
            r"[\uff01-\uff5e]", lambda m: chr(ord(m.group()) - 0xFEE0), name
        )

    def choose_display_name(name: str, contest_id: str) -> str:
        """Pick the display name for a contest."""
        english_chars = sum(1 for c in name if c.isascii() and c.isalpha())
        total_chars = len(re.sub(r"\s+", "", name))

        # Mostly non-English names are replaced by a name built from the id.
        if total_chars > 0 and english_chars / total_chars < 0.3:
            return generate_display_name_from_id(contest_id)

        for full_title, short_title, abbreviation in known_series:
            if full_title in name:
                match = re.search(rf"{full_title} (\d+)", name)
                if match:
                    return f"{short_title} {match.group(1)} ({abbreviation})"
                break
        return name

    def parse_archive_page(html: str) -> list[ContestSummary]:
        """Extract the contests listed in one archive page."""
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table", class_="table")
        if not table:
            return []

        tbody = table.find("tbody")
        if not tbody or not isinstance(tbody, Tag):
            return []

        contests = []
        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 2:
                continue

            link = cells[1].find("a")
            if not link or not link.get("href"):
                continue

            contest_id = link.get("href").split("/")[-1]
            name = clean_contest_name(link.get_text().strip())
            contests.append(
                ContestSummary(
                    id=contest_id,
                    name=name,
                    display_name=choose_display_name(name, contest_id),
                )
            )
        return contests

    def scrape_page_with_retry(page: int, max_retries: int = 3) -> list[ContestSummary]:
        """Fetch and parse one archive page, retrying on 429 and request errors."""
        for attempt in range(max_retries):
            try:
                url = f"https://atcoder.jp/contests/archive?page={page}"
                response = requests.get(url, headers=headers, timeout=10)

                if response.status_code == 429:
                    backoff_time = (2**attempt) + random.uniform(0, 1)
                    print(
                        f"Rate limited on page {page}, retrying in {backoff_time:.1f}s",
                        file=sys.stderr,
                    )
                    time.sleep(backoff_time)
                    continue

                response.raise_for_status()
                return parse_archive_page(response.text)

            except requests.exceptions.RequestException as e:
                # Do not look at `response` here: when requests.get() itself
                # raised, it is unbound (first attempt) or left over from an
                # earlier attempt, and a 429 never reaches this handler anyway.
                print(
                    f"Failed to scrape page {page} (attempt {attempt + 1}): {e}",
                    file=sys.stderr,
                )
                if attempt == max_retries - 1:
                    return []
            except Exception as e:
                print(f"Unexpected error on page {page}: {e}", file=sys.stderr)
                return []

        return []

    max_pages = get_max_pages()
    page_results = {}

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_page = {
            executor.submit(scrape_page_with_retry, page): page
            for page in range(1, max_pages + 1)
        }

        for future in concurrent.futures.as_completed(future_to_page):
            page_results[future_to_page[future]] = future.result()

    # Futures complete in arbitrary order; restore the archive's page order.
    all_contests = []
    for page in sorted(page_results):
        all_contests.extend(page_results[page])

    return all_contests
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import asdict
|
||||
|
||||
|
|
@ -148,8 +149,6 @@ def parse_problem_url(contest_id: str, problem_letter: str) -> str:
|
|||
|
||||
|
||||
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
||||
import re
|
||||
|
||||
timeout_ms = None
|
||||
memory_mb = None
|
||||
|
||||
|
|
@ -240,22 +239,43 @@ def scrape_contests() -> list[ContestSummary]:
|
|||
contest_id = str(contest["id"])
|
||||
name = contest["name"]
|
||||
|
||||
# Clean up contest names for display
|
||||
display_name = name
|
||||
if "Educational Codeforces Round" in name:
|
||||
import re
|
||||
|
||||
match = re.search(r"Educational Codeforces Round (\d+)", name)
|
||||
if match:
|
||||
display_name = f"Educational Round {match.group(1)}"
|
||||
elif "Codeforces Round" in name and "Div" in name:
|
||||
match = re.search(r"Codeforces Round (\d+) \(Div\. (\d+)\)", name)
|
||||
if match:
|
||||
display_name = f"Round {match.group(1)} (Div. {match.group(2)})"
|
||||
elif "Codeforces Global Round" in name:
|
||||
match = re.search(r"Codeforces Global Round (\d+)", name)
|
||||
if match:
|
||||
display_name = f"Global Round {match.group(1)}"
|
||||
elif "Codeforces Round" in name:
|
||||
# Handle various Div patterns
|
||||
div_match = re.search(r"Codeforces Round (\d+) \(Div\. (\d+)\)", name)
|
||||
if div_match:
|
||||
display_name = (
|
||||
f"Round {div_match.group(1)} (Div. {div_match.group(2)})"
|
||||
)
|
||||
else:
|
||||
# Handle combined divs like "Div. 1 + Div. 2"
|
||||
combined_match = re.search(
|
||||
r"Codeforces Round (\d+) \(Div\. 1 \+ Div\. 2\)", name
|
||||
)
|
||||
if combined_match:
|
||||
display_name = (
|
||||
f"Round {combined_match.group(1)} (Div. 1 + Div. 2)"
|
||||
)
|
||||
else:
|
||||
# Handle single div like "Div. 1"
|
||||
single_div_match = re.search(
|
||||
r"Codeforces Round (\d+) \(Div\. 1\)", name
|
||||
)
|
||||
if single_div_match:
|
||||
display_name = f"Round {single_div_match.group(1)} (Div. 1)"
|
||||
else:
|
||||
# Fallback: extract just the round number
|
||||
round_match = re.search(r"Codeforces Round (\d+)", name)
|
||||
if round_match:
|
||||
display_name = f"Round {round_match.group(1)}"
|
||||
|
||||
contests.append(
|
||||
ContestSummary(id=contest_id, name=name, display_name=display_name)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
|
||||
import requests
|
||||
|
|
@ -39,6 +41,38 @@ def denormalize_category_name(category_id: str) -> str:
|
|||
return category_map.get(category_id, category_id.replace("_", " ").title())
|
||||
|
||||
|
||||
def request_with_retry(
    url: str, headers: dict, max_retries: int = 3
) -> requests.Response:
    """GET ``url`` with a short jittered delay, retrying on failure.

    Every attempt is preceded by a 0.5-0.8 s pause. An HTTP 429 response or a
    ``requests`` exception triggers an exponential backoff and another attempt
    until ``max_retries`` attempts have been made.

    Args:
        url: Address to fetch.
        headers: HTTP headers sent with the request.
        max_retries: Maximum number of attempts.

    Returns:
        The first response whose status passes ``raise_for_status()``.

    Raises:
        requests.exceptions.RequestException: The last attempt failed; this
            includes ``HTTPError`` when the server is still answering 429.
        Exception: No attempt was made at all (``max_retries`` < 1).
    """
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            # Pause before every request so consecutive calls are spaced out.
            time.sleep(0.5 + random.uniform(0, 0.3))

            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code == 429 and not is_last_attempt:
                backoff = (2**attempt) + random.uniform(0, 1)
                print(f"Rate limited, retrying in {backoff:.1f}s", file=sys.stderr)
                time.sleep(backoff)
                continue

            # A 429 on the final attempt falls through to here and surfaces as
            # HTTPError right away, instead of sleeping for a retry that will
            # never happen and then losing the status in a generic exception.
            response.raise_for_status()
            return response

        except requests.exceptions.RequestException as e:
            if is_last_attempt:
                raise
            backoff = 2**attempt
            print(
                f"Request failed (attempt {attempt + 1}), retrying in {backoff}s: {e}",
                file=sys.stderr,
            )
            time.sleep(backoff)

    # Only reachable when the loop body never ran.
    raise Exception("All retry attempts failed")
|
||||
|
||||
|
||||
def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
|
||||
category_name = denormalize_category_name(category_id)
|
||||
|
||||
|
|
@ -48,8 +82,7 @@ def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
|
|||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
response = requests.get(problemset_url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
response = request_with_retry(problemset_url, headers)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
|
|
@ -143,10 +176,7 @@ def scrape_categories() -> list[ContestSummary]:
|
|||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = requests.get(
|
||||
"https://cses.fi/problemset/", headers=headers, timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
response = request_with_retry("https://cses.fi/problemset/", headers)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
categories = []
|
||||
|
|
@ -293,8 +323,7 @@ def scrape(url: str) -> list[TestCase]:
|
|||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
response = request_with_retry(url, headers)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue