Merge pull request #140 from barrett-ruth/feat/async-scrapers

Asynchronous Scrapers
This commit is contained in:
Barrett Ruth 2025-10-04 05:34:26 +02:00 committed by GitHub
commit 8f466f135a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1034 additions and 2079 deletions

View file

@ -79,29 +79,22 @@ end
---@param platform string
---@param contest_id string
---@return ContestData?
---@return ContestData
function M.get_contest_data(platform, contest_id)
vim.validate({
platform = { platform, 'string' },
contest_id = { contest_id, 'string' },
})
if not cache_data[platform] then
return nil
end
local contest_data = cache_data[platform][contest_id]
if not contest_data or vim.tbl_isempty(contest_data) then
return nil
end
return contest_data
return cache_data[platform][contest_id] or {}
end
---@param platform string
---@param contest_id string
---@param problems Problem[]
function M.set_contest_data(platform, contest_id, problems)
---@param contest_name? string
---@param display_name? string
function M.set_contest_data(platform, contest_id, problems, contest_name, display_name)
vim.validate({
platform = { platform, 'string' },
contest_id = { contest_id, 'string' },
@ -109,36 +102,17 @@ function M.set_contest_data(platform, contest_id, problems)
})
cache_data[platform] = cache_data[platform] or {}
local existing = cache_data[platform][contest_id] or {}
local existing_by_id = {}
if existing.problems then
for _, p in ipairs(existing.problems) do
existing_by_id[p.id] = p
end
local out = {
name = contest_name,
display_name = display_name,
problems = vim.deepcopy(problems),
index_map = {},
}
for i, p in ipairs(out.problems) do
out.index_map[p.id] = i
end
local merged = {}
for _, p in ipairs(problems) do
local prev = existing_by_id[p.id] or {}
local merged_p = {
id = p.id,
name = p.name or prev.name,
test_cases = prev.test_cases,
timeout_ms = prev.timeout_ms,
memory_mb = prev.memory_mb,
interactive = prev.interactive,
}
table.insert(merged, merged_p)
end
existing.problems = merged
existing.index_map = {}
for i, p in ipairs(merged) do
existing.index_map[p.id] = i
end
cache_data[platform][contest_id] = existing
cache_data[platform][contest_id] = out
M.save()
end

View file

@ -36,9 +36,8 @@ function M.get_platforms()
return result
end
---Get list of contests for a specific platform
---@param platform string Platform identifier (e.g. "codeforces", "atcoder")
---@param refresh? boolean Whether to skip caching and append new contests
---@param platform string
---@param refresh? boolean
---@return cp.ContestItem[]
function M.get_platform_contests(platform, refresh)
logger.log(
@ -48,24 +47,21 @@ function M.get_platform_contests(platform, refresh)
)
cache.load()
local picker_contests = cache.get_contest_summaries(platform)
if refresh or vim.tbl_isempty(picker_contests) then
logger.log(('Cache miss on %s contests'):format(platform))
local contests = scraper.scrape_contest_list(platform)
local contests = scraper.scrape_contest_list(platform) -- sync
cache.set_contest_summaries(platform, contests)
picker_contests = cache.get_contest_summaries(platform) -- <-- reload after write
end
logger.log(
('Loaded %s %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]),
('Loaded %d %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]),
vim.log.levels.INFO,
true
)
picker_contests = cache.get_contest_summaries(platform)
return picker_contests
end

View file

@ -31,7 +31,7 @@ local function substitute_template(cmd_template, substitutions)
return out
end
local function build_command(cmd_template, executable, substitutions)
function M.build_command(cmd_template, executable, substitutions)
local cmd = substitute_template(cmd_template, substitutions)
if executable then
table.insert(cmd, 1, executable)
@ -198,10 +198,4 @@ function M.compile_problem(contest_config, is_debug)
return { success = true, output = nil }
end
M._util = {
get_language_from_file = get_language_from_file,
substitute_template = substitute_template,
build_command = build_command,
}
return M

View file

@ -78,8 +78,8 @@ end
---@param substitutions table<string, string>
---@return string[]
local function build_command(language_config, substitutions)
local exec_util = require('cp.runner.execute')._util
return exec_util.build_command(language_config.test, language_config.executable, substitutions)
local execute = require('cp.runner.execute')
return execute.build_command(language_config.test, language_config.executable, substitutions)
end
---@param contest_config ContestConfig
@ -98,28 +98,6 @@ local function run_single_test_case(contest_config, cp_config, test_case)
local binary_file = state.get_binary_file()
local substitutions = { source = source_file, binary = binary_file }
if language_config.compile and binary_file and vim.fn.filereadable(binary_file) == 0 then
local cr = exec.compile(language_config, substitutions)
local ansi = require('cp.ui.ansi')
local clean = ansi.bytes_to_string(cr.stdout or '')
if cr.code ~= 0 then
return {
status = 'fail',
actual = clean,
actual_highlights = {},
error = 'Compilation failed',
stderr = clean,
time_ms = 0,
rss_mb = 0,
code = cr.code,
ok = false,
signal = nil,
tled = false,
mled = false,
}
end
end
local cmd = build_command(language_config, substitutions)
local stdin_content = (test_case.input or '') .. '\n'
local timeout_ms = (run_panel_state.constraints and run_panel_state.constraints.timeout_ms) or 0

View file

@ -1,67 +1,110 @@
local M = {}
local utils = require('cp.utils')
local logger = require('cp.log')
local utils = require('cp.utils')
local function syshandle(result)
if result.code ~= 0 then
local msg = 'Scraper failed: ' .. (result.stderr or 'Unknown error')
logger.log(msg, vim.log.levels.ERROR)
return {
success = false,
error = msg,
}
return { success = false, error = msg }
end
local ok, data = pcall(vim.json.decode, result.stdout)
if not ok then
local msg = 'Failed to parse scraper output: ' .. tostring(data)
logger.log(msg, vim.log.levels.ERROR)
return {
success = false,
error = msg,
}
return { success = false, error = msg }
end
return {
success = true,
data = data,
}
return { success = true, data = data }
end
---@param platform string
---@param subcommand string
---@param args string[]
---@param opts { sync?: boolean, ndjson?: boolean, on_event?: fun(ev: table), on_exit?: fun(result: table) }
local function run_scraper(platform, subcommand, args, opts)
if not utils.setup_python_env() then
local msg = 'Python environment setup failed'
logger.log(msg, vim.log.levels.ERROR)
return {
success = false,
message = msg,
}
end
local plugin_path = utils.get_plugin_path()
local cmd = {
'uv',
'run',
'--directory',
plugin_path,
'-m',
'scrapers.' .. platform,
subcommand,
}
local cmd = { 'uv', 'run', '--directory', plugin_path, '-m', 'scrapers.' .. platform, subcommand }
vim.list_extend(cmd, args)
local sysopts = {
text = true,
timeout = 30000,
}
if opts and opts.ndjson then
local uv = vim.loop
local stdout = uv.new_pipe(false)
local stderr = uv.new_pipe(false)
local buf = ''
if opts.sync then
local handle
handle = uv.spawn(
cmd[1],
{ args = vim.list_slice(cmd, 2), stdio = { nil, stdout, stderr } },
function(code, signal)
if buf ~= '' and opts.on_event then
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
if ok_tail then
opts.on_event(ev_tail)
end
buf = ''
end
if opts.on_exit then
opts.on_exit({ success = (code == 0), code = code, signal = signal })
end
if not stdout:is_closing() then
stdout:close()
end
if not stderr:is_closing() then
stderr:close()
end
if handle and not handle:is_closing() then
handle:close()
end
end
)
if not handle then
logger.log('Failed to start scraper process', vim.log.levels.ERROR)
return { success = false, error = 'spawn failed' }
end
uv.read_start(stdout, function(_, data)
if data == nil then
if buf ~= '' and opts.on_event then
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
if ok_tail then
opts.on_event(ev_tail)
end
buf = ''
end
return
end
buf = buf .. data
while true do
local s, e = buf:find('\n', 1, true)
if not s then
break
end
local line = buf:sub(1, s - 1)
buf = buf:sub(e + 1)
local ok, ev = pcall(vim.json.decode, line)
if ok and opts.on_event then
opts.on_event(ev)
end
end
end)
uv.read_start(stderr, function(_, _) end)
return
end
local sysopts = { text = true, timeout = 30000 }
if opts and opts.sync then
local result = vim.system(cmd, sysopts):wait()
return syshandle(result)
else
vim.system(cmd, sysopts, function(result)
return opts.on_exit(syshandle(result))
if opts and opts.on_exit then
return opts.on_exit(syshandle(result))
end
end)
end
end
@ -93,50 +136,59 @@ end
function M.scrape_contest_list(platform)
local result = run_scraper(platform, 'contests', {}, { sync = true })
if not result.success or not result.data.contests then
if not result or not result.success or not (result.data and result.data.contests) then
logger.log(
('Could not scrape contests list for platform %s: %s'):format(platform, result.msg),
('Could not scrape contests list for platform %s: %s'):format(
platform,
(result and result.error) or 'unknown'
),
vim.log.levels.ERROR
)
return {}
end
return result.data.contests
end
function M.scrape_problem_tests(platform, contest_id, problem_id, callback)
run_scraper(platform, 'tests', { contest_id, problem_id }, {
on_exit = function(result)
if not result.success or not result.data.tests then
logger.log(
'Failed to load tests: ' .. (result.msg or 'unknown error'),
vim.log.levels.ERROR
)
return {}
---@param platform string
---@param contest_id string
---@param callback fun(data: table)|nil
function M.scrape_all_tests(platform, contest_id, callback)
run_scraper(platform, 'tests', { contest_id }, {
ndjson = true,
on_event = function(ev)
if ev.done then
return
end
if ev.error and ev.problem_id then
logger.log(
('Failed to load tests for %s/%s: %s'):format(contest_id, ev.problem_id, ev.error),
vim.log.levels.WARN
)
return
end
if not ev.problem_id or not ev.tests then
return
end
vim.schedule(function()
vim.system({ 'mkdir', '-p', 'build', 'io' }):wait()
local config = require('cp.config')
local base_name = config.default_filename(contest_id, problem_id)
for i, test_case in ipairs(result.data.tests) do
local base_name = config.default_filename(contest_id, ev.problem_id)
for i, t in ipairs(ev.tests) do
local input_file = 'io/' .. base_name .. '.' .. i .. '.cpin'
local expected_file = 'io/' .. base_name .. '.' .. i .. '.cpout'
local input_content = test_case.input:gsub('\r', '')
local expected_content = test_case.expected:gsub('\r', '')
pcall(vim.fn.writefile, vim.split(input_content, '\n', { trimempty = true }), input_file)
pcall(
vim.fn.writefile,
vim.split(expected_content, '\n', { trimempty = true }),
expected_file
)
local input_content = t.input:gsub('\r', '')
local expected_content = t.expected:gsub('\r', '')
vim.fn.writefile(vim.split(input_content, '\n', { trimempty = true }), input_file)
vim.fn.writefile(vim.split(expected_content, '\n', { trimempty = true }), expected_file)
end
if type(callback) == 'function' then
callback(result.data)
callback({
tests = ev.tests,
timeout_ms = ev.timeout_ms or 0,
memory_mb = ev.memory_mb or 0,
interactive = ev.interactive or false,
problem_id = ev.problem_id,
})
end
end)
end,

View file

@ -28,45 +28,26 @@ function M.set_platform(platform)
return true
end
local function backfill_missing_tests(platform, contest_id, problems)
cache.load()
local missing = {}
for _, prob in ipairs(problems) do
if not cache.get_test_cases(platform, contest_id, prob.id) then
table.insert(missing, prob.id)
end
end
if #missing == 0 then
logger.log(('All problems already cached for %s contest %s.'):format(platform, contest_id))
return
end
for _, pid in ipairs(missing) do
local captured = pid
scraper.scrape_problem_tests(platform, contest_id, captured, function(result)
local cached_tests = {}
if result.tests then
for i, t in ipairs(result.tests) do
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
end
end
cache.set_test_cases(
platform,
contest_id,
captured,
cached_tests,
result.timeout_ms,
result.memory_mb
)
end)
end
end
---@class TestCaseLite
---@field input string
---@field expected string
---@class ScrapeEvent
---@field problem_id string
---@field tests TestCaseLite[]|nil
---@field timeout_ms integer|nil
---@field memory_mb integer|nil
---@field interactive boolean|nil
---@field error string|nil
---@field done boolean|nil
---@field succeeded integer|nil
---@field failed integer|nil
---@param platform string
---@param contest_id string
---@param language string|nil
---@param problem_id string|nil
function M.setup_contest(platform, contest_id, language, problem_id)
if not platform then
logger.log('No platform configured. Use :CP <platform> <contest> [--{lang=<lang>,debug} first.')
return
end
local config = config_module.get_config()
if not vim.tbl_contains(config.scrapers, platform) then
logger.log(('Scraping disabled for %s.'):format(platform), vim.log.levels.WARN)
@ -75,28 +56,47 @@ function M.setup_contest(platform, contest_id, language, problem_id)
state.set_contest_id(contest_id)
cache.load()
local contest_data = cache.get_contest_data(platform, contest_id)
local function proceed(contest_data)
local problems = contest_data.problems
local pid = problems[(problem_id and contest_data.index_map[problem_id] or 1)].id
M.setup_problem(pid, language)
local cached_len = #vim.tbl_filter(function(p)
return cache.get_test_cases(platform, contest_id, p.id) ~= nil
end, problems)
if cached_len ~= #problems then
scraper.scrape_all_tests(platform, contest_id, function(ev)
local cached_tests = {}
for i, t in ipairs(ev.tests) do
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
end
cache.set_test_cases(
platform,
contest_id,
ev.problem_id,
cached_tests,
ev.timeout_ms or 0,
ev.memory_mb or 0
)
end)
end
end
local contest_data = cache.get_contest_data(platform, contest_id)
if not contest_data or not contest_data.problems then
logger.log('Fetching contests problems...', vim.log.levels.INFO, true)
scraper.scrape_contest_metadata(platform, contest_id, function(result)
local problems = result.problems or {}
cache.set_contest_data(platform, contest_id, problems)
cache.set_contest_data(platform, contest_id, problems, result.name, result.display_name)
logger.log(('Found %d problems for %s contest %s.'):format(#problems, platform, contest_id))
local pid = problem_id or (problems[1] and problems[1].id)
if pid then
M.setup_problem(pid, language)
end
backfill_missing_tests(platform, contest_id, problems)
proceed(cache.get_contest_data(platform, contest_id))
end)
else
local problems = contest_data.problems
local pid = problem_id or (problems[1] and problems[1].id)
if pid then
M.setup_problem(pid, language)
end
backfill_missing_tests(platform, contest_id, problems)
return
end
proceed(contest_data)
end
---@param problem_id string
@ -195,19 +195,9 @@ function M.navigate_problem(direction, language)
end
local problems = contest_data.problems
local current_index
for i, prob in ipairs(problems) do
if prob.id == current_problem_id then
current_index = i
break
end
end
if not current_index then
M.setup_contest(platform, contest_id, language, problems[1].id)
return
end
local index = contest_data.index_map[current_problem_id]
local new_index = current_index + direction
local new_index = index + direction
if new_index < 1 or new_index > #problems then
return
end

View file

@ -57,7 +57,7 @@ local function find_gnu_time()
_time_cached = true
_time_path = nil
_time_reason = 'GNU time not found (install `time` on Linux or `brew install coreutils` on macOS)'
_time_reason = 'GNU time not found'
return _time_path, _time_reason
end
@ -214,7 +214,7 @@ local function find_gnu_timeout()
_timeout_cached = true
_timeout_path = nil
_timeout_reason = 'GNU timeout not found (install `coreutils`; macOS: `brew install coreutils`)'
_timeout_reason = 'GNU timeout not found'
return _timeout_path, _timeout_reason
end

View file

@ -8,10 +8,10 @@ dependencies = [
"backoff>=2.2.1",
"beautifulsoup4>=4.13.5",
"curl-cffi>=0.13.0",
"playwright>=1.55.0",
"httpx>=0.28.1",
"ndjson>=0.3.1",
"requests>=2.32.5",
"scrapling[fetchers]>=0.3.5",
"scrapy>=2.13.3",
]
[dependency-groups]
@ -22,6 +22,7 @@ dev = [
"pytest>=8.0.0",
"pytest-mock>=3.12.0",
"pre-commit>=4.3.0",
"basedpyright>=1.31.6",
]
[tool.pytest.ini_options]

View file

@ -1,14 +1,19 @@
#!/usr/bin/env python3
import concurrent.futures
import asyncio
import json
import re
import sys
import time
from dataclasses import asdict
from typing import Any
import backoff
import httpx
import requests
from bs4 import BeautifulSoup, Tag
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from .base import BaseScraper
from .models import (
@ -20,398 +25,352 @@ from .models import (
TestsResult,
)
MIB_TO_MB = 1.048576
BASE_URL = "https://atcoder.jp"
ARCHIVE_URL = f"{BASE_URL}/contests/archive"
TIMEOUT_SECONDS = 30
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
RETRY_STATUS = {429, 502, 503, 504}
FATAL_STATUS = {400, 401, 403, 404, 410}
def _make_request(url: str, timeout: int = 10) -> requests.Response:
headers = {
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0.0.0 Safari/537.36"
)
}
_session = requests.Session()
_adapter = HTTPAdapter(
pool_connections=100,
pool_maxsize=100,
max_retries=Retry(total=0),
)
_session.mount("https://", _adapter)
_session.mount("http://", _adapter)
@backoff.on_exception(
backoff.expo,
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
max_tries=5,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Request error on {url} (attempt {details['tries']}), "
f"retrying in {details['wait']:.1f}s: {details['exception']}",
file=sys.stderr,
),
def _give_up_requests(exc: Exception) -> bool:
if isinstance(exc, requests.HTTPError) and exc.response is not None:
return exc.response.status_code in FATAL_STATUS
return False
def _retry_after_requests(details):
exc = details.get("exception")
if isinstance(exc, requests.HTTPError) and exc.response is not None:
ra = exc.response.headers.get("Retry-After")
if ra:
try:
time.sleep(max(0.0, float(ra)))
except ValueError:
pass
@backoff.on_exception(
backoff.expo,
(requests.ConnectionError, requests.Timeout, requests.HTTPError),
max_tries=5,
jitter=backoff.full_jitter,
giveup=_give_up_requests,
on_backoff=_retry_after_requests,
)
def _fetch(url: str) -> str:
r = _session.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
if r.status_code in RETRY_STATUS:
raise requests.HTTPError(response=r)
r.raise_for_status()
return r.text
def _giveup_httpx(exc: Exception) -> bool:
return (
isinstance(exc, httpx.HTTPStatusError)
and exc.response is not None
and (exc.response.status_code in FATAL_STATUS)
)
@backoff.on_predicate(
backoff.expo,
lambda resp: resp.status_code == 429,
max_tries=5,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Rate limited on {url}, retrying in {details['wait']:.1f}s",
file=sys.stderr,
),
@backoff.on_exception(
backoff.expo,
(httpx.ConnectError, httpx.ReadTimeout, httpx.HTTPStatusError),
max_tries=5,
jitter=backoff.full_jitter,
giveup=_giveup_httpx,
)
async def _get_async(client: httpx.AsyncClient, url: str) -> str:
r = await client.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
r.raise_for_status()
return r.text
def _text_from_pre(pre: Tag) -> str:
return (
pre.get_text(separator="\n", strip=False)
.replace("\r", "")
.replace("\xa0", " ")
.rstrip("\n")
)
def _req():
return requests.get(url, headers=headers, timeout=timeout)
resp = _req()
resp.raise_for_status()
return resp
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
timeout_ms = None
memory_mb = None
def _parse_last_page(html: str) -> int:
soup = BeautifulSoup(html, "html.parser")
nav = soup.select_one("ul.pagination")
if not nav:
return 1
nums = []
for a in nav.select("a"):
s = a.get_text(strip=True)
if s.isdigit():
nums.append(int(s))
return max(nums) if nums else 1
paragraphs = soup.find_all("p")
for p in paragraphs:
text = p.get_text()
if "Time Limit:" in text and "Memory Limit:" in text:
time_match = re.search(r"Time Limit:\s*(\d+)\s*sec", text)
if time_match:
seconds = int(time_match.group(1))
timeout_ms = seconds * 1000
memory_match = re.search(r"Memory Limit:\s*(\d+)\s*MiB", text)
if memory_match:
memory_mib = int(memory_match.group(1))
memory_mb = round(memory_mib * 1.048576, 2)
break
def _parse_archive_contests(html: str) -> list[ContestSummary]:
soup = BeautifulSoup(html, "html.parser")
tbody = soup.select_one("table.table-default tbody") or soup.select_one("tbody")
if not tbody:
return []
out: list[ContestSummary] = []
for tr in tbody.select("tr"):
a = tr.select_one("a[href^='/contests/']")
if not a:
continue
href_attr = a.get("href")
if not isinstance(href_attr, str):
continue
m = re.search(r"/contests/([^/?#]+)", href_attr)
if not m:
continue
cid = m.group(1)
name = a.get_text(strip=True)
out.append(ContestSummary(id=cid, name=name, display_name=name))
return out
if timeout_ms is None:
raise ValueError("Could not find valid timeout in problem constraints")
if memory_mb is None:
raise ValueError("Could not find valid memory limit in problem constraints")
def _parse_tasks_list(html: str) -> list[dict[str, str]]:
soup = BeautifulSoup(html, "html.parser")
tbody = soup.select_one("table tbody")
if not tbody:
return []
rows: list[dict[str, str]] = []
for tr in tbody.select("tr"):
tds = tr.select("td")
if len(tds) < 2:
continue
letter = tds[0].get_text(strip=True)
a = tds[1].select_one("a[href*='/tasks/']")
if not a:
continue
href_attr = a.get("href")
if not isinstance(href_attr, str):
continue
m = re.search(r"/contests/[^/]+/tasks/([^/?#]+)", href_attr)
if not m:
continue
slug = m.group(1)
title = a.get_text(strip=True)
rows.append({"letter": letter, "title": title, "slug": slug})
return rows
def _extract_limits(html: str) -> tuple[int, float]:
soup = BeautifulSoup(html, "html.parser")
txt = soup.get_text(" ", strip=True)
timeout_ms = 0
memory_mb = 0.0
ts = re.search(r"Time\s*Limit:\s*([\d.]+)\s*sec", txt, flags=re.I)
if ts:
timeout_ms = int(float(ts.group(1)) * 1000)
ms = re.search(r"Memory\s*Limit:\s*(\d+)\s*MiB", txt, flags=re.I)
if ms:
memory_mb = float(ms.group(1)) * MIB_TO_MB
return timeout_ms, memory_mb
def parse_problem_url(contest_id: str, problem_letter: str) -> str:
task_id: str = f"{contest_id}_{problem_letter}"
return f"https://atcoder.jp/contests/{contest_id}/tasks/{task_id}"
def _extract_samples(html: str) -> list[TestCase]:
soup = BeautifulSoup(html, "html.parser")
root = soup.select_one("#task-statement") or soup
inputs: dict[str, str] = {}
outputs: dict[str, str] = {}
for h in root.find_all(re.compile(r"h[2-4]")):
title = h.get_text(" ", strip=True)
pre = h.find_next("pre")
if not pre:
continue
t = _text_from_pre(pre)
mi = re.search(r"Sample\s*Input\s*(\d+)", title, flags=re.I)
mo = re.search(r"Sample\s*Output\s*(\d+)", title, flags=re.I)
if mi:
inputs[mi.group(1)] = t
elif mo:
outputs[mo.group(1)] = t
cases: list[TestCase] = []
for k in sorted(set(inputs) & set(outputs), key=lambda s: int(s)):
cases.append(TestCase(input=inputs[k], expected=outputs[k]))
return cases
def extract_problem_from_row(row, contest_id: str) -> ProblemSummary | None:
cells = row.find_all("td")
if len(cells) < 2:
return None
task_link = cells[1].find("a")
if not task_link:
return None
task_name = task_link.get_text(strip=True)
task_href = task_link.get("href", "")
if not task_href:
return None
task_id = task_href.split("/")[-1]
if not task_id.startswith(contest_id + "_"):
return None
problem_letter = task_id[len(contest_id) + 1 :]
if not problem_letter or not task_name:
return None
return ProblemSummary(id=problem_letter.lower(), name=task_name)
def _scrape_tasks_sync(contest_id: str) -> list[dict[str, str]]:
html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks")
return _parse_tasks_list(html)
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
try:
contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks"
response = _make_request(contest_url)
soup = BeautifulSoup(response.text, "html.parser")
task_table = soup.find("table", class_="table")
if not task_table or not isinstance(task_table, Tag):
return []
rows = task_table.find_all("tr")[1:]
problems: list[ProblemSummary] = []
for row in rows:
problem = extract_problem_from_row(row, contest_id)
if problem:
problems.append(problem)
return problems
except Exception as e:
print(f"Failed to scrape AtCoder contest problems: {e}", file=sys.stderr)
return []
def _scrape_problem_page_sync(contest_id: str, slug: str) -> dict[str, Any]:
html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks/{slug}")
tests = _extract_samples(html)
timeout_ms, memory_mb = _extract_limits(html)
return {
"tests": tests,
"timeout_ms": timeout_ms,
"memory_mb": memory_mb,
"interactive": False,
}
def extract_test_case_from_headers(sample_headers, i: int) -> tuple[str, str] | None:
if i >= len(sample_headers):
return None
header = sample_headers[i]
if "input" not in header.get_text().lower():
return None
input_pre = header.find_next("pre")
if not input_pre or i + 1 >= len(sample_headers):
return None
next_header = sample_headers[i + 1]
if "output" not in next_header.get_text().lower():
return None
output_pre = next_header.find_next("pre")
if not output_pre:
return None
input_text = input_pre.get_text().strip().replace("\r", "")
output_text = output_pre.get_text().strip().replace("\r", "")
if not input_text or not output_text:
return None
return (input_text, output_text)
def _to_problem_summaries(rows: list[dict[str, str]]) -> list[ProblemSummary]:
out: list[ProblemSummary] = []
seen: set[str] = set()
for r in rows:
letter = (r.get("letter") or "").strip().upper()
title = r.get("title") or ""
if not letter:
continue
pid = letter.lower()
if pid in seen:
continue
seen.add(pid)
out.append(ProblemSummary(id=pid, name=title))
return out
def scrape(url: str) -> list[TestCase]:
try:
response = _make_request(url)
soup = BeautifulSoup(response.text, "html.parser")
sample_headers = soup.find_all(
"h3", string=lambda x: x and "sample" in x.lower() if x else False
)
tests: list[TestCase] = []
i = 0
while i < len(sample_headers):
test_case = extract_test_case_from_headers(sample_headers, i)
if test_case:
input_text, output_text = test_case
tests.append(TestCase(input=input_text, expected=output_text))
i += 2
else:
i += 1
return tests
except Exception as e:
print(f"Error scraping AtCoder: {e}", file=sys.stderr)
return []
async def _fetch_all_contests_async() -> list[ContestSummary]:
async with httpx.AsyncClient(
limits=httpx.Limits(max_connections=100, max_keepalive_connections=100)
) as client:
first_html = await _get_async(client, ARCHIVE_URL)
last = _parse_last_page(first_html)
out = _parse_archive_contests(first_html)
if last <= 1:
return out
tasks = [
asyncio.create_task(_get_async(client, f"{ARCHIVE_URL}?page={p}"))
for p in range(2, last + 1)
]
for coro in asyncio.as_completed(tasks):
html = await coro
out.extend(_parse_archive_contests(html))
return out
def scrape_contests() -> list[ContestSummary]:
def get_max_pages() -> int:
try:
response = _make_request("https://atcoder.jp/contests/archive")
soup = BeautifulSoup(response.text, "html.parser")
pagination = soup.find("ul", class_="pagination")
if not pagination or not isinstance(pagination, Tag):
return 15
lis = pagination.find_all("li")
if lis and isinstance(lis[-1], Tag):
last_li_text = lis[-1].get_text().strip()
try:
return int(last_li_text)
except ValueError:
return 15
return 15
except Exception:
return 15
def scrape_page(page: int) -> list[ContestSummary]:
try:
response = _make_request(f"https://atcoder.jp/contests/archive?page={page}")
except Exception:
return []
soup = BeautifulSoup(response.text, "html.parser")
table = soup.find("table", class_="table")
if not table:
return []
tbody = table.find("tbody")
if not tbody or not isinstance(tbody, Tag):
return []
rows = tbody.find_all("tr")
if not rows:
return []
contests = []
for row in rows:
cells = row.find_all("td")
if len(cells) < 2:
continue
contest_cell = cells[1]
link = contest_cell.find("a")
if not link or not link.get("href"):
continue
href = link.get("href")
contest_id = href.split("/")[-1]
name = link.get_text().strip()
try:
name = name.encode().decode("unicode_escape")
except (UnicodeDecodeError, UnicodeEncodeError):
pass
name = (
name.replace("\uff08", "(")
.replace("\uff09", ")")
.replace("\u3000", " ")
)
name = re.sub(
r"[\uff01-\uff5e]", lambda m: chr(ord(m.group()) - 0xFEE0), name
)
if not (
contest_id.startswith("ahc") or name.lower().find("heuristic") != -1
):
contests.append(
ContestSummary(id=contest_id, name=name, display_name=name)
)
return contests
max_pages = get_max_pages()
page_results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_page = {
executor.submit(scrape_page, page): page for page in range(1, max_pages + 1)
}
for future in concurrent.futures.as_completed(future_to_page):
page = future_to_page[future]
page_contests = future.result()
page_results[page] = page_contests
all_contests = []
for page in sorted(page_results.keys()):
all_contests.extend(page_results[page])
return all_contests
class AtCoderScraper(BaseScraper):
class AtcoderScraper(BaseScraper):
@property
def platform_name(self) -> str:
return "atcoder"
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id)
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
return self._safe_execute(
"tests", self._scrape_tests_impl, contest_id, problem_id
)
def scrape_contest_list(self) -> ContestListResult:
return self._safe_execute("contests", self._scrape_contests_impl)
def _safe_execute(self, operation: str, func, *args):
try:
return func(*args)
except Exception as e:
error_msg = f"{self.platform_name}: {str(e)}"
if operation == "metadata":
return MetadataResult(success=False, error=error_msg)
elif operation == "tests":
return TestsResult(
success=False,
error=error_msg,
problem_id="",
url="",
tests=[],
timeout_ms=0,
memory_mb=0,
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
async def impl(cid: str) -> MetadataResult:
rows = await asyncio.to_thread(_scrape_tasks_sync, cid)
problems = _to_problem_summaries(rows)
if not problems:
return self._create_metadata_error(
f"No problems found for contest {cid}", cid
)
elif operation == "contests":
return ContestListResult(success=False, error=error_msg)
def _scrape_metadata_impl(self, contest_id: str) -> MetadataResult:
problems = scrape_contest_problems(contest_id)
if not problems:
return MetadataResult(
success=False,
error=f"{self.platform_name}: No problems found for contest {contest_id}",
)
return MetadataResult(
success=True, error="", contest_id=contest_id, problems=problems
)
def _scrape_tests_impl(self, contest_id: str, problem_id: str) -> TestsResult:
problem_letter = problem_id.upper()
url = parse_problem_url(contest_id, problem_letter)
tests = scrape(url)
response = _make_request(url)
soup = BeautifulSoup(response.text, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup)
if not tests:
return TestsResult(
success=False,
error=f"{self.platform_name}: No tests found for {contest_id} {problem_letter}",
problem_id=f"{contest_id}_{problem_id.lower()}",
url=url,
tests=[],
timeout_ms=timeout_ms,
memory_mb=memory_mb,
success=True, error="", contest_id=cid, problems=problems
)
return TestsResult(
success=True,
error="",
problem_id=f"{contest_id}_{problem_id.lower()}",
url=url,
tests=tests,
timeout_ms=timeout_ms,
memory_mb=memory_mb,
)
return await self._safe_execute("metadata", impl, contest_id)
def _scrape_contests_impl(self) -> ContestListResult:
contests = scrape_contests()
if not contests:
return ContestListResult(
success=False, error=f"{self.platform_name}: No contests found"
)
return ContestListResult(success=True, error="", contests=contests)
async def scrape_contest_list(self) -> ContestListResult:
async def impl() -> ContestListResult:
try:
contests = await _fetch_all_contests_async()
except Exception as e:
return self._create_contests_error(str(e))
if not contests:
return self._create_contests_error("No contests found")
return ContestListResult(success=True, error="", contests=contests)
return await self._safe_execute("contests", impl)
async def stream_tests_for_category_async(self, category_id: str) -> None:
rows = await asyncio.to_thread(_scrape_tasks_sync, category_id)
async def emit(row: dict[str, str]) -> None:
letter = (row.get("letter") or "").strip().lower()
slug = row.get("slug") or ""
if not letter or not slug:
return
try:
data = await asyncio.to_thread(
_scrape_problem_page_sync, category_id, slug
)
tests: list[TestCase] = data["tests"]
if not tests:
print(
json.dumps(
{
"problem_id": letter,
"error": f"{self.platform_name}: no tests found",
}
),
flush=True,
)
return
print(
json.dumps(
{
"problem_id": letter,
"tests": [
{"input": t.input, "expected": t.expected}
for t in tests
],
"timeout_ms": data["timeout_ms"],
"memory_mb": data["memory_mb"],
"interactive": bool(data["interactive"]),
}
),
flush=True,
)
except Exception as e:
print(
json.dumps(
{
"problem_id": letter,
"error": f"{self.platform_name}: {str(e)}",
}
),
flush=True,
)
await asyncio.gather(*(emit(r) for r in rows))
def main() -> None:
async def main_async() -> int:
if len(sys.argv) < 2:
result = MetadataResult(
success=False,
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> <problem_letter> OR atcoder.py contests",
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> OR atcoder.py contests",
)
print(json.dumps(asdict(result)))
sys.exit(1)
return 1
mode: str = sys.argv[1]
scraper = AtCoderScraper()
scraper = AtcoderScraper()
if mode == "metadata":
if len(sys.argv) != 3:
result = MetadataResult(
success=False,
error="Usage: atcoder.py metadata <contest_id>",
success=False, error="Usage: atcoder.py metadata <contest_id>"
)
print(json.dumps(asdict(result)))
sys.exit(1)
contest_id: str = sys.argv[2]
result = scraper.scrape_contest_metadata(contest_id)
return 1
contest_id = sys.argv[2]
result = await scraper.scrape_contest_metadata(contest_id)
print(json.dumps(asdict(result)))
if not result.success:
sys.exit(1)
return 0 if result.success else 1
elif mode == "tests":
if len(sys.argv) != 4:
if mode == "tests":
if len(sys.argv) != 3:
tests_result = TestsResult(
success=False,
error="Usage: atcoder.py tests <contest_id> <problem_letter>",
error="Usage: atcoder.py tests <contest_id>",
problem_id="",
url="",
tests=[],
@ -419,35 +378,32 @@ def main() -> None:
memory_mb=0,
)
print(json.dumps(asdict(tests_result)))
sys.exit(1)
return 1
contest_id = sys.argv[2]
await scraper.stream_tests_for_category_async(contest_id)
return 0
test_contest_id: str = sys.argv[2]
problem_letter: str = sys.argv[3]
tests_result = scraper.scrape_problem_tests(test_contest_id, problem_letter)
print(json.dumps(asdict(tests_result)))
if not tests_result.success:
sys.exit(1)
elif mode == "contests":
if mode == "contests":
if len(sys.argv) != 2:
contest_result = ContestListResult(
success=False, error="Usage: atcoder.py contests"
)
print(json.dumps(asdict(contest_result)))
sys.exit(1)
contest_result = scraper.scrape_contest_list()
return 1
contest_result = await scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result)))
if not contest_result.success:
sys.exit(1)
return 0 if contest_result.success else 1
else:
result = MetadataResult(
success=False,
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'",
)
print(json.dumps(asdict(result)))
sys.exit(1)
result = MetadataResult(
success=False,
error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
)
print(json.dumps(asdict(result)))
return 1
def main() -> None:
sys.exit(asyncio.run(main_async()))
if __name__ == "__main__":

View file

@ -1,8 +1,13 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, ParamSpec, cast
from .models import ContestListResult, MetadataResult, TestsResult
P = ParamSpec("P")
@dataclass
class ScraperConfig:
@ -13,21 +18,18 @@ class ScraperConfig:
class BaseScraper(ABC):
def __init__(self, config: ScraperConfig | None = None):
self.config = config or ScraperConfig()
@property
@abstractmethod
def platform_name(self) -> str: ...
@abstractmethod
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ...
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ...
@abstractmethod
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: ...
async def scrape_contest_list(self) -> ContestListResult: ...
@abstractmethod
def scrape_contest_list(self) -> ContestListResult: ...
async def stream_tests_for_category_async(self, category_id: str) -> None: ...
def _create_metadata_error(
self, error_msg: str, contest_id: str = ""
@ -56,15 +58,21 @@ class BaseScraper(ABC):
success=False, error=f"{self.platform_name}: {error_msg}"
)
def _safe_execute(self, operation: str, func, *args, **kwargs):
async def _safe_execute(
self,
operation: str,
func: Callable[P, Awaitable[Any]],
*args: P.args,
**kwargs: P.kwargs,
):
try:
return func(*args, **kwargs)
return await func(*args, **kwargs)
except Exception as e:
if operation == "metadata":
contest_id = args[0] if args else ""
contest_id = cast(str, args[0]) if args else ""
return self._create_metadata_error(str(e), contest_id)
elif operation == "tests":
problem_id = args[1] if len(args) > 1 else ""
problem_id = cast(str, args[1]) if len(args) > 1 else ""
return self._create_tests_error(str(e), problem_id)
elif operation == "contests":
return self._create_contests_error(str(e))

View file

@ -1,9 +1,12 @@
#!/usr/bin/env python3
import asyncio
import json
import logging
import re
import sys
from dataclasses import asdict
from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
@ -19,224 +22,132 @@ from .models import (
TestsResult,
)
def scrape(url: str) -> list[TestCase]:
try:
page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
html = page.html_content
soup = BeautifulSoup(html, "html.parser")
input_sections = soup.find_all("div", class_="input")
output_sections = soup.find_all("div", class_="output")
individual_inputs: dict[str, list[str]] = {}
individual_outputs: dict[str, list[str]] = {}
for inp_section in input_sections:
inp_pre = inp_section.find("pre")
if not inp_pre or not isinstance(inp_pre, Tag):
continue
test_line_divs = inp_pre.find_all(
"div", class_=lambda x: x and "test-example-line-" in x
)
if not test_line_divs:
continue
for div in test_line_divs:
classes = div.get("class", [])
class_name = next(
(
cls
for cls in classes
if "test-example-line-" in cls and cls.split("-")[-1].isdigit()
),
None,
)
if not class_name:
continue
test_num = class_name.replace("test-example-line-", "")
if test_num not in individual_inputs:
individual_inputs[test_num] = []
individual_inputs[test_num].append(div.get_text().strip())
for out_section in output_sections:
out_pre = out_section.find("pre")
if not out_pre or not isinstance(out_pre, Tag):
continue
test_line_divs = out_pre.find_all(
"div", class_=lambda x: x and "test-example-line-" in x
)
if not test_line_divs:
continue
for div in test_line_divs:
classes = div.get("class", [])
class_name = next(
(
cls
for cls in classes
if "test-example-line-" in cls and cls.split("-")[-1].isdigit()
),
None,
)
if not class_name:
continue
test_num = class_name.replace("test-example-line-", "")
if test_num not in individual_outputs:
individual_outputs[test_num] = []
individual_outputs[test_num].append(div.get_text().strip())
if individual_inputs and individual_outputs:
common_tests = set(individual_inputs.keys()) & set(
individual_outputs.keys()
)
if common_tests:
tests = []
for test_num in sorted(common_tests):
input_text = "\n".join(individual_inputs[test_num])
output_text = "\n".join(individual_outputs[test_num])
prefixed_input = "1\n" + input_text
tests.append(TestCase(input=prefixed_input, expected=output_text))
return tests
all_inputs = []
all_outputs = []
for inp_section in input_sections:
inp_pre = inp_section.find("pre")
if not inp_pre or not isinstance(inp_pre, Tag):
continue
divs = inp_pre.find_all("div")
if divs:
lines = [div.get_text().strip() for div in divs if isinstance(div, Tag)]
text = "\n".join(lines)
else:
text = inp_pre.get_text().replace("\r", "").strip()
all_inputs.append(text)
for out_section in output_sections:
out_pre = out_section.find("pre")
if not out_pre or not isinstance(out_pre, Tag):
continue
divs = out_pre.find_all("div")
if divs:
lines = [div.get_text().strip() for div in divs if isinstance(div, Tag)]
text = "\n".join(lines)
else:
text = out_pre.get_text().replace("\r", "").strip()
all_outputs.append(text)
if not all_inputs or not all_outputs:
return []
combined_input = "\n".join(all_inputs)
combined_output = "\n".join(all_outputs)
return [TestCase(input=combined_input, expected=combined_output)]
except Exception as e:
print(f"Scrapling failed: {e}", file=sys.stderr)
return []
# suppress scrapling logging - https://github.com/D4Vinci/Scrapling/issues/31)
logging.getLogger("scrapling").setLevel(logging.CRITICAL)
def parse_problem_url(contest_id: str, problem_letter: str) -> str:
BASE_URL = "https://codeforces.com"
API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
TIMEOUT_SECONDS = 30
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
def _text_from_pre(pre: Tag) -> str:
return (
f"https://codeforces.com/contest/{contest_id}/problem/{problem_letter.upper()}"
pre.get_text(separator="\n", strip=False)
.replace("\r", "")
.replace("\xa0", " ")
.rstrip("\n")
)
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
timeout_ms = None
memory_mb = None
time_limit_div = soup.find("div", class_="time-limit")
if time_limit_div:
text = time_limit_div.get_text().strip()
match = re.search(r"(\d+) seconds?", text)
if match:
seconds = int(match.group(1))
timeout_ms = seconds * 1000
if timeout_ms is None:
raise ValueError("Could not find valid timeout in time-limit section")
memory_limit_div = soup.find("div", class_="memory-limit")
if memory_limit_div:
text = memory_limit_div.get_text().strip()
match = re.search(r"(\d+) megabytes", text)
if match:
memory_mb = float(match.group(1))
if memory_mb is None:
raise ValueError("Could not find valid memory limit in memory-limit section")
def _extract_limits(block: Tag) -> tuple[int, float]:
tdiv = block.find("div", class_="time-limit")
mdiv = block.find("div", class_="memory-limit")
timeout_ms = 0
memory_mb = 0.0
if tdiv:
ttxt = tdiv.get_text(" ", strip=True)
ts = re.search(r"(\d+)\s*seconds?", ttxt)
if ts:
timeout_ms = int(ts.group(1)) * 1000
if mdiv:
mtxt = mdiv.get_text(" ", strip=True)
ms = re.search(r"(\d+)\s*megabytes?", mtxt)
if ms:
memory_mb = float(ms.group(1))
return timeout_ms, memory_mb
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
try:
contest_url: str = f"https://codeforces.com/contest/{contest_id}"
page = StealthyFetcher.fetch(contest_url, headless=True, solve_cloudflare=True)
html = page.html_content
def _extract_title(block: Tag) -> tuple[str, str]:
t = block.find("div", class_="title")
if not t:
return "", ""
s = t.get_text(" ", strip=True)
parts = s.split(".", 1)
if len(parts) != 2:
return "", s.strip()
return parts[0].strip().upper(), parts[1].strip()
soup = BeautifulSoup(html, "html.parser")
problems: list[ProblemSummary] = []
problem_links = soup.find_all(
"a", href=lambda x: x and f"/contest/{contest_id}/problem/" in x
def _extract_samples(block: Tag) -> list[TestCase]:
st = block.find("div", class_="sample-test")
if not st:
return []
inputs = [
_text_from_pre(pre)
for inp in st.find_all("div", class_="input") # type: ignore[union-attr]
for pre in [inp.find("pre")]
if isinstance(pre, Tag)
]
outputs = [
_text_from_pre(pre)
for out in st.find_all("div", class_="output") # type: ignore[union-attr]
for pre in [out.find("pre")]
if isinstance(pre, Tag)
]
n = min(len(inputs), len(outputs))
return [TestCase(input=inputs[i], expected=outputs[i]) for i in range(n)]
def _is_interactive(block: Tag) -> bool:
ps = block.find("div", class_="problem-statement")
txt = ps.get_text(" ", strip=True) if ps else block.get_text(" ", strip=True)
return "This is an interactive problem" in txt
def _fetch_problems_html(contest_id: str) -> str:
url = f"{BASE_URL}/contest/{contest_id}/problems"
page = StealthyFetcher.fetch(
url,
headless=True,
solve_cloudflare=True,
)
return page.html_content
def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
soup = BeautifulSoup(html, "html.parser")
blocks = soup.find_all("div", class_="problem-statement")
out: list[dict[str, Any]] = []
for b in blocks:
holder = b.find_parent("div", class_="problemindexholder")
letter = (holder.get("problemindex") if holder else "").strip().upper()
name = _extract_title(b)[1] # keep your name extraction
if not letter:
continue
tests = _extract_samples(b)
timeout_ms, memory_mb = _extract_limits(b)
interactive = _is_interactive(b)
out.append(
{
"letter": letter,
"name": name,
"tests": tests,
"timeout_ms": timeout_ms,
"memory_mb": memory_mb,
"interactive": interactive,
}
)
for link in problem_links:
if not isinstance(link, Tag):
continue
href: str = str(link.get("href", ""))
if f"/contest/{contest_id}/problem/" in href:
problem_letter: str = href.split("/")[-1].lower()
problem_name: str = link.get_text(strip=True)
if not (problem_letter and problem_name):
continue
problems.append(ProblemSummary(id=problem_letter, name=problem_name))
seen: set[str] = set()
unique_problems: list[ProblemSummary] = []
for p in problems:
if p.id not in seen:
seen.add(p.id)
unique_problems.append(p)
return unique_problems
except Exception as e:
print(f"Failed to scrape contest problems: {e}", file=sys.stderr)
return []
return out
def scrape_sample_tests(url: str) -> list[TestCase]:
print(f"Scraping: {url}", file=sys.stderr)
return scrape(url)
def scrape_contests() -> list[ContestSummary]:
response = requests.get("https://codeforces.com/api/contest.list", timeout=10)
response.raise_for_status()
data = response.json()
if data["status"] != "OK":
return []
contests = []
for contest in data["result"]:
contest_id = str(contest["id"])
name = contest["name"]
contests.append(ContestSummary(id=contest_id, name=name, display_name=name))
return contests
def _scrape_contest_problems_sync(contest_id: str) -> list[ProblemSummary]:
html = _fetch_problems_html(contest_id)
blocks = _parse_all_blocks(html)
problems: list[ProblemSummary] = []
seen: set[str] = set()
for b in blocks:
pid = b["letter"].upper()
if pid in seen:
continue
seen.add(pid)
problems.append(ProblemSummary(id=pid.lower(), name=b["name"]))
return problems
class CodeforcesScraper(BaseScraper):
@ -244,81 +155,94 @@ class CodeforcesScraper(BaseScraper):
def platform_name(self) -> str:
return "codeforces"
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
return self._safe_execute(
"metadata", self._scrape_contest_metadata_impl, contest_id
)
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
return self._safe_execute(
"tests", self._scrape_problem_tests_impl, contest_id, problem_id
)
def scrape_contest_list(self) -> ContestListResult:
return self._safe_execute("contests", self._scrape_contest_list_impl)
def _scrape_contest_metadata_impl(self, contest_id: str) -> MetadataResult:
problems = scrape_contest_problems(contest_id)
if not problems:
return self._create_metadata_error(
f"No problems found for contest {contest_id}", contest_id
)
return MetadataResult(
success=True, error="", contest_id=contest_id, problems=problems
)
def _scrape_problem_tests_impl(
self, contest_id: str, problem_letter: str
) -> TestsResult:
problem_id = contest_id + problem_letter.lower()
url = parse_problem_url(contest_id, problem_letter)
tests = scrape_sample_tests(url)
page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
html = page.html_content
soup = BeautifulSoup(html, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup)
problem_statement_div = soup.find("div", class_="problem-statement")
interactive = bool(
problem_statement_div
and "This is an interactive problem" in problem_statement_div.get_text()
)
if not tests:
return self._create_tests_error(
f"No tests found for {contest_id} {problem_letter}", problem_id, url
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
async def impl(cid: str) -> MetadataResult:
problems = await asyncio.to_thread(_scrape_contest_problems_sync, cid)
if not problems:
return self._create_metadata_error(
f"No problems found for contest {cid}", cid
)
return MetadataResult(
success=True, error="", contest_id=cid, problems=problems
)
return TestsResult(
success=True,
error="",
problem_id=problem_id,
url=url,
tests=tests,
timeout_ms=timeout_ms,
memory_mb=memory_mb,
interactive=interactive,
)
return await self._safe_execute("metadata", impl, contest_id)
def _scrape_contest_list_impl(self) -> ContestListResult:
contests = scrape_contests()
if not contests:
return self._create_contests_error("No contests found")
return ContestListResult(success=True, error="", contests=contests)
async def scrape_contest_list(self) -> ContestListResult:
async def impl() -> ContestListResult:
try:
r = requests.get(API_CONTEST_LIST_URL, timeout=TIMEOUT_SECONDS)
r.raise_for_status()
data = r.json()
if data.get("status") != "OK":
return self._create_contests_error("Invalid API response")
contests: list[ContestSummary] = []
for c in data["result"]:
if c.get("phase") != "FINISHED":
continue
cid = str(c["id"])
name = c["name"]
contests.append(
ContestSummary(id=cid, name=name, display_name=name)
)
if not contests:
return self._create_contests_error("No contests found")
return ContestListResult(success=True, error="", contests=contests)
except Exception as e:
return self._create_contests_error(str(e))
return await self._safe_execute("contests", impl)
async def stream_tests_for_category_async(self, category_id: str) -> None:
html = await asyncio.to_thread(_fetch_problems_html, category_id)
blocks = await asyncio.to_thread(_parse_all_blocks, html)
for b in blocks:
pid = b["letter"].lower()
tests: list[TestCase] = b["tests"]
if not tests:
print(
json.dumps(
{
"problem_id": pid,
"error": f"{self.platform_name}: no tests found",
}
),
flush=True,
)
continue
print(
json.dumps(
{
"problem_id": pid,
"tests": [
{"input": t.input, "expected": t.expected} for t in tests
],
"timeout_ms": b["timeout_ms"],
"memory_mb": b["memory_mb"],
"interactive": bool(b["interactive"]),
}
),
flush=True,
)
def main() -> None:
async def main_async() -> int:
if len(sys.argv) < 2:
result = MetadataResult(
success=False,
error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> <problem_letter> OR codeforces.py contests",
error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> OR codeforces.py contests",
)
print(json.dumps(asdict(result)))
sys.exit(1)
return 1
scraper = CodeforcesScraper()
mode: str = sys.argv[1]
scraper = CodeforcesScraper()
if mode == "metadata":
if len(sys.argv) != 3:
@ -326,17 +250,17 @@ def main() -> None:
success=False, error="Usage: codeforces.py metadata <contest_id>"
)
print(json.dumps(asdict(result)))
sys.exit(1)
contest_id: str = sys.argv[2]
result = scraper.scrape_contest_metadata(contest_id)
return 1
contest_id = sys.argv[2]
result = await scraper.scrape_contest_metadata(contest_id)
print(json.dumps(asdict(result)))
return 0 if result.success else 1
elif mode == "tests":
if len(sys.argv) != 4:
if mode == "tests":
if len(sys.argv) != 3:
tests_result = TestsResult(
success=False,
error="Usage: codeforces.py tests <contest_id> <problem_letter>",
error="Usage: codeforces.py tests <contest_id>",
problem_id="",
url="",
tests=[],
@ -344,31 +268,32 @@ def main() -> None:
memory_mb=0,
)
print(json.dumps(asdict(tests_result)))
sys.exit(1)
return 1
contest_id = sys.argv[2]
await scraper.stream_tests_for_category_async(contest_id)
return 0
tests_contest_id: str = sys.argv[2]
problem_letter: str = sys.argv[3]
tests_result = scraper.scrape_problem_tests(tests_contest_id, problem_letter)
print(json.dumps(asdict(tests_result)))
elif mode == "contests":
if mode == "contests":
if len(sys.argv) != 2:
contest_result = ContestListResult(
success=False, error="Usage: codeforces.py contests"
)
print(json.dumps(asdict(contest_result)))
sys.exit(1)
contest_result = scraper.scrape_contest_list()
return 1
contest_result = await scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result)))
return 0 if contest_result.success else 1
else:
result = MetadataResult(
success=False,
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'",
)
print(json.dumps(asdict(result)))
sys.exit(1)
result = MetadataResult(
success=False,
error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
)
print(json.dumps(asdict(result)))
return 1
def main() -> None:
sys.exit(asyncio.run(main_async()))
if __name__ == "__main__":

View file

@ -1,13 +1,13 @@
#!/usr/bin/env python3
import asyncio
import json
import re
import sys
from dataclasses import asdict
from typing import Any
import backoff
import requests
from bs4 import BeautifulSoup, Tag
import httpx
from .base import BaseScraper
from .models import (
@ -19,6 +19,15 @@ from .models import (
TestsResult,
)
BASE_URL = "https://cses.fi"
INDEX_PATH = "/problemset/list"
TASK_PATH = "/problemset/task/{id}"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
TIMEOUT_S = 15.0
CONNECTIONS = 8
def normalize_category_name(category_name: str) -> str:
return category_name.lower().replace(" ", "_").replace("&", "and")
@ -57,256 +66,114 @@ def snake_to_title(name: str) -> str:
return " ".join(map(fix_word, enumerate(words)))
@backoff.on_exception(
backoff.expo,
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
max_tries=4,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Request failed (attempt {details['tries']}), retrying in {details['wait']:.1f}s: {details['exception']}",
file=sys.stderr,
),
async def fetch_text(client: httpx.AsyncClient, path: str) -> str:
r = await client.get(BASE_URL + path, headers=HEADERS, timeout=TIMEOUT_S)
r.raise_for_status()
return r.text
CATEGORY_BLOCK_RE = re.compile(
r'<h2>(?P<cat>[^<]+)</h2>\s*<ul class="task-list">(?P<body>.*?)</ul>',
re.DOTALL,
)
@backoff.on_predicate(
backoff.expo,
lambda response: response.status_code == 429,
max_tries=4,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Rate limited, retrying in {details['wait']:.1f}s", file=sys.stderr
),
TASK_LINK_RE = re.compile(
r'<li class="task"><a href="/problemset/task/(?P<id>\d+)/?">(?P<title>[^<]+)</a>',
re.DOTALL,
)
def make_request(url: str, headers: dict) -> requests.Response:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
return response
TITLE_RE = re.compile(
r'<div class="title-block">.*?<h1>(?P<title>[^<]+)</h1>', re.DOTALL
)
TIME_RE = re.compile(r"<li><b>Time limit:</b>\s*([0-9.]+)\s*s</li>")
MEM_RE = re.compile(r"<li><b>Memory limit:</b>\s*(\d+)\s*MB</li>")
SIDEBAR_CAT_RE = re.compile(
r'<div class="nav sidebar">.*?<h4>(?P<cat>[^<]+)</h4>', re.DOTALL
)
MD_BLOCK_RE = re.compile(r'<div class="md">(.*?)</div>', re.DOTALL | re.IGNORECASE)
EXAMPLE_SECTION_RE = re.compile(
r"<h[1-6][^>]*>\s*example[s]?:?\s*</h[1-6]>\s*(?P<section>.*?)(?=<h[1-6][^>]*>|$)",
re.DOTALL | re.IGNORECASE,
)
LABELED_IO_RE = re.compile(
r"input\s*:\s*</p>\s*<pre>(?P<input>.*?)</pre>.*?output\s*:\s*</p>\s*<pre>(?P<output>.*?)</pre>",
re.DOTALL | re.IGNORECASE,
)
PRE_RE = re.compile(r"<pre>(.*?)</pre>", re.DOTALL | re.IGNORECASE)
def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
category_name = snake_to_title(category_id)
try:
problemset_url = "https://cses.fi/problemset/"
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = make_request(problemset_url, headers)
soup = BeautifulSoup(response.text, "html.parser")
current_category = None
problems = []
target_found = False
for element in soup.find_all(["h1", "h2", "ul"]):
if not isinstance(element, Tag):
continue
if element.name in ["h1", "h2"]:
text = element.get_text(strip=True)
if not text or text.startswith("CSES") or text == "CSES Problem Set":
continue
if target_found and current_category != text:
break
current_category = text
if text.lower() == category_name.lower():
target_found = True
elif element.name == "ul" and current_category and target_found:
problem_links = element.find_all(
"a", href=lambda x: x and "/problemset/task/" in x
)
for link in problem_links:
href = link.get("href", "")
if not href:
continue
problem_id = href.split("/")[-1]
problem_name = link.get_text(strip=True)
if not problem_id.isdigit() or not problem_name:
continue
problems.append(ProblemSummary(id=problem_id, name=problem_name))
return problems
except Exception as e:
print(f"Failed to scrape CSES category {category_id}: {e}", file=sys.stderr)
return []
def parse_problem_url(problem_input: str) -> str | None:
if problem_input.startswith("https://cses.fi/problemset/task/"):
return problem_input.rstrip("/")
elif problem_input.isdigit():
return f"https://cses.fi/problemset/task/{problem_input}"
return None
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
timeout_ms = None
memory_mb = None
constraints_ul = soup.find("ul", class_="task-constraints")
if not constraints_ul or not isinstance(constraints_ul, Tag):
raise ValueError("Could not find task-constraints section")
for li in constraints_ul.find_all("li"):
text = li.get_text()
if "Time limit:" in text:
match = re.search(r"Time limit:\s*(\d+(?:\.\d+)?)\s*s", text)
if match:
seconds = float(match.group(1))
timeout_ms = int(seconds * 1000)
if "Memory limit:" in text:
match = re.search(r"Memory limit:\s*(\d+)\s*MB", text)
if match:
memory_mb = float(match.group(1))
if timeout_ms is None:
raise ValueError("Could not find valid timeout in task-constraints section")
if memory_mb is None:
raise ValueError(
"Could not find valid memory limit in task-constraints section"
)
return timeout_ms, memory_mb
def scrape_categories() -> list[ContestSummary]:
try:
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = make_request("https://cses.fi/problemset/", headers)
soup = BeautifulSoup(response.text, "html.parser")
categories = []
for h2 in soup.find_all("h2"):
category_name = h2.get_text().strip()
if category_name == "General":
continue
category_id = normalize_category_name(category_name)
display_name = category_name
categories.append(
ContestSummary(
id=category_id, name=category_name, display_name=display_name
)
def parse_categories(html: str) -> list[ContestSummary]:
out: list[ContestSummary] = []
for m in CATEGORY_BLOCK_RE.finditer(html):
cat = m.group("cat").strip()
if cat == "General":
continue
out.append(
ContestSummary(
id=normalize_category_name(cat),
name=cat,
display_name=cat,
)
return categories
except Exception as e:
print(f"Failed to scrape CSES categories: {e}", file=sys.stderr)
return []
def process_problem_element(
element,
current_category: str | None,
all_categories: dict[str, list[ProblemSummary]],
) -> str | None:
if element.name == "h1":
category_name = element.get_text().strip()
if category_name not in all_categories:
all_categories[category_name] = []
return category_name
if element.name != "a" or "/problemset/task/" not in element.get("href", ""):
return current_category
href = element.get("href", "")
if not href:
return current_category
problem_id = href.split("/")[-1]
problem_name = element.get_text(strip=True)
if not (problem_id.isdigit() and problem_name and current_category):
return current_category
problem = ProblemSummary(id=problem_id, name=problem_name)
all_categories[current_category].append(problem)
return current_category
def scrape_all_problems() -> dict[str, list[ProblemSummary]]:
try:
problemset_url = "https://cses.fi/problemset/"
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = requests.get(problemset_url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
all_categories: dict[str, list[ProblemSummary]] = {}
current_category = None
for element in soup.find_all(["h1", "h2", "ul"]):
if not isinstance(element, Tag):
continue
if element.name in ["h1", "h2"]:
text = element.get_text(strip=True)
if text and not text.startswith("CSES") and text != "CSES Problem Set":
current_category = text
if current_category not in all_categories:
all_categories[current_category] = []
print(f"Found category: {current_category}", file=sys.stderr)
elif element.name == "ul" and current_category:
problem_links = element.find_all(
"a", href=lambda x: x and "/problemset/task/" in x
)
for link in problem_links:
href = link.get("href", "")
if href:
problem_id = href.split("/")[-1]
problem_name = link.get_text(strip=True)
if problem_id.isdigit() and problem_name:
problem = ProblemSummary(id=problem_id, name=problem_name)
all_categories[current_category].append(problem)
print(
f"Found {len(all_categories)} categories with {sum(len(probs) for probs in all_categories.values())} problems",
file=sys.stderr,
)
return all_categories
except Exception as e:
print(f"Failed to scrape CSES problems: {e}", file=sys.stderr)
return {}
def _collect_section_after(header: Tag) -> list[Tag]:
out: list[Tag] = []
cur = header.find_next_sibling()
while cur and not (isinstance(cur, Tag) and cur.name in ("h1", "h2", "h3")):
if isinstance(cur, Tag):
out.append(cur)
cur = cur.find_next_sibling()
return out
def extract_example_test_cases(soup: BeautifulSoup) -> list[tuple[str, str]]:
example_headers = soup.find_all(
lambda t: isinstance(t, Tag)
and t.name in ("h1", "h2", "h3")
and t.get_text(strip=True).lower().startswith("example")
)
cases: list[tuple[str, str]] = []
for hdr in example_headers:
section = _collect_section_after(hdr)
def find_labeled(label: str) -> str | None:
for node in section:
if not isinstance(node, Tag):
continue
if node.name in ("p", "h4", "h5", "h6"):
txt = node.get_text(strip=True).lower().rstrip(":")
if txt == label:
pre = node.find_next_sibling("pre")
if pre:
return pre.get_text().strip()
return None
inp = find_labeled("input")
out = find_labeled("output")
if not inp or not out:
pres = [n for n in section if isinstance(n, Tag) and n.name == "pre"]
if len(pres) >= 2:
inp = inp or pres[0].get_text().strip()
out = out or pres[1].get_text().strip()
if inp and out:
cases.append((inp, out))
return cases
def parse_category_problems(category_id: str, html: str) -> list[ProblemSummary]:
want = snake_to_title(category_id)
for m in CATEGORY_BLOCK_RE.finditer(html):
cat = m.group("cat").strip()
if cat != want:
continue
body = m.group("body")
return [
ProblemSummary(id=mm.group("id"), name=mm.group("title"))
for mm in TASK_LINK_RE.finditer(body)
]
return []
def scrape(url: str) -> list[TestCase]:
try:
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = make_request(url, headers)
soup = BeautifulSoup(response.text, "html.parser")
pairs = extract_example_test_cases(soup)
return [TestCase(input=inp, expected=out) for (inp, out) in pairs]
except Exception as e:
print(f"Error scraping CSES: {e}", file=sys.stderr)
def parse_limits(html: str) -> tuple[int, int]:
tm = TIME_RE.search(html)
mm = MEM_RE.search(html)
t = int(round(float(tm.group(1)) * 1000)) if tm else 0
m = int(mm.group(1)) if mm else 0
return t, m
def parse_title(html: str) -> str:
    """Extract the problem title from a task page, or "" when not found."""
    match = TITLE_RE.search(html)
    if match is None:
        return ""
    return match.group("title").strip()
def parse_category_from_sidebar(html: str) -> str | None:
    """Return the category name from the page sidebar, or None if missing."""
    match = SIDEBAR_CAT_RE.search(html)
    if not match:
        return None
    return match.group("cat").strip()
def parse_tests(html: str) -> list[TestCase]:
    """Extract example tests from a task page's markdown block.

    Prefers explicitly labeled Input/Output pairs; otherwise falls back to
    the first two <pre> blocks of the example section. Returns [] when the
    page has no recognizable example.
    """
    md_match = MD_BLOCK_RE.search(html)
    if md_match is None:
        return []
    block = md_match.group(1)
    section_match = EXAMPLE_SECTION_RE.search(block)
    # Narrow to the "Example" section when one exists; otherwise scan the
    # whole markdown block.
    section = section_match.group("section") if section_match else block
    labeled = LABELED_IO_RE.search(section)
    if labeled:
        return [
            TestCase(
                input=labeled.group("input").strip(),
                expected=labeled.group("output").strip(),
            )
        ]
    pre_blocks = PRE_RE.findall(section)
    if len(pre_blocks) < 2:
        return []
    return [TestCase(input=pre_blocks[0].strip(), expected=pre_blocks[1].strip())]
def task_path(problem_id: str | int) -> str:
    """Build the site-relative URL path for a task id (accepts str or int)."""
    return TASK_PATH.format(id=str(problem_id))
class CSESScraper(BaseScraper):
@ -314,129 +181,99 @@ class CSESScraper(BaseScraper):
    def platform_name(self) -> str:
        """Platform identifier used to prefix error messages and payloads."""
        return "cses"
    def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
        # All public scrape entry points funnel through _safe_execute so
        # exceptions surface as failed result objects, not tracebacks.
        return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id)

    def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
        return self._safe_execute(
            "tests", self._scrape_tests_impl, contest_id, problem_id
        )

    def scrape_contest_list(self) -> ContestListResult:
        return self._safe_execute("contests", self._scrape_contests_impl)
    def _safe_execute(self, operation: str, func, *args):
        """Run ``func(*args)`` and convert any exception into a failed result.

        ``operation`` selects the result type built on error:
        "metadata" -> MetadataResult, "tests" -> TestsResult,
        "contests" -> ContestListResult.
        NOTE(review): an unrecognized ``operation`` falls through all
        branches and returns None on error.
        """
        try:
            return func(*args)
        except Exception as e:
            error_msg = f"{self.platform_name}: {str(e)}"
            if operation == "metadata":
                return MetadataResult(success=False, error=error_msg)
            elif operation == "tests":
                return TestsResult(
                    success=False,
                    error=error_msg,
                    problem_id="",
                    url="",
                    tests=[],
                    timeout_ms=0,
                    memory_mb=0,
                )
            elif operation == "contests":
                return ContestListResult(success=False, error=error_msg)
def _scrape_metadata_impl(self, category_id: str) -> MetadataResult:
problems = scrape_category_problems(category_id)
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
async with httpx.AsyncClient() as client:
html = await fetch_text(client, INDEX_PATH)
problems = parse_category_problems(contest_id, html)
if not problems:
return MetadataResult(
success=False,
error=f"{self.platform_name}: No problems found for category: {category_id}",
error=f"{self.platform_name}: No problems found for category: {contest_id}",
)
return MetadataResult(
success=True, error="", contest_id=category_id, problems=problems
success=True, error="", contest_id=contest_id, problems=problems
)
    def _scrape_tests_impl(self, category: str, problem_id: str) -> TestsResult:
        """Scrape example tests plus time/memory limits for one problem.

        ``problem_id`` may be a bare numeric id or a full task URL; the
        ``category`` argument is accepted for interface symmetry but is not
        used here. Network errors propagate to _safe_execute.
        """
        url = parse_problem_url(problem_id)
        if not url:
            return TestsResult(
                success=False,
                error=f"{self.platform_name}: Invalid problem input: {problem_id}. Use either problem ID (e.g., 1068) or full URL",
                problem_id=problem_id if problem_id.isdigit() else "",
                url="",
                tests=[],
                timeout_ms=0,
                memory_mb=0,
            )
        tests = scrape(url)
        # Recover the numeric id from the URL when the caller passed a URL.
        m = re.search(r"/task/(\d+)", url)
        actual_problem_id = (
            problem_id if problem_id.isdigit() else (m.group(1) if m else "")
        )
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        # NOTE(review): the page is fetched a second time here just for the
        # limits; scrape() already downloaded it once.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        timeout_ms, memory_mb = extract_problem_limits(soup)
        if not tests:
            return TestsResult(
                success=False,
                error=f"{self.platform_name}: No tests found for {problem_id}",
                problem_id=actual_problem_id,
                url=url,
                tests=[],
                timeout_ms=timeout_ms,
                memory_mb=memory_mb,
            )
        return TestsResult(
            success=True,
            error="",
            problem_id=actual_problem_id,
            url=url,
            tests=tests,
            timeout_ms=timeout_ms,
            memory_mb=memory_mb,
        )
def _scrape_contests_impl(self) -> ContestListResult:
categories = scrape_categories()
if not categories:
async def scrape_contest_list(self) -> ContestListResult:
async with httpx.AsyncClient() as client:
html = await fetch_text(client, INDEX_PATH)
cats = parse_categories(html)
if not cats:
return ContestListResult(
success=False, error=f"{self.platform_name}: No contests found"
)
return ContestListResult(success=True, error="", contests=categories)
return ContestListResult(success=True, error="", contests=cats)
    async def stream_tests_for_category_async(self, category_id: str) -> None:
        """Fetch tests for every problem in a category concurrently and
        stream one JSON object per problem to stdout as each finishes.

        Concurrency is bounded both by the httpx connection pool and by a
        semaphore of the same size. Per-problem failures are emitted as
        {"problem_id": ..., "error": ...} payloads rather than raised.
        """
        async with httpx.AsyncClient(
            limits=httpx.Limits(max_connections=CONNECTIONS)
        ) as client:
            index_html = await fetch_text(client, INDEX_PATH)
            problems = parse_category_problems(category_id, index_html)
            if not problems:
                # Unknown/empty category: emit nothing.
                return
            sem = asyncio.Semaphore(CONNECTIONS)

            async def run_one(pid: str) -> dict[str, Any]:
                # One bounded fetch+parse; never raises — errors become payloads.
                async with sem:
                    try:
                        html = await fetch_text(client, task_path(pid))
                        tests = parse_tests(html)
                        timeout_ms, memory_mb = parse_limits(html)
                        if not tests:
                            return {
                                "problem_id": pid,
                                "error": f"{self.platform_name}: no tests found",
                            }
                        return {
                            "problem_id": pid,
                            "tests": [
                                {"input": t.input, "expected": t.expected}
                                for t in tests
                            ],
                            "timeout_ms": timeout_ms,
                            "memory_mb": memory_mb,
                            "interactive": False,
                        }
                    except Exception as e:
                        return {"problem_id": pid, "error": str(e)}

            tasks = [run_one(p.id) for p in problems]
            # as_completed streams results in completion order (not
            # submission order); flush so consumers see each line at once.
            for coro in asyncio.as_completed(tasks):
                payload = await coro
                print(json.dumps(payload), flush=True)
def main() -> None:
async def main_async() -> int:
if len(sys.argv) < 2:
result = MetadataResult(
success=False,
error="Usage: cses.py metadata <category_id> OR cses.py tests <category> <problem_id> OR cses.py contests",
error="Usage: cses.py metadata <category_id> OR cses.py tests <category> OR cses.py contests",
)
print(json.dumps(asdict(result)))
sys.exit(1)
return 1
mode: str = sys.argv[1]
scraper = CSESScraper()
if mode == "metadata":
if len(sys.argv) != 3:
result = MetadataResult(
success=False,
error="Usage: cses.py metadata <category_id>",
success=False, error="Usage: cses.py metadata <category_id>"
)
print(json.dumps(asdict(result)))
sys.exit(1)
return 1
category_id = sys.argv[2]
result = scraper.scrape_contest_metadata(category_id)
result = await scraper.scrape_contest_metadata(category_id)
print(json.dumps(asdict(result)))
if not result.success:
sys.exit(1)
elif mode == "tests":
if len(sys.argv) != 4:
return 0 if result.success else 1
if mode == "tests":
if len(sys.argv) != 3:
tests_result = TestsResult(
success=False,
error="Usage: cses.py tests <category> <problem_id>",
error="Usage: cses.py tests <category>",
problem_id="",
url="",
tests=[],
@ -444,31 +281,32 @@ def main() -> None:
memory_mb=0,
)
print(json.dumps(asdict(tests_result)))
sys.exit(1)
return 1
category = sys.argv[2]
problem_id = sys.argv[3]
tests_result = scraper.scrape_problem_tests(category, problem_id)
print(json.dumps(asdict(tests_result)))
if not tests_result.success:
sys.exit(1)
elif mode == "contests":
await scraper.stream_tests_for_category_async(category)
return 0
if mode == "contests":
if len(sys.argv) != 2:
contest_result = ContestListResult(
success=False, error="Usage: cses.py contests"
)
print(json.dumps(asdict(contest_result)))
sys.exit(1)
contest_result = scraper.scrape_contest_list()
return 1
contest_result = await scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result)))
if not contest_result.success:
sys.exit(1)
else:
result = MetadataResult(
success=False,
error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category> <problem_id>', or 'contests'",
)
print(json.dumps(asdict(result)))
sys.exit(1)
return 0 if contest_result.success else 1
result = MetadataResult(
success=False,
error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category>', or 'contests'",
)
print(json.dumps(asdict(result)))
return 1
def main() -> None:
    """Synchronous CLI entry point: run the async driver, exit with its code."""
    sys.exit(asyncio.run(main_async()))
if __name__ == "__main__":

View file

@ -1,43 +0,0 @@
import pytest
# Shared HTML fixtures: minimal page snippets mimicking each platform's
# example-test markup, used by the per-platform scraper tests.


@pytest.fixture
def mock_codeforces_html():
    # Codeforces-style page: limit divs plus per-line example divs.
    return """
    <div class="time-limit">Time limit: 1 seconds</div>
    <div class="memory-limit">Memory limit: 256 megabytes</div>
    <div class="input">
    <pre>
    <div class="test-example-line-1">3</div>
    <div class="test-example-line-1">1 2 3</div>
    </pre>
    </div>
    <div class="output">
    <pre>
    <div class="test-example-line-1">6</div>
    </pre>
    </div>
    """


@pytest.fixture
def mock_atcoder_html():
    # AtCoder-style page: "Sample Input/Output N" headers followed by <pre>.
    return """
    <h3>Sample Input 1</h3>
    <pre>3
    1 2 3</pre>
    <h3>Sample Output 1</h3>
    <pre>6</pre>
    """


@pytest.fixture
def mock_cses_html():
    # CSES-style page: "Example" header with labeled Input/Output <pre> blocks.
    return """
    <h1>Example</h1>
    <p>Input:</p>
    <pre>3
    1 2 3</pre>
    <p>Output:</p>
    <pre>6</pre>
    """

View file

@ -1,199 +0,0 @@
from unittest.mock import Mock
from scrapers.atcoder import scrape, scrape_contest_problems, scrape_contests
from scrapers.models import ContestSummary, ProblemSummary
# AtCoder scraper tests: sample extraction, contest/problem listing,
# network-error fallbacks, and heuristic-contest (AHC) filtering.


def test_scrape_success(mocker, mock_atcoder_html):
    # One labeled sample pair on the mocked task page -> one TestCase.
    mock_response = Mock()
    mock_response.text = mock_atcoder_html
    mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
    result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")
    assert len(result) == 1
    assert result[0].input == "3\n1 2 3"
    assert result[0].expected == "6"


def test_scrape_contest_problems(mocker):
    # Task-table links are parsed into summaries keyed by the task letter.
    mock_response = Mock()
    mock_response.text = """
    <table class="table">
    <tr><th>Task</th><th>Name</th></tr>
    <tr>
    <td></td>
    <td><a href="/contests/abc350/tasks/abc350_a">A - Water Tank</a></td>
    </tr>
    <tr>
    <td></td>
    <td><a href="/contests/abc350/tasks/abc350_b">B - Dentist Aoki</a></td>
    </tr>
    </table>
    """
    mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
    result = scrape_contest_problems("abc350")
    assert len(result) == 2
    assert result[0] == ProblemSummary(id="a", name="A - Water Tank")
    assert result[1] == ProblemSummary(id="b", name="B - Dentist Aoki")


def test_scrape_network_error(mocker):
    # Network failures are swallowed; scrape() returns an empty list.
    mocker.patch(
        "scrapers.atcoder.requests.get", side_effect=Exception("Network error")
    )
    result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")
    assert result == []


def test_scrape_contests_success(mocker):
    # The archive page is paginated: first the pagination list is read,
    # then each page's contest table is scraped.
    def mock_get_side_effect(url, **kwargs):
        if url == "https://atcoder.jp/contests/archive":
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = """
            <html>
            <ul class="pagination">
            <li>1</li>
            </ul>
            </html>
            """
            return mock_response
        elif "page=1" in url:
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = """
            <table class="table">
            <tbody>
            <tr>
            <td>2025-01-15 21:00:00+0900</td>
            <td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
            <td>01:40</td>
            <td> - 1999</td>
            </tr>
            <tr>
            <td>2025-01-14 21:00:00+0900</td>
            <td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
            <td>02:00</td>
            <td>1000 - 2799</td>
            </tr>
            </tbody>
            </table>
            """
            return mock_response
        else:
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = "<html></html>"
            return mock_response

    mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
    result = scrape_contests()
    assert len(result) == 2
    assert result[0] == ContestSummary(
        id="abc350",
        name="AtCoder Beginner Contest 350",
        display_name="AtCoder Beginner Contest 350",
    )
    assert result[1] == ContestSummary(
        id="arc170",
        name="AtCoder Regular Contest 170",
        display_name="AtCoder Regular Contest 170",
    )


def test_scrape_contests_no_table(mocker):
    # A page without the contest table yields no contests.
    mock_response = Mock()
    mock_response.text = "<html><body>No table found</body></html>"
    mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
    result = scrape_contests()
    assert result == []


def test_scrape_contests_network_error(mocker):
    # Network failures while listing contests yield an empty list.
    mocker.patch(
        "scrapers.atcoder.requests.get", side_effect=Exception("Network error")
    )
    result = scrape_contests()
    assert result == []


def test_scrape_contests_filters_ahc(mocker):
    # Heuristic contests (ahc*) must be excluded from the listing.
    def mock_get_side_effect(url, **kwargs):
        if url == "https://atcoder.jp/contests/archive":
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = """
            <html>
            <ul class="pagination">
            <li>1</li>
            </ul>
            </html>
            """
            return mock_response
        elif "page=1" in url:
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = """
            <table class="table">
            <tbody>
            <tr>
            <td>2025-01-15 21:00:00+0900</td>
            <td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
            <td>01:40</td>
            <td> - 1999</td>
            </tr>
            <tr>
            <td>2025-01-14 21:00:00+0900</td>
            <td><a href="/contests/ahc044">AtCoder Heuristic Contest 044</a></td>
            <td>05:00</td>
            <td>-</td>
            </tr>
            <tr>
            <td>2025-01-13 21:00:00+0900</td>
            <td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
            <td>02:00</td>
            <td>1000 - 2799</td>
            </tr>
            </tbody>
            </table>
            """
            return mock_response
        else:
            mock_response = Mock()
            mock_response.raise_for_status.return_value = None
            mock_response.text = "<html></html>"
            return mock_response

    mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
    result = scrape_contests()
    assert len(result) == 2
    assert result[0] == ContestSummary(
        id="abc350",
        name="AtCoder Beginner Contest 350",
        display_name="AtCoder Beginner Contest 350",
    )
    assert result[1] == ContestSummary(
        id="arc170",
        name="AtCoder Regular Contest 170",
        display_name="AtCoder Regular Contest 170",
    )
    # Ensure ahc044 is filtered out
    contest_ids = [contest.id for contest in result]
    assert "ahc044" not in contest_ids

View file

@ -1,97 +0,0 @@
from unittest.mock import Mock
from scrapers.codeforces import CodeforcesScraper
from scrapers.models import ContestSummary, ProblemSummary
# Codeforces scraper tests: test extraction via StealthyFetcher, contest
# metadata parsing, and contest-list behavior against the JSON API.


def test_scrape_success(mocker, mock_codeforces_html):
    # StealthyFetcher is mocked; per-line example divs are joined into tests.
    mock_page = Mock()
    mock_page.html_content = mock_codeforces_html
    mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
    scraper = CodeforcesScraper()
    result = scraper.scrape_problem_tests("1900", "A")
    assert result.success
    assert len(result.tests) == 1
    assert result.tests[0].input == "1\n3\n1 2 3"
    assert result.tests[0].expected == "6"


def test_scrape_contest_problems(mocker):
    # Problem anchors are parsed into summaries with lowercase letter ids.
    html = """
    <a href="/contest/1900/problem/A">A. Problem A</a>
    <a href="/contest/1900/problem/B">B. Problem B</a>
    """
    mock_page = Mock()
    mock_page.html_content = html
    mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
    scraper = CodeforcesScraper()
    result = scraper.scrape_contest_metadata("1900")
    assert result.success
    assert len(result.problems) == 2
    assert result.problems[0] == ProblemSummary(id="a", name="A. Problem A")
    assert result.problems[1] == ProblemSummary(id="b", name="B. Problem B")


def test_scrape_network_error(mocker):
    # Fetch failures surface as a failed result carrying the error message.
    mocker.patch(
        "scrapers.codeforces.StealthyFetcher.fetch",
        side_effect=Exception("Network error"),
    )
    scraper = CodeforcesScraper()
    result = scraper.scrape_problem_tests("1900", "A")
    assert not result.success
    assert "network error" in result.error.lower()


def test_scrape_contests_success(mocker):
    # The contest.list API response is mapped into ContestSummary objects.
    mock_response = Mock()
    mock_response.json.return_value = {
        "status": "OK",
        "result": [
            {"id": 1951, "name": "Educational Codeforces Round 168 (Rated for Div. 2)"},
            {"id": 1950, "name": "Codeforces Round 936 (Div. 2)"},
            {"id": 1949, "name": "Codeforces Global Round 26"},
        ],
    }
    mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
    scraper = CodeforcesScraper()
    result = scraper.scrape_contest_list()
    assert result.success
    assert len(result.contests) == 3
    assert result.contests[0] == ContestSummary(
        id="1951",
        name="Educational Codeforces Round 168 (Rated for Div. 2)",
        display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
    )


def test_scrape_contests_api_error(mocker):
    # A non-OK API status is reported as "no contests found".
    mock_response = Mock()
    mock_response.json.return_value = {"status": "FAILED", "result": []}
    mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
    scraper = CodeforcesScraper()
    result = scraper.scrape_contest_list()
    assert not result.success
    assert "no contests found" in result.error.lower()


def test_scrape_contests_network_error(mocker):
    # Network failures surface as a failed result, not an exception.
    mocker.patch(
        "scrapers.codeforces.requests.get", side_effect=Exception("Network error")
    )
    scraper = CodeforcesScraper()
    result = scraper.scrape_contest_list()
    assert not result.success
    assert "network error" in result.error.lower()

View file

@ -1,185 +0,0 @@
from unittest.mock import Mock
from scrapers.cses import (
normalize_category_name,
scrape,
scrape_all_problems,
scrape_categories,
scrape_category_problems,
snake_to_title,
)
from scrapers.models import ContestSummary, ProblemSummary
# CSES scraper tests: sample extraction, category name helpers, per-category
# problem parsing, and category (contest) listing.


def test_scrape_success(mocker, mock_cses_html):
    # Labeled Input/Output blocks on the mocked page -> one TestCase.
    mock_response = Mock()
    mock_response.text = mock_cses_html
    mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
    result = scrape("https://cses.fi/problemset/task/1068")
    assert len(result) == 1
    assert result[0].input == "3\n1 2 3"
    assert result[0].expected == "6"


def test_scrape_all_problems(mocker):
    # Problems are grouped under their category headers.
    mock_response = Mock()
    mock_response.text = """
    <div class="content">
    <h1>Introductory Problems</h1>
    <ul>
    <li><a href="/problemset/task/1068">Weird Algorithm</a></li>
    <li><a href="/problemset/task/1083">Missing Number</a></li>
    </ul>
    <h1>Sorting and Searching</h1>
    <ul>
    <li><a href="/problemset/task/1084">Apartments</a></li>
    </ul>
    </div>
    """
    mock_response.raise_for_status = Mock()
    mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
    result = scrape_all_problems()
    assert "Introductory Problems" in result
    assert "Sorting and Searching" in result
    assert len(result["Introductory Problems"]) == 2
    assert result["Introductory Problems"][0] == ProblemSummary(
        id="1068",
        name="Weird Algorithm",
    )


def test_scrape_network_error(mocker):
    # Network failures are swallowed; scrape() returns an empty list.
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    result = scrape("https://cses.fi/problemset/task/1068")
    assert result == []


def test_normalize_category_name():
    # Display title -> snake_case id.
    assert normalize_category_name("Sorting and Searching") == "sorting_and_searching"
    assert normalize_category_name("Dynamic Programming") == "dynamic_programming"
    assert normalize_category_name("Graph Algorithms") == "graph_algorithms"


def test_snake_to_title():
    # snake_case id -> display title (round-trip of normalize_category_name).
    assert snake_to_title("sorting_and_searching") == "Sorting and Searching"
    assert snake_to_title("dynamic_programming") == "Dynamic Programming"
    assert snake_to_title("graph_algorithms") == "Graph Algorithms"


def test_scrape_category_problems_success(mocker):
    # Only problems under the requested category header are returned.
    mock_response = Mock()
    mock_response.text = """
    <div class="content">
    <h1>General</h1>
    <ul>
    <li><a href="/problemset/task/1000">Test Problem</a></li>
    </ul>
    <h1>Sorting and Searching</h1>
    <ul>
    <li><a href="/problemset/task/1640">Sum of Two Values</a></li>
    <li><a href="/problemset/task/1643">Maximum Subarray Sum</a></li>
    </ul>
    <h1>Dynamic Programming</h1>
    <ul>
    <li><a href="/problemset/task/1633">Dice Combinations</a></li>
    </ul>
    </div>
    """
    mock_response.raise_for_status = Mock()
    mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
    result = scrape_category_problems("sorting_and_searching")
    assert len(result) == 2
    assert result[0].id == "1640"
    assert result[0].name == "Sum of Two Values"
    assert result[1].id == "1643"
    assert result[1].name == "Maximum Subarray Sum"


def test_scrape_category_problems_not_found(mocker):
    # An unknown category yields an empty list.
    mock_response = Mock()
    mock_response.text = """
    <div class="content">
    <h1>Some Other Category</h1>
    <ul>
    <li><a href="/problemset/task/1000">Test Problem</a></li>
    </ul>
    </div>
    """
    mock_response.raise_for_status = Mock()
    mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
    result = scrape_category_problems("nonexistent_category")
    assert result == []


def test_scrape_category_problems_network_error(mocker):
    # Network failures yield an empty list.
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    result = scrape_category_problems("sorting_and_searching")
    assert result == []


def test_scrape_categories_success(mocker):
    # Categories with task links become contests; the "General" section
    # (registration links, no tasks) is excluded.
    mock_response = Mock()
    mock_response.text = """
    <html>
    <body>
    <h2>General</h2>
    <ul class="task-list">
    <li class="link"><a href="/register">Register</a></li>
    </ul>
    <h2>Introductory Problems</h2>
    <ul class="task-list">
    <li class="task"><a href="/problemset/task/1068">Weird Algorithm</a></li>
    <li class="task"><a href="/problemset/task/1083">Missing Number</a></li>
    </ul>
    <h2>Sorting and Searching</h2>
    <ul class="task-list">
    <li class="task"><a href="/problemset/task/1621">Distinct Numbers</a></li>
    <li class="task"><a href="/problemset/task/1084">Apartments</a></li>
    <li class="task"><a href="/problemset/task/1090">Ferris Wheel</a></li>
    </ul>
    </body>
    </html>
    """
    mock_response.raise_for_status = Mock()
    mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
    result = scrape_categories()
    assert len(result) == 2
    assert result[0] == ContestSummary(
        id="introductory_problems",
        name="Introductory Problems",
        display_name="Introductory Problems",
    )
    assert result[1] == ContestSummary(
        id="sorting_and_searching",
        name="Sorting and Searching",
        display_name="Sorting and Searching",
    )


def test_scrape_categories_network_error(mocker):
    # Network failures yield an empty category list.
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    result = scrape_categories()
    assert result == []

View file

@ -0,0 +1,2 @@
def test():
    """Placeholder sanity test confirming the test harness runs at all."""
    expected = 5
    assert expected == 5

415
uv.lock generated
View file

@ -92,6 +92,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
]
[[package]]
name = "anyio"
version = "4.11.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
{ name = "sniffio" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" },
]
[[package]]
name = "attrs"
version = "25.3.0"
@ -101,15 +115,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
]
[[package]]
name = "automat"
version = "25.4.16"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e3/0f/d40bbe294bbf004d436a8bcbcfaadca8b5140d39ad0ad3d73d1a8ba15f14/automat-25.4.16.tar.gz", hash = "sha256:0017591a5477066e90d26b0e696ddc143baafd87b588cfac8100bc6be9634de0", size = 129977, upload-time = "2025-04-16T20:12:16.002Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
]
[[package]]
name = "backoff"
version = "2.2.1"
@ -119,6 +124,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
]
[[package]]
name = "basedpyright"
version = "1.31.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nodejs-wheel-binaries" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/f6/c5657b1e464d04757cde2db76922a88091fe16854bd3d12e470c23b0dcf1/basedpyright-1.31.6.tar.gz", hash = "sha256:07f3602ba1582218dfd1db25b8b69cd3493e1f4367f46a44fd57bb9034b52ea9", size = 22683901, upload-time = "2025-10-01T13:11:21.317Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/2b/34f338b4c04fe965fd209ed872d9fdd893dacc1a06feb6c9fec13ff535c1/basedpyright-1.31.6-py3-none-any.whl", hash = "sha256:620968ee69c14eee6682f29ffd6f813a30966afb1083ecfa4caf155c5d24f2d5", size = 11805295, upload-time = "2025-10-01T13:11:18.308Z" },
]
[[package]]
name = "beautifulsoup4"
version = "4.13.5"
@ -332,77 +349,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "constantly"
version = "23.10.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/6f/cb2a94494ff74aa9528a36c5b1422756330a75a8367bf20bd63171fc324d/constantly-23.10.4.tar.gz", hash = "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd", size = 13300, upload-time = "2023-10-28T23:18:24.316Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b8/40/c199d095151addf69efdb4b9ca3a4f20f70e20508d6222bffb9b76f58573/constantly-23.10.4-py3-none-any.whl", hash = "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9", size = 13547, upload-time = "2023-10-28T23:18:23.038Z" },
]
[[package]]
name = "cryptography"
version = "46.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/62/e3664e6ffd7743e1694b244dde70b43a394f6f7fbcacf7014a8ff5197c73/cryptography-46.0.1.tar.gz", hash = "sha256:ed570874e88f213437f5cf758f9ef26cbfc3f336d889b1e592ee11283bb8d1c7", size = 749198, upload-time = "2025-09-17T00:10:35.797Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4c/8c/44ee01267ec01e26e43ebfdae3f120ec2312aa72fa4c0507ebe41a26739f/cryptography-46.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:1cd6d50c1a8b79af1a6f703709d8973845f677c8e97b1268f5ff323d38ce8475", size = 7285044, upload-time = "2025-09-17T00:08:36.807Z" },
{ url = "https://files.pythonhosted.org/packages/22/59/9ae689a25047e0601adfcb159ec4f83c0b4149fdb5c3030cc94cd218141d/cryptography-46.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0ff483716be32690c14636e54a1f6e2e1b7bf8e22ca50b989f88fa1b2d287080", size = 4308182, upload-time = "2025-09-17T00:08:39.388Z" },
{ url = "https://files.pythonhosted.org/packages/c4/ee/ca6cc9df7118f2fcd142c76b1da0f14340d77518c05b1ebfbbabca6b9e7d/cryptography-46.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9873bf7c1f2a6330bdfe8621e7ce64b725784f9f0c3a6a55c3047af5849f920e", size = 4572393, upload-time = "2025-09-17T00:08:41.663Z" },
{ url = "https://files.pythonhosted.org/packages/7f/a3/0f5296f63815d8e985922b05c31f77ce44787b3127a67c0b7f70f115c45f/cryptography-46.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0dfb7c88d4462a0cfdd0d87a3c245a7bc3feb59de101f6ff88194f740f72eda6", size = 4308400, upload-time = "2025-09-17T00:08:43.559Z" },
{ url = "https://files.pythonhosted.org/packages/5d/8c/74fcda3e4e01be1d32775d5b4dd841acaac3c1b8fa4d0774c7ac8d52463d/cryptography-46.0.1-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e22801b61613ebdebf7deb18b507919e107547a1d39a3b57f5f855032dd7cfb8", size = 4015786, upload-time = "2025-09-17T00:08:45.758Z" },
{ url = "https://files.pythonhosted.org/packages/dc/b8/85d23287baeef273b0834481a3dd55bbed3a53587e3b8d9f0898235b8f91/cryptography-46.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:757af4f6341ce7a1e47c326ca2a81f41d236070217e5fbbad61bbfe299d55d28", size = 4982606, upload-time = "2025-09-17T00:08:47.602Z" },
{ url = "https://files.pythonhosted.org/packages/e5/d3/de61ad5b52433b389afca0bc70f02a7a1f074651221f599ce368da0fe437/cryptography-46.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f7a24ea78de345cfa7f6a8d3bde8b242c7fac27f2bd78fa23474ca38dfaeeab9", size = 4604234, upload-time = "2025-09-17T00:08:49.879Z" },
{ url = "https://files.pythonhosted.org/packages/dc/1f/dbd4d6570d84748439237a7478d124ee0134bf166ad129267b7ed8ea6d22/cryptography-46.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e8776dac9e660c22241b6587fae51a67b4b0147daa4d176b172c3ff768ad736", size = 4307669, upload-time = "2025-09-17T00:08:52.321Z" },
{ url = "https://files.pythonhosted.org/packages/ec/fd/ca0a14ce7f0bfe92fa727aacaf2217eb25eb7e4ed513b14d8e03b26e63ed/cryptography-46.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9f40642a140c0c8649987027867242b801486865277cbabc8c6059ddef16dc8b", size = 4947579, upload-time = "2025-09-17T00:08:54.697Z" },
{ url = "https://files.pythonhosted.org/packages/89/6b/09c30543bb93401f6f88fce556b3bdbb21e55ae14912c04b7bf355f5f96c/cryptography-46.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:449ef2b321bec7d97ef2c944173275ebdab78f3abdd005400cc409e27cd159ab", size = 4603669, upload-time = "2025-09-17T00:08:57.16Z" },
{ url = "https://files.pythonhosted.org/packages/23/9a/38cb01cb09ce0adceda9fc627c9cf98eb890fc8d50cacbe79b011df20f8a/cryptography-46.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2dd339ba3345b908fa3141ddba4025568fa6fd398eabce3ef72a29ac2d73ad75", size = 4435828, upload-time = "2025-09-17T00:08:59.606Z" },
{ url = "https://files.pythonhosted.org/packages/0f/53/435b5c36a78d06ae0bef96d666209b0ecd8f8181bfe4dda46536705df59e/cryptography-46.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7411c910fb2a412053cf33cfad0153ee20d27e256c6c3f14d7d7d1d9fec59fd5", size = 4709553, upload-time = "2025-09-17T00:09:01.832Z" },
{ url = "https://files.pythonhosted.org/packages/f5/c4/0da6e55595d9b9cd3b6eb5dc22f3a07ded7f116a3ea72629cab595abb804/cryptography-46.0.1-cp311-abi3-win32.whl", hash = "sha256:cbb8e769d4cac884bb28e3ff620ef1001b75588a5c83c9c9f1fdc9afbe7f29b0", size = 3058327, upload-time = "2025-09-17T00:09:03.726Z" },
{ url = "https://files.pythonhosted.org/packages/95/0f/cd29a35e0d6e78a0ee61793564c8cff0929c38391cb0de27627bdc7525aa/cryptography-46.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:92e8cfe8bd7dd86eac0a677499894862cd5cc2fd74de917daa881d00871ac8e7", size = 3523893, upload-time = "2025-09-17T00:09:06.272Z" },
{ url = "https://files.pythonhosted.org/packages/f2/dd/eea390f3e78432bc3d2f53952375f8b37cb4d37783e626faa6a51e751719/cryptography-46.0.1-cp311-abi3-win_arm64.whl", hash = "sha256:db5597a4c7353b2e5fb05a8e6cb74b56a4658a2b7bf3cb6b1821ae7e7fd6eaa0", size = 2932145, upload-time = "2025-09-17T00:09:08.568Z" },
{ url = "https://files.pythonhosted.org/packages/0a/fb/c73588561afcd5e24b089952bd210b14676c0c5bf1213376350ae111945c/cryptography-46.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:4c49eda9a23019e11d32a0eb51a27b3e7ddedde91e099c0ac6373e3aacc0d2ee", size = 7193928, upload-time = "2025-09-17T00:09:10.595Z" },
{ url = "https://files.pythonhosted.org/packages/26/34/0ff0bb2d2c79f25a2a63109f3b76b9108a906dd2a2eb5c1d460b9938adbb/cryptography-46.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9babb7818fdd71394e576cf26c5452df77a355eac1a27ddfa24096665a27f8fd", size = 4293515, upload-time = "2025-09-17T00:09:12.861Z" },
{ url = "https://files.pythonhosted.org/packages/df/b7/d4f848aee24ecd1be01db6c42c4a270069a4f02a105d9c57e143daf6cf0f/cryptography-46.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9f2c4cc63be3ef43c0221861177cee5d14b505cd4d4599a89e2cd273c4d3542a", size = 4545619, upload-time = "2025-09-17T00:09:15.397Z" },
{ url = "https://files.pythonhosted.org/packages/44/a5/42fedefc754fd1901e2d95a69815ea4ec8a9eed31f4c4361fcab80288661/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:41c281a74df173876da1dc9a9b6953d387f06e3d3ed9284e3baae3ab3f40883a", size = 4299160, upload-time = "2025-09-17T00:09:17.155Z" },
{ url = "https://files.pythonhosted.org/packages/86/a1/cd21174f56e769c831fbbd6399a1b7519b0ff6280acec1b826d7b072640c/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0a17377fa52563d730248ba1f68185461fff36e8bc75d8787a7dd2e20a802b7a", size = 3994491, upload-time = "2025-09-17T00:09:18.971Z" },
{ url = "https://files.pythonhosted.org/packages/8d/2f/a8cbfa1c029987ddc746fd966711d4fa71efc891d37fbe9f030fe5ab4eec/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:0d1922d9280e08cde90b518a10cd66831f632960a8d08cb3418922d83fce6f12", size = 4960157, upload-time = "2025-09-17T00:09:20.923Z" },
{ url = "https://files.pythonhosted.org/packages/67/ae/63a84e6789e0d5a2502edf06b552bcb0fa9ff16147265d5c44a211942abe/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:af84e8e99f1a82cea149e253014ea9dc89f75b82c87bb6c7242203186f465129", size = 4577263, upload-time = "2025-09-17T00:09:23.356Z" },
{ url = "https://files.pythonhosted.org/packages/ef/8f/1b9fa8e92bd9cbcb3b7e1e593a5232f2c1e6f9bd72b919c1a6b37d315f92/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ef648d2c690703501714588b2ba640facd50fd16548133b11b2859e8655a69da", size = 4298703, upload-time = "2025-09-17T00:09:25.566Z" },
{ url = "https://files.pythonhosted.org/packages/c3/af/bb95db070e73fea3fae31d8a69ac1463d89d1c084220f549b00dd01094a8/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:e94eb5fa32a8a9f9bf991f424f002913e3dd7c699ef552db9b14ba6a76a6313b", size = 4926363, upload-time = "2025-09-17T00:09:27.451Z" },
{ url = "https://files.pythonhosted.org/packages/f5/3b/d8fb17ffeb3a83157a1cc0aa5c60691d062aceecba09c2e5e77ebfc1870c/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:534b96c0831855e29fc3b069b085fd185aa5353033631a585d5cd4dd5d40d657", size = 4576958, upload-time = "2025-09-17T00:09:29.924Z" },
{ url = "https://files.pythonhosted.org/packages/d9/46/86bc3a05c10c8aa88c8ae7e953a8b4e407c57823ed201dbcba55c4d655f4/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9b55038b5c6c47559aa33626d8ecd092f354e23de3c6975e4bb205df128a2a0", size = 4422507, upload-time = "2025-09-17T00:09:32.222Z" },
{ url = "https://files.pythonhosted.org/packages/a8/4e/387e5a21dfd2b4198e74968a541cfd6128f66f8ec94ed971776e15091ac3/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ec13b7105117dbc9afd023300fb9954d72ca855c274fe563e72428ece10191c0", size = 4683964, upload-time = "2025-09-17T00:09:34.118Z" },
{ url = "https://files.pythonhosted.org/packages/25/a3/f9f5907b166adb8f26762071474b38bbfcf89858a5282f032899075a38a1/cryptography-46.0.1-cp314-cp314t-win32.whl", hash = "sha256:504e464944f2c003a0785b81668fe23c06f3b037e9cb9f68a7c672246319f277", size = 3029705, upload-time = "2025-09-17T00:09:36.381Z" },
{ url = "https://files.pythonhosted.org/packages/12/66/4d3a4f1850db2e71c2b1628d14b70b5e4c1684a1bd462f7fffb93c041c38/cryptography-46.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c52fded6383f7e20eaf70a60aeddd796b3677c3ad2922c801be330db62778e05", size = 3502175, upload-time = "2025-09-17T00:09:38.261Z" },
{ url = "https://files.pythonhosted.org/packages/52/c7/9f10ad91435ef7d0d99a0b93c4360bea3df18050ff5b9038c489c31ac2f5/cryptography-46.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:9495d78f52c804b5ec8878b5b8c7873aa8e63db9cd9ee387ff2db3fffe4df784", size = 2912354, upload-time = "2025-09-17T00:09:40.078Z" },
{ url = "https://files.pythonhosted.org/packages/98/e5/fbd632385542a3311915976f88e0dfcf09e62a3fc0aff86fb6762162a24d/cryptography-46.0.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:d84c40bdb8674c29fa192373498b6cb1e84f882889d21a471b45d1f868d8d44b", size = 7255677, upload-time = "2025-09-17T00:09:42.407Z" },
{ url = "https://files.pythonhosted.org/packages/56/3e/13ce6eab9ad6eba1b15a7bd476f005a4c1b3f299f4c2f32b22408b0edccf/cryptography-46.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ed64e5083fa806709e74fc5ea067dfef9090e5b7a2320a49be3c9df3583a2d8", size = 4301110, upload-time = "2025-09-17T00:09:45.614Z" },
{ url = "https://files.pythonhosted.org/packages/a2/67/65dc233c1ddd688073cf7b136b06ff4b84bf517ba5529607c9d79720fc67/cryptography-46.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:341fb7a26bc9d6093c1b124b9f13acc283d2d51da440b98b55ab3f79f2522ead", size = 4562369, upload-time = "2025-09-17T00:09:47.601Z" },
{ url = "https://files.pythonhosted.org/packages/17/db/d64ae4c6f4e98c3dac5bf35dd4d103f4c7c345703e43560113e5e8e31b2b/cryptography-46.0.1-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6ef1488967e729948d424d09c94753d0167ce59afba8d0f6c07a22b629c557b2", size = 4302126, upload-time = "2025-09-17T00:09:49.335Z" },
{ url = "https://files.pythonhosted.org/packages/3d/19/5f1eea17d4805ebdc2e685b7b02800c4f63f3dd46cfa8d4c18373fea46c8/cryptography-46.0.1-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7823bc7cdf0b747ecfb096d004cc41573c2f5c7e3a29861603a2871b43d3ef32", size = 4009431, upload-time = "2025-09-17T00:09:51.239Z" },
{ url = "https://files.pythonhosted.org/packages/81/b5/229ba6088fe7abccbfe4c5edb96c7a5ad547fac5fdd0d40aa6ea540b2985/cryptography-46.0.1-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:f736ab8036796f5a119ff8211deda416f8c15ce03776db704a7a4e17381cb2ef", size = 4980739, upload-time = "2025-09-17T00:09:54.181Z" },
{ url = "https://files.pythonhosted.org/packages/3a/9c/50aa38907b201e74bc43c572f9603fa82b58e831bd13c245613a23cff736/cryptography-46.0.1-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:e46710a240a41d594953012213ea8ca398cd2448fbc5d0f1be8160b5511104a0", size = 4592289, upload-time = "2025-09-17T00:09:56.731Z" },
{ url = "https://files.pythonhosted.org/packages/5a/33/229858f8a5bb22f82468bb285e9f4c44a31978d5f5830bb4ea1cf8a4e454/cryptography-46.0.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:84ef1f145de5aee82ea2447224dc23f065ff4cc5791bb3b506615957a6ba8128", size = 4301815, upload-time = "2025-09-17T00:09:58.548Z" },
{ url = "https://files.pythonhosted.org/packages/52/cb/b76b2c87fbd6ed4a231884bea3ce073406ba8e2dae9defad910d33cbf408/cryptography-46.0.1-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9394c7d5a7565ac5f7d9ba38b2617448eba384d7b107b262d63890079fad77ca", size = 4943251, upload-time = "2025-09-17T00:10:00.475Z" },
{ url = "https://files.pythonhosted.org/packages/94/0f/f66125ecf88e4cb5b8017ff43f3a87ede2d064cb54a1c5893f9da9d65093/cryptography-46.0.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ed957044e368ed295257ae3d212b95456bd9756df490e1ac4538857f67531fcc", size = 4591247, upload-time = "2025-09-17T00:10:02.874Z" },
{ url = "https://files.pythonhosted.org/packages/f6/22/9f3134ae436b63b463cfdf0ff506a0570da6873adb4bf8c19b8a5b4bac64/cryptography-46.0.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f7de12fa0eee6234de9a9ce0ffcfa6ce97361db7a50b09b65c63ac58e5f22fc7", size = 4428534, upload-time = "2025-09-17T00:10:04.994Z" },
{ url = "https://files.pythonhosted.org/packages/89/39/e6042bcb2638650b0005c752c38ea830cbfbcbb1830e4d64d530000aa8dc/cryptography-46.0.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7fab1187b6c6b2f11a326f33b036f7168f5b996aedd0c059f9738915e4e8f53a", size = 4699541, upload-time = "2025-09-17T00:10:06.925Z" },
{ url = "https://files.pythonhosted.org/packages/68/46/753d457492d15458c7b5a653fc9a84a1c9c7a83af6ebdc94c3fc373ca6e8/cryptography-46.0.1-cp38-abi3-win32.whl", hash = "sha256:45f790934ac1018adeba46a0f7289b2b8fe76ba774a88c7f1922213a56c98bc1", size = 3043779, upload-time = "2025-09-17T00:10:08.951Z" },
{ url = "https://files.pythonhosted.org/packages/2f/50/b6f3b540c2f6ee712feeb5fa780bb11fad76634e71334718568e7695cb55/cryptography-46.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:7176a5ab56fac98d706921f6416a05e5aff7df0e4b91516f450f8627cda22af3", size = 3517226, upload-time = "2025-09-17T00:10:10.769Z" },
{ url = "https://files.pythonhosted.org/packages/ff/e8/77d17d00981cdd27cc493e81e1749a0b8bbfb843780dbd841e30d7f50743/cryptography-46.0.1-cp38-abi3-win_arm64.whl", hash = "sha256:efc9e51c3e595267ff84adf56e9b357db89ab2279d7e375ffcaf8f678606f3d9", size = 2923149, upload-time = "2025-09-17T00:10:13.236Z" },
{ url = "https://files.pythonhosted.org/packages/27/27/077e09fd92075dd1338ea0ffaf5cfee641535545925768350ad90d8c36ca/cryptography-46.0.1-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b9c79af2c3058430d911ff1a5b2b96bbfe8da47d5ed961639ce4681886614e70", size = 3722319, upload-time = "2025-09-17T00:10:20.273Z" },
{ url = "https://files.pythonhosted.org/packages/db/32/6fc7250280920418651640d76cee34d91c1e0601d73acd44364570cf041f/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0ca4be2af48c24df689a150d9cd37404f689e2968e247b6b8ff09bff5bcd786f", size = 4249030, upload-time = "2025-09-17T00:10:22.396Z" },
{ url = "https://files.pythonhosted.org/packages/32/33/8d5398b2da15a15110b2478480ab512609f95b45ead3a105c9a9c76f9980/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:13e67c4d3fb8b6bc4ef778a7ccdd8df4cd15b4bcc18f4239c8440891a11245cc", size = 4528009, upload-time = "2025-09-17T00:10:24.418Z" },
{ url = "https://files.pythonhosted.org/packages/fd/1c/4012edad2a8977ab386c36b6e21f5065974d37afa3eade83a9968cba4855/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:15b5fd9358803b0d1cc42505a18d8bca81dabb35b5cfbfea1505092e13a9d96d", size = 4248902, upload-time = "2025-09-17T00:10:26.255Z" },
{ url = "https://files.pythonhosted.org/packages/58/a3/257cd5ae677302de8fa066fca9de37128f6729d1e63c04dd6a15555dd450/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:e34da95e29daf8a71cb2841fd55df0511539a6cdf33e6f77c1e95e44006b9b46", size = 4527150, upload-time = "2025-09-17T00:10:28.28Z" },
{ url = "https://files.pythonhosted.org/packages/6a/cd/fe6b65e1117ec7631f6be8951d3db076bac3e1b096e3e12710ed071ffc3c/cryptography-46.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:34f04b7311174469ab3ac2647469743720f8b6c8b046f238e5cb27905695eb2a", size = 3448210, upload-time = "2025-09-17T00:10:30.145Z" },
]
[[package]]
name = "cssselect"
version = "1.3.0"
@ -450,15 +396,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/24/f7351052cf9db771fe4f32fca47fd66e6d9b53d8613b17faf7d130a9d553/cython-3.1.4-py3-none-any.whl", hash = "sha256:d194d95e4fa029a3f6c7d46bdd16d973808c7ea4797586911fdb67cb98b1a2c6", size = 1227541, upload-time = "2025-09-16T07:20:29.595Z" },
]
[[package]]
name = "defusedxml"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
]
[[package]]
name = "distlib"
version = "0.4.0"
@ -611,15 +548,40 @@ wheels = [
]
[[package]]
name = "hyperlink"
version = "21.0.0"
name = "h11"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
[[package]]
name = "httpx"
version = "0.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "certifi" },
{ name = "httpcore" },
{ name = "idna" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3a/51/1947bd81d75af87e3bb9e34593a4cf118115a8feb451ce7a69044ef1412e/hyperlink-21.0.0.tar.gz", hash = "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", size = 140743, upload-time = "2021-01-08T05:51:20.972Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/aa/8caf6a0a3e62863cbb9dab27135660acba46903b703e224f14f447e57934/hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4", size = 74638, upload-time = "2021-01-08T05:51:22.906Z" },
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[[package]]
@ -640,18 +602,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "incremental"
version = "24.7.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/27/87/156b374ff6578062965afe30cc57627d35234369b3336cf244b240c8d8e6/incremental-24.7.2.tar.gz", hash = "sha256:fb4f1d47ee60efe87d4f6f0ebb5f70b9760db2b2574c59c8e8912be4ebd464c9", size = 28157, upload-time = "2024-07-29T20:03:55.441Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0d/38/221e5b2ae676a3938c2c1919131410c342b6efc2baffeda395dd66eeca8f/incremental-24.7.2-py3-none-any.whl", hash = "sha256:8cb2c3431530bec48ad70513931a760f446ad6c25e8333ca5d95e24b0ed7b8fe", size = 20516, upload-time = "2024-07-29T20:03:53.677Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
@ -661,38 +611,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "itemadapter"
version = "0.12.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e9/50/2fd91416acfbd316b58de909cfc2a5c2daaa4ced67fb76cb0dedcbd13197/itemadapter-0.12.2.tar.gz", hash = "sha256:8e05c07cea966a7a8c4f096150ee2c91d9b4104a76f9afd029b235e1b564a61f", size = 32089, upload-time = "2025-09-02T12:15:19.751Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/ce/b2d995ddf3d493849f5608c7eab92c24cc50933503c645de3e4843aa7800/itemadapter-0.12.2-py3-none-any.whl", hash = "sha256:17ff8acb169fb11dbed8af83e805c19c3b890bde4653761b4d3c1544142e04b6", size = 18480, upload-time = "2025-09-02T12:15:18.259Z" },
]
[[package]]
name = "itemloaders"
version = "1.3.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "itemadapter" },
{ name = "jmespath" },
{ name = "parsel" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b6/3e/c549370e95c9dc7ec5e155c075e2700fa75abe5625608a4ce5009eabe0bf/itemloaders-1.3.2.tar.gz", hash = "sha256:4faf5b3abe83bf014476e3fd9ccf66867282971d9f1d4e96d9a61b60c3786770", size = 19707, upload-time = "2024-09-30T13:48:49.417Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/68/9592dcfd9c24467b545fac17b098a171e372bf0d775400fa1971712bca57/itemloaders-1.3.2-py3-none-any.whl", hash = "sha256:6a91465f721c7bad8b07e1fbb0560cf99f4845156ed9f7bf2ca424336c6a677c", size = 12194, upload-time = "2024-09-30T13:48:47.82Z" },
]
[[package]]
name = "jmespath"
version = "1.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" },
]
[[package]]
name = "language-tags"
version = "1.2.0"
@ -1030,6 +948,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
]
[[package]]
name = "ndjson"
version = "0.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b4/d5/209b6ca94566f9c94c0ec41cee1681c0a3b92a306a84a9b0fcd662088dc3/ndjson-0.3.1.tar.gz", hash = "sha256:bf9746cb6bb1cb53d172cda7f154c07c786d665ff28341e4e689b796b229e5d6", size = 6448, upload-time = "2020-02-25T05:01:07.873Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/c9/04ba0056011ba96a58163ebfd666d8385300bd12da1afe661a5a147758d7/ndjson-0.3.1-py2.py3-none-any.whl", hash = "sha256:839c22275e6baa3040077b83c005ac24199b94973309a8a1809be962c753a410", size = 5305, upload-time = "2020-02-25T05:01:06.39Z" },
]
[[package]]
name = "nodeenv"
version = "1.9.1"
@ -1039,6 +966,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
]
[[package]]
name = "nodejs-wheel-binaries"
version = "22.20.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/54/02f58c8119e2f1984e2572cc77a7b469dbaf4f8d171ad376e305749ef48e/nodejs_wheel_binaries-22.20.0.tar.gz", hash = "sha256:a62d47c9fd9c32191dff65bbe60261504f26992a0a19fe8b4d523256a84bd351", size = 8058, upload-time = "2025-09-26T09:48:00.906Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/24/6d/333e5458422f12318e3c3e6e7f194353aa68b0d633217c7e89833427ca01/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:455add5ac4f01c9c830ab6771dbfad0fdf373f9b040d3aabe8cca9b6c56654fb", size = 53246314, upload-time = "2025-09-26T09:47:32.536Z" },
{ url = "https://files.pythonhosted.org/packages/56/30/dcd6879d286a35b3c4c8f9e5e0e1bcf4f9e25fe35310fc77ecf97f915a23/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:5d8c12f97eea7028b34a84446eb5ca81829d0c428dfb4e647e09ac617f4e21fa", size = 53644391, upload-time = "2025-09-26T09:47:36.093Z" },
{ url = "https://files.pythonhosted.org/packages/58/be/c7b2e7aa3bb281d380a1c531f84d0ccfe225832dfc3bed1ca171753b9630/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a2b0989194148f66e9295d8f11bc463bde02cbe276517f4d20a310fb84780ae", size = 60282516, upload-time = "2025-09-26T09:47:39.88Z" },
{ url = "https://files.pythonhosted.org/packages/3e/c5/8befacf4190e03babbae54cb0809fb1a76e1600ec3967ab8ee9f8fc85b65/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5c500aa4dc046333ecb0a80f183e069e5c30ce637f1c1a37166b2c0b642dc21", size = 60347290, upload-time = "2025-09-26T09:47:43.712Z" },
{ url = "https://files.pythonhosted.org/packages/c0/bd/cfffd1e334277afa0714962c6ec432b5fe339340a6bca2e5fa8e678e7590/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3279eb1b99521f0d20a850bbfc0159a658e0e85b843b3cf31b090d7da9f10dfc", size = 62178798, upload-time = "2025-09-26T09:47:47.752Z" },
{ url = "https://files.pythonhosted.org/packages/08/14/10b83a9c02faac985b3e9f5e65d63a34fc0f46b48d8a2c3e4caa3e1e7318/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d29705797b33bade62d79d8f106c2453c8a26442a9b2a5576610c0f7e7c351ed", size = 62772957, upload-time = "2025-09-26T09:47:51.266Z" },
{ url = "https://files.pythonhosted.org/packages/b4/a9/c6a480259aa0d6b270aac2c6ba73a97444b9267adde983a5b7e34f17e45a/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_amd64.whl", hash = "sha256:4bd658962f24958503541963e5a6f2cc512a8cb301e48a69dc03c879f40a28ae", size = 40120431, upload-time = "2025-09-26T09:47:54.363Z" },
{ url = "https://files.pythonhosted.org/packages/42/b1/6a4eb2c6e9efa028074b0001b61008c9d202b6b46caee9e5d1b18c088216/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_arm64.whl", hash = "sha256:1fccac931faa210d22b6962bcdbc99269d16221d831b9a118bbb80fe434a60b8", size = 38844133, upload-time = "2025-09-26T09:47:57.357Z" },
]
[[package]]
name = "numpy"
version = "2.3.3"
@ -1193,22 +1136,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "parsel"
version = "1.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cssselect" },
{ name = "jmespath" },
{ name = "lxml" },
{ name = "packaging" },
{ name = "w3lib" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f6/df/acd504c154c0b9028b0d8491a77fdd5f86e9c06ee04f986abf85e36d9a5f/parsel-1.10.0.tar.gz", hash = "sha256:14f17db9559f51b43357b9dfe43cec870a8efb5ea4857abb624ec6ff80d8a080", size = 51421, upload-time = "2025-01-17T15:38:31.941Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/18/35d1d947553d24909dca37e2ff11720eecb601360d1bac8d7a9a1bc7eb08/parsel-1.10.0-py2.py3-none-any.whl", hash = "sha256:6a0c28bd81f9df34ba665884c88efa0b18b8d2c44c81f64e27f2f0cb37d46169", size = 17266, upload-time = "2025-01-17T15:38:27.83Z" },
]
[[package]]
name = "patchright"
version = "1.55.2"
@ -1363,36 +1290,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" },
]
[[package]]
name = "protego"
version = "0.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/19/9b/9c3a649167c7e43a0818df515d515e66d95a261fdfdf2a6afd45be9db696/protego-0.5.0.tar.gz", hash = "sha256:225dee0acfcc71de8c6f7cef9c618e5a9d3e7baa7ae1470b8d076a064033c463", size = 3137494, upload-time = "2025-06-24T13:58:45.31Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/cb/4347985f89ca3e4beb5d0cb85f8b951c9e339564bd2a3f388d6fb78382cc/protego-0.5.0-py3-none-any.whl", hash = "sha256:4237227840a67fdeec289a9b89652455b5657806388c17e1a556e160435f8fc5", size = 10356, upload-time = "2025-06-24T13:58:44.08Z" },
]
[[package]]
name = "pyasn1"
version = "0.6.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" },
]
[[package]]
name = "pyasn1-modules"
version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyasn1" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
]
[[package]]
name = "pycparser"
version = "2.23"
@ -1402,15 +1299,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" },
]
[[package]]
name = "pydispatcher"
version = "2.0.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/21/db/030d0700ae90d2f9d52c2f3c1f864881e19cef8cba3b0a08759c8494c19c/PyDispatcher-2.0.7.tar.gz", hash = "sha256:b777c6ad080dc1bad74a4c29d6a46914fa6701ac70f94b0d66fbcfde62f5be31", size = 38891, upload-time = "2023-02-17T20:11:13.106Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/0e/9ee7bc0b48ec45d93b302fa2d787830dca4dc454d31a237faa5815995988/PyDispatcher-2.0.7-py3-none-any.whl", hash = "sha256:96543bea04115ffde08f851e1d45cacbfd1ee866ac42127d9b476dc5aefa7de0", size = 12040, upload-time = "2023-02-17T20:11:11.991Z" },
]
[[package]]
name = "pyee"
version = "13.0.0"
@ -1463,25 +1351,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/7c/54afe9ffee547c41e1161691e72067a37ed27466ac71c089bfdcd07ca70d/pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71", size = 396742, upload-time = "2025-06-14T20:46:57.64Z" },
]
[[package]]
name = "pyopenssl"
version = "25.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
]
[[package]]
name = "pypydispatcher"
version = "2.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d5/7b/65f55513d3c769fd677f90032d8d8703e3dc17e88a41b6074d2177548bca/PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2", size = 23224, upload-time = "2017-07-03T14:20:51.806Z" }
[[package]]
name = "pysocks"
version = "1.7.1"
@ -1554,15 +1423,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
]
[[package]]
name = "queuelib"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/78/9ace6888cf6d390c9aec3ba93020838b08934959b544a7f10b15db815d29/queuelib-1.8.0.tar.gz", hash = "sha256:582bc65514481100b0539bd671da6b355b878869cfc77d92c63b75fcc9cf8e27", size = 11675, upload-time = "2025-03-31T12:18:46.193Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/44/542f4e702fafc477260d3463ae1bcdd113faac9d42336601af50985af914/queuelib-1.8.0-py3-none-any.whl", hash = "sha256:599468c5589716e63d3bb753dae7bf32cc94838ade1e7b450a061faec4a2015d", size = 13615, upload-time = "2025-03-31T12:18:43.526Z" },
]
[[package]]
name = "requests"
version = "2.32.5"
@ -1598,14 +1458,15 @@ dependencies = [
{ name = "backoff" },
{ name = "beautifulsoup4" },
{ name = "curl-cffi" },
{ name = "playwright" },
{ name = "httpx" },
{ name = "ndjson" },
{ name = "requests" },
{ name = "scrapling", extra = ["fetchers"] },
{ name = "scrapy" },
]
[package.dev-dependencies]
dev = [
{ name = "basedpyright" },
{ name = "mypy" },
{ name = "pre-commit" },
{ name = "pytest" },
@ -1619,14 +1480,15 @@ requires-dist = [
{ name = "backoff", specifier = ">=2.2.1" },
{ name = "beautifulsoup4", specifier = ">=4.13.5" },
{ name = "curl-cffi", specifier = ">=0.13.0" },
{ name = "playwright", specifier = ">=1.55.0" },
{ name = "httpx", specifier = ">=0.28.1" },
{ name = "ndjson", specifier = ">=0.3.1" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "scrapling", extras = ["fetchers"], specifier = ">=0.3.5" },
{ name = "scrapy", specifier = ">=2.13.3" },
]
[package.metadata.requires-dev]
dev = [
{ name = "basedpyright", specifier = ">=1.31.6" },
{ name = "mypy", specifier = ">=1.18.2" },
{ name = "pre-commit", specifier = ">=4.3.0" },
{ name = "pytest", specifier = ">=8.0.0" },
@ -1661,35 +1523,6 @@ fetchers = [
{ name = "playwright" },
]
[[package]]
name = "scrapy"
version = "2.13.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "cssselect" },
{ name = "defusedxml" },
{ name = "itemadapter" },
{ name = "itemloaders" },
{ name = "lxml" },
{ name = "packaging" },
{ name = "parsel" },
{ name = "protego" },
{ name = "pydispatcher", marker = "platform_python_implementation == 'CPython'" },
{ name = "pyopenssl" },
{ name = "pypydispatcher", marker = "platform_python_implementation == 'PyPy'" },
{ name = "queuelib" },
{ name = "service-identity" },
{ name = "tldextract" },
{ name = "twisted" },
{ name = "w3lib" },
{ name = "zope-interface" },
]
sdist = { url = "https://files.pythonhosted.org/packages/be/6c/bab0c01c5c50842548f0b5e936dfd2520a1ce84c171472c2cfe4d0599841/scrapy-2.13.3.tar.gz", hash = "sha256:bf17588c10e46a9d70c49a05380b749e3c7fba58204a367a5747ce6da2bd204d", size = 1220051, upload-time = "2025-07-02T15:41:15.776Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/53/cb/474b56910b9fb823298008444790a6d5fb9c8dfb936101136932d586287a/scrapy-2.13.3-py3-none-any.whl", hash = "sha256:9c16a482e1474b501f7b7121a4071ddc5cec4c0c7c0320217ed678d4fb8a3e9e", size = 321805, upload-time = "2025-07-02T15:41:13.782Z" },
]
[[package]]
name = "screeninfo"
version = "0.8.1"
@ -1704,27 +1537,12 @@ wheels = [
]
[[package]]
name = "service-identity"
version = "24.2.0"
name = "sniffio"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "cryptography" },
{ name = "pyasn1" },
{ name = "pyasn1-modules" },
]
sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245, upload-time = "2024-10-26T07:21:57.736Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364, upload-time = "2024-10-26T07:21:56.302Z" },
]
[[package]]
name = "setuptools"
version = "80.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]
[[package]]
@ -1763,24 +1581,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
]
[[package]]
name = "twisted"
version = "25.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "automat" },
{ name = "constantly" },
{ name = "hyperlink" },
{ name = "incremental" },
{ name = "typing-extensions" },
{ name = "zope-interface" },
]
sdist = { url = "https://files.pythonhosted.org/packages/13/0f/82716ed849bf7ea4984c21385597c949944f0f9b428b5710f79d0afc084d/twisted-25.5.0.tar.gz", hash = "sha256:1deb272358cb6be1e3e8fc6f9c8b36f78eb0fa7c2233d2dbe11ec6fee04ea316", size = 3545725, upload-time = "2025-06-07T09:52:24.858Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/eb/66/ab7efd8941f0bc7b2bd555b0f0471bff77df4c88e0cc31120c82737fec77/twisted-25.5.0-py3-none-any.whl", hash = "sha256:8559f654d01a54a8c3efe66d533d43f383531ebf8d81d9f9ab4769d91ca15df7", size = 3204767, upload-time = "2025-06-07T09:52:21.428Z" },
]
[[package]]
name = "types-beautifulsoup4"
version = "4.12.0.20250516"
@ -1866,15 +1666,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" },
]
[[package]]
name = "w3lib"
version = "2.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bf/7d/1172cfaa1e29beb9bf938e484c122b3bdc82e8e37b17a4f753ba6d6e009f/w3lib-2.3.1.tar.gz", hash = "sha256:5c8ac02a3027576174c2b61eb9a2170ba1b197cae767080771b6f1febda249a4", size = 49531, upload-time = "2025-01-27T14:22:10.453Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/58/dd/56f0d8af71e475ed194d702f8b4cf9cea812c95e82ad823d239023c6558c/w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b", size = 21751, upload-time = "2025-01-27T14:22:09.421Z" },
]
[[package]]
name = "yarl"
version = "1.20.1"
@ -1956,29 +1747,3 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" },
{ url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" },
]
[[package]]
name = "zope-interface"
version = "8.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/88/3a/7fcf02178b8fad0a51e67e32765cd039ae505d054d744d76b8c2bbcba5ba/zope_interface-8.0.1.tar.gz", hash = "sha256:eba5610d042c3704a48222f7f7c6ab5b243ed26f917e2bc69379456b115e02d1", size = 253746, upload-time = "2025-09-25T05:55:51.285Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f2/2f/c10c739bcb9b072090c97c2e08533777497190daa19d190d72b4cce9c7cb/zope_interface-8.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4bd01022d2e1bce4a4a4ed9549edb25393c92e607d7daa6deff843f1f68b479d", size = 207903, upload-time = "2025-09-25T05:58:21.671Z" },
{ url = "https://files.pythonhosted.org/packages/b5/e1/9845ac3697f108d9a1af6912170c59a23732090bbfb35955fe77e5544955/zope_interface-8.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:29be8db8b712d94f1c05e24ea230a879271d787205ba1c9a6100d1d81f06c69a", size = 208345, upload-time = "2025-09-25T05:58:24.217Z" },
{ url = "https://files.pythonhosted.org/packages/f2/49/6573bc8b841cfab18e80c8e8259f1abdbbf716140011370de30231be79ad/zope_interface-8.0.1-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:51ae1b856565b30455b7879fdf0a56a88763b401d3f814fa9f9542d7410dbd7e", size = 255027, upload-time = "2025-09-25T05:58:19.975Z" },
{ url = "https://files.pythonhosted.org/packages/e2/fd/908b0fd4b1ab6e412dfac9bd2b606f2893ef9ba3dd36d643f5e5b94c57b3/zope_interface-8.0.1-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d2e7596149cb1acd1d4d41b9f8fe2ffc0e9e29e2e91d026311814181d0d9efaf", size = 259800, upload-time = "2025-09-25T05:58:11.487Z" },
{ url = "https://files.pythonhosted.org/packages/dc/78/8419a2b4e88410520ed4b7f93bbd25a6d4ae66c4e2b131320f2b90f43077/zope_interface-8.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2737c11c34fb9128816759864752d007ec4f987b571c934c30723ed881a7a4f", size = 260978, upload-time = "2025-09-25T06:26:24.483Z" },
{ url = "https://files.pythonhosted.org/packages/e5/90/caf68152c292f1810e2bd3acd2177badf08a740aa8a348714617d6c9ad0b/zope_interface-8.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:cf66e4bf731aa7e0ced855bb3670e8cda772f6515a475c6a107bad5cb6604103", size = 212155, upload-time = "2025-09-25T05:59:40.318Z" },
{ url = "https://files.pythonhosted.org/packages/dc/a6/0f08713ddda834c428ebf97b2a7fd8dea50c0100065a8955924dbd94dae8/zope_interface-8.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:115f27c1cc95ce7a517d960ef381beedb0a7ce9489645e80b9ab3cbf8a78799c", size = 208609, upload-time = "2025-09-25T05:58:53.698Z" },
{ url = "https://files.pythonhosted.org/packages/e9/5e/d423045f54dc81e0991ec655041e7a0eccf6b2642535839dd364b35f4d7f/zope_interface-8.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af655c573b84e3cb6a4f6fd3fbe04e4dc91c63c6b6f99019b3713ef964e589bc", size = 208797, upload-time = "2025-09-25T05:58:56.258Z" },
{ url = "https://files.pythonhosted.org/packages/c6/43/39d4bb3f7a80ebd261446792493cfa4e198badd47107224f5b6fe1997ad9/zope_interface-8.0.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:23f82ef9b2d5370750cc1bf883c3b94c33d098ce08557922a3fbc7ff3b63dfe1", size = 259242, upload-time = "2025-09-25T05:58:21.602Z" },
{ url = "https://files.pythonhosted.org/packages/da/29/49effcff64ef30731e35520a152a9dfcafec86cf114b4c2aff942e8264ba/zope_interface-8.0.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35a1565d5244997f2e629c5c68715b3d9d9036e8df23c4068b08d9316dcb2822", size = 264696, upload-time = "2025-09-25T05:58:13.351Z" },
{ url = "https://files.pythonhosted.org/packages/c7/39/b947673ec9a258eeaa20208dd2f6127d9fbb3e5071272a674ebe02063a78/zope_interface-8.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:029ea1db7e855a475bf88d9910baab4e94d007a054810e9007ac037a91c67c6f", size = 264229, upload-time = "2025-09-25T06:26:26.226Z" },
{ url = "https://files.pythonhosted.org/packages/8f/ee/eed6efd1fc3788d1bef7a814e0592d8173b7fe601c699b935009df035fc2/zope_interface-8.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0beb3e7f7dc153944076fcaf717a935f68d39efa9fce96ec97bafcc0c2ea6cab", size = 212270, upload-time = "2025-09-25T05:58:53.584Z" },
{ url = "https://files.pythonhosted.org/packages/5f/dc/3c12fca01c910c793d636ffe9c0984e0646abaf804e44552070228ed0ede/zope_interface-8.0.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:c7cc027fc5c61c5d69e5080c30b66382f454f43dc379c463a38e78a9c6bab71a", size = 208992, upload-time = "2025-09-25T05:58:40.712Z" },
{ url = "https://files.pythonhosted.org/packages/46/71/6127b7282a3e380ca927ab2b40778a9c97935a4a57a2656dadc312db5f30/zope_interface-8.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fcf9097ff3003b7662299f1c25145e15260ec2a27f9a9e69461a585d79ca8552", size = 209051, upload-time = "2025-09-25T05:58:42.182Z" },
{ url = "https://files.pythonhosted.org/packages/56/86/4387a9f951ee18b0e41fda77da77d59c33e59f04660578e2bad688703e64/zope_interface-8.0.1-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6d965347dd1fb9e9a53aa852d4ded46b41ca670d517fd54e733a6b6a4d0561c2", size = 259223, upload-time = "2025-09-25T05:58:23.191Z" },
{ url = "https://files.pythonhosted.org/packages/61/08/ce60a114466abc067c68ed41e2550c655f551468ae17b4b17ea360090146/zope_interface-8.0.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9a3b8bb77a4b89427a87d1e9eb969ab05e38e6b4a338a9de10f6df23c33ec3c2", size = 264690, upload-time = "2025-09-25T05:58:15.052Z" },
{ url = "https://files.pythonhosted.org/packages/36/9a/62a9ba3a919594605a07c34eee3068659bbd648e2fa0c4a86d876810b674/zope_interface-8.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:87e6b089002c43231fb9afec89268391bcc7a3b66e76e269ffde19a8112fb8d5", size = 264201, upload-time = "2025-09-25T06:26:27.797Z" },
{ url = "https://files.pythonhosted.org/packages/da/06/8fe88bd7edef60566d21ef5caca1034e10f6b87441ea85de4bbf9ea74768/zope_interface-8.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:64a43f5280aa770cbafd0307cb3d1ff430e2a1001774e8ceb40787abe4bb6658", size = 212273, upload-time = "2025-09-25T06:00:25.398Z" },
]