Merge pull request #140 from barrett-ruth/feat/async-scrapers

Asynchronous Scrapers
This commit is contained in:
Barrett Ruth 2025-10-04 05:34:26 +02:00 committed by GitHub
commit 8f466f135a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1034 additions and 2079 deletions

View file

@ -79,29 +79,22 @@ end
---@param platform string ---@param platform string
---@param contest_id string ---@param contest_id string
---@return ContestData? ---@return ContestData
function M.get_contest_data(platform, contest_id) function M.get_contest_data(platform, contest_id)
vim.validate({ vim.validate({
platform = { platform, 'string' }, platform = { platform, 'string' },
contest_id = { contest_id, 'string' }, contest_id = { contest_id, 'string' },
}) })
if not cache_data[platform] then return cache_data[platform][contest_id] or {}
return nil
end
local contest_data = cache_data[platform][contest_id]
if not contest_data or vim.tbl_isempty(contest_data) then
return nil
end
return contest_data
end end
---@param platform string ---@param platform string
---@param contest_id string ---@param contest_id string
---@param problems Problem[] ---@param problems Problem[]
function M.set_contest_data(platform, contest_id, problems) ---@param contest_name? string
---@param display_name? string
function M.set_contest_data(platform, contest_id, problems, contest_name, display_name)
vim.validate({ vim.validate({
platform = { platform, 'string' }, platform = { platform, 'string' },
contest_id = { contest_id, 'string' }, contest_id = { contest_id, 'string' },
@ -109,36 +102,17 @@ function M.set_contest_data(platform, contest_id, problems)
}) })
cache_data[platform] = cache_data[platform] or {} cache_data[platform] = cache_data[platform] or {}
local existing = cache_data[platform][contest_id] or {} local out = {
name = contest_name,
local existing_by_id = {} display_name = display_name,
if existing.problems then problems = vim.deepcopy(problems),
for _, p in ipairs(existing.problems) do index_map = {},
existing_by_id[p.id] = p }
end for i, p in ipairs(out.problems) do
out.index_map[p.id] = i
end end
local merged = {} cache_data[platform][contest_id] = out
for _, p in ipairs(problems) do
local prev = existing_by_id[p.id] or {}
local merged_p = {
id = p.id,
name = p.name or prev.name,
test_cases = prev.test_cases,
timeout_ms = prev.timeout_ms,
memory_mb = prev.memory_mb,
interactive = prev.interactive,
}
table.insert(merged, merged_p)
end
existing.problems = merged
existing.index_map = {}
for i, p in ipairs(merged) do
existing.index_map[p.id] = i
end
cache_data[platform][contest_id] = existing
M.save() M.save()
end end

View file

@ -36,9 +36,8 @@ function M.get_platforms()
return result return result
end end
---Get list of contests for a specific platform ---@param platform string
---@param platform string Platform identifier (e.g. "codeforces", "atcoder") ---@param refresh? boolean
---@param refresh? boolean Whether to skip caching and append new contests
---@return cp.ContestItem[] ---@return cp.ContestItem[]
function M.get_platform_contests(platform, refresh) function M.get_platform_contests(platform, refresh)
logger.log( logger.log(
@ -48,24 +47,21 @@ function M.get_platform_contests(platform, refresh)
) )
cache.load() cache.load()
local picker_contests = cache.get_contest_summaries(platform) local picker_contests = cache.get_contest_summaries(platform)
if refresh or vim.tbl_isempty(picker_contests) then if refresh or vim.tbl_isempty(picker_contests) then
logger.log(('Cache miss on %s contests'):format(platform)) logger.log(('Cache miss on %s contests'):format(platform))
local contests = scraper.scrape_contest_list(platform) local contests = scraper.scrape_contest_list(platform) -- sync
cache.set_contest_summaries(platform, contests) cache.set_contest_summaries(platform, contests)
picker_contests = cache.get_contest_summaries(platform) -- <-- reload after write
end end
logger.log( logger.log(
('Loaded %s %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]), ('Loaded %d %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]),
vim.log.levels.INFO, vim.log.levels.INFO,
true true
) )
picker_contests = cache.get_contest_summaries(platform)
return picker_contests return picker_contests
end end

View file

@ -31,7 +31,7 @@ local function substitute_template(cmd_template, substitutions)
return out return out
end end
local function build_command(cmd_template, executable, substitutions) function M.build_command(cmd_template, executable, substitutions)
local cmd = substitute_template(cmd_template, substitutions) local cmd = substitute_template(cmd_template, substitutions)
if executable then if executable then
table.insert(cmd, 1, executable) table.insert(cmd, 1, executable)
@ -198,10 +198,4 @@ function M.compile_problem(contest_config, is_debug)
return { success = true, output = nil } return { success = true, output = nil }
end end
M._util = {
get_language_from_file = get_language_from_file,
substitute_template = substitute_template,
build_command = build_command,
}
return M return M

View file

@ -78,8 +78,8 @@ end
---@param substitutions table<string, string> ---@param substitutions table<string, string>
---@return string[] ---@return string[]
local function build_command(language_config, substitutions) local function build_command(language_config, substitutions)
local exec_util = require('cp.runner.execute')._util local execute = require('cp.runner.execute')
return exec_util.build_command(language_config.test, language_config.executable, substitutions) return execute.build_command(language_config.test, language_config.executable, substitutions)
end end
---@param contest_config ContestConfig ---@param contest_config ContestConfig
@ -98,28 +98,6 @@ local function run_single_test_case(contest_config, cp_config, test_case)
local binary_file = state.get_binary_file() local binary_file = state.get_binary_file()
local substitutions = { source = source_file, binary = binary_file } local substitutions = { source = source_file, binary = binary_file }
if language_config.compile and binary_file and vim.fn.filereadable(binary_file) == 0 then
local cr = exec.compile(language_config, substitutions)
local ansi = require('cp.ui.ansi')
local clean = ansi.bytes_to_string(cr.stdout or '')
if cr.code ~= 0 then
return {
status = 'fail',
actual = clean,
actual_highlights = {},
error = 'Compilation failed',
stderr = clean,
time_ms = 0,
rss_mb = 0,
code = cr.code,
ok = false,
signal = nil,
tled = false,
mled = false,
}
end
end
local cmd = build_command(language_config, substitutions) local cmd = build_command(language_config, substitutions)
local stdin_content = (test_case.input or '') .. '\n' local stdin_content = (test_case.input or '') .. '\n'
local timeout_ms = (run_panel_state.constraints and run_panel_state.constraints.timeout_ms) or 0 local timeout_ms = (run_panel_state.constraints and run_panel_state.constraints.timeout_ms) or 0

View file

@ -1,67 +1,110 @@
local M = {} local M = {}
local utils = require('cp.utils')
local logger = require('cp.log') local logger = require('cp.log')
local utils = require('cp.utils')
local function syshandle(result) local function syshandle(result)
if result.code ~= 0 then if result.code ~= 0 then
local msg = 'Scraper failed: ' .. (result.stderr or 'Unknown error') local msg = 'Scraper failed: ' .. (result.stderr or 'Unknown error')
logger.log(msg, vim.log.levels.ERROR) logger.log(msg, vim.log.levels.ERROR)
return { return { success = false, error = msg }
success = false,
error = msg,
}
end end
local ok, data = pcall(vim.json.decode, result.stdout) local ok, data = pcall(vim.json.decode, result.stdout)
if not ok then if not ok then
local msg = 'Failed to parse scraper output: ' .. tostring(data) local msg = 'Failed to parse scraper output: ' .. tostring(data)
logger.log(msg, vim.log.levels.ERROR) logger.log(msg, vim.log.levels.ERROR)
return { return { success = false, error = msg }
success = false,
error = msg,
}
end end
return { return { success = true, data = data }
success = true,
data = data,
}
end end
---@param platform string
---@param subcommand string
---@param args string[]
---@param opts { sync?: boolean, ndjson?: boolean, on_event?: fun(ev: table), on_exit?: fun(result: table) }
local function run_scraper(platform, subcommand, args, opts) local function run_scraper(platform, subcommand, args, opts)
if not utils.setup_python_env() then
local msg = 'Python environment setup failed'
logger.log(msg, vim.log.levels.ERROR)
return {
success = false,
message = msg,
}
end
local plugin_path = utils.get_plugin_path() local plugin_path = utils.get_plugin_path()
local cmd = { local cmd = { 'uv', 'run', '--directory', plugin_path, '-m', 'scrapers.' .. platform, subcommand }
'uv',
'run',
'--directory',
plugin_path,
'-m',
'scrapers.' .. platform,
subcommand,
}
vim.list_extend(cmd, args) vim.list_extend(cmd, args)
local sysopts = { if opts and opts.ndjson then
text = true, local uv = vim.loop
timeout = 30000, local stdout = uv.new_pipe(false)
} local stderr = uv.new_pipe(false)
local buf = ''
if opts.sync then local handle
handle = uv.spawn(
cmd[1],
{ args = vim.list_slice(cmd, 2), stdio = { nil, stdout, stderr } },
function(code, signal)
if buf ~= '' and opts.on_event then
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
if ok_tail then
opts.on_event(ev_tail)
end
buf = ''
end
if opts.on_exit then
opts.on_exit({ success = (code == 0), code = code, signal = signal })
end
if not stdout:is_closing() then
stdout:close()
end
if not stderr:is_closing() then
stderr:close()
end
if handle and not handle:is_closing() then
handle:close()
end
end
)
if not handle then
logger.log('Failed to start scraper process', vim.log.levels.ERROR)
return { success = false, error = 'spawn failed' }
end
uv.read_start(stdout, function(_, data)
if data == nil then
if buf ~= '' and opts.on_event then
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
if ok_tail then
opts.on_event(ev_tail)
end
buf = ''
end
return
end
buf = buf .. data
while true do
local s, e = buf:find('\n', 1, true)
if not s then
break
end
local line = buf:sub(1, s - 1)
buf = buf:sub(e + 1)
local ok, ev = pcall(vim.json.decode, line)
if ok and opts.on_event then
opts.on_event(ev)
end
end
end)
uv.read_start(stderr, function(_, _) end)
return
end
local sysopts = { text = true, timeout = 30000 }
if opts and opts.sync then
local result = vim.system(cmd, sysopts):wait() local result = vim.system(cmd, sysopts):wait()
return syshandle(result) return syshandle(result)
else else
vim.system(cmd, sysopts, function(result) vim.system(cmd, sysopts, function(result)
return opts.on_exit(syshandle(result)) if opts and opts.on_exit then
return opts.on_exit(syshandle(result))
end
end) end)
end end
end end
@ -93,50 +136,59 @@ end
function M.scrape_contest_list(platform) function M.scrape_contest_list(platform)
local result = run_scraper(platform, 'contests', {}, { sync = true }) local result = run_scraper(platform, 'contests', {}, { sync = true })
if not result.success or not result.data.contests then if not result or not result.success or not (result.data and result.data.contests) then
logger.log( logger.log(
('Could not scrape contests list for platform %s: %s'):format(platform, result.msg), ('Could not scrape contests list for platform %s: %s'):format(
platform,
(result and result.error) or 'unknown'
),
vim.log.levels.ERROR vim.log.levels.ERROR
) )
return {} return {}
end end
return result.data.contests return result.data.contests
end end
function M.scrape_problem_tests(platform, contest_id, problem_id, callback) ---@param platform string
run_scraper(platform, 'tests', { contest_id, problem_id }, { ---@param contest_id string
on_exit = function(result) ---@param callback fun(data: table)|nil
if not result.success or not result.data.tests then function M.scrape_all_tests(platform, contest_id, callback)
logger.log( run_scraper(platform, 'tests', { contest_id }, {
'Failed to load tests: ' .. (result.msg or 'unknown error'), ndjson = true,
vim.log.levels.ERROR on_event = function(ev)
) if ev.done then
return
return {} end
if ev.error and ev.problem_id then
logger.log(
('Failed to load tests for %s/%s: %s'):format(contest_id, ev.problem_id, ev.error),
vim.log.levels.WARN
)
return
end
if not ev.problem_id or not ev.tests then
return
end end
vim.schedule(function() vim.schedule(function()
vim.system({ 'mkdir', '-p', 'build', 'io' }):wait() vim.system({ 'mkdir', '-p', 'build', 'io' }):wait()
local config = require('cp.config') local config = require('cp.config')
local base_name = config.default_filename(contest_id, problem_id) local base_name = config.default_filename(contest_id, ev.problem_id)
for i, t in ipairs(ev.tests) do
for i, test_case in ipairs(result.data.tests) do
local input_file = 'io/' .. base_name .. '.' .. i .. '.cpin' local input_file = 'io/' .. base_name .. '.' .. i .. '.cpin'
local expected_file = 'io/' .. base_name .. '.' .. i .. '.cpout' local expected_file = 'io/' .. base_name .. '.' .. i .. '.cpout'
local input_content = t.input:gsub('\r', '')
local input_content = test_case.input:gsub('\r', '') local expected_content = t.expected:gsub('\r', '')
local expected_content = test_case.expected:gsub('\r', '') vim.fn.writefile(vim.split(input_content, '\n', { trimempty = true }), input_file)
vim.fn.writefile(vim.split(expected_content, '\n', { trimempty = true }), expected_file)
pcall(vim.fn.writefile, vim.split(input_content, '\n', { trimempty = true }), input_file)
pcall(
vim.fn.writefile,
vim.split(expected_content, '\n', { trimempty = true }),
expected_file
)
end end
if type(callback) == 'function' then if type(callback) == 'function' then
callback(result.data) callback({
tests = ev.tests,
timeout_ms = ev.timeout_ms or 0,
memory_mb = ev.memory_mb or 0,
interactive = ev.interactive or false,
problem_id = ev.problem_id,
})
end end
end) end)
end, end,

View file

@ -28,45 +28,26 @@ function M.set_platform(platform)
return true return true
end end
local function backfill_missing_tests(platform, contest_id, problems) ---@class TestCaseLite
cache.load() ---@field input string
local missing = {} ---@field expected string
for _, prob in ipairs(problems) do
if not cache.get_test_cases(platform, contest_id, prob.id) then
table.insert(missing, prob.id)
end
end
if #missing == 0 then
logger.log(('All problems already cached for %s contest %s.'):format(platform, contest_id))
return
end
for _, pid in ipairs(missing) do
local captured = pid
scraper.scrape_problem_tests(platform, contest_id, captured, function(result)
local cached_tests = {}
if result.tests then
for i, t in ipairs(result.tests) do
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
end
end
cache.set_test_cases(
platform,
contest_id,
captured,
cached_tests,
result.timeout_ms,
result.memory_mb
)
end)
end
end
---@class ScrapeEvent
---@field problem_id string
---@field tests TestCaseLite[]|nil
---@field timeout_ms integer|nil
---@field memory_mb integer|nil
---@field interactive boolean|nil
---@field error string|nil
---@field done boolean|nil
---@field succeeded integer|nil
---@field failed integer|nil
---@param platform string
---@param contest_id string
---@param language string|nil
---@param problem_id string|nil
function M.setup_contest(platform, contest_id, language, problem_id) function M.setup_contest(platform, contest_id, language, problem_id)
if not platform then
logger.log('No platform configured. Use :CP <platform> <contest> [--{lang=<lang>,debug} first.')
return
end
local config = config_module.get_config() local config = config_module.get_config()
if not vim.tbl_contains(config.scrapers, platform) then if not vim.tbl_contains(config.scrapers, platform) then
logger.log(('Scraping disabled for %s.'):format(platform), vim.log.levels.WARN) logger.log(('Scraping disabled for %s.'):format(platform), vim.log.levels.WARN)
@ -75,28 +56,47 @@ function M.setup_contest(platform, contest_id, language, problem_id)
state.set_contest_id(contest_id) state.set_contest_id(contest_id)
cache.load() cache.load()
local contest_data = cache.get_contest_data(platform, contest_id)
local function proceed(contest_data)
local problems = contest_data.problems
local pid = problems[(problem_id and contest_data.index_map[problem_id] or 1)].id
M.setup_problem(pid, language)
local cached_len = #vim.tbl_filter(function(p)
return cache.get_test_cases(platform, contest_id, p.id) ~= nil
end, problems)
if cached_len ~= #problems then
scraper.scrape_all_tests(platform, contest_id, function(ev)
local cached_tests = {}
for i, t in ipairs(ev.tests) do
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
end
cache.set_test_cases(
platform,
contest_id,
ev.problem_id,
cached_tests,
ev.timeout_ms or 0,
ev.memory_mb or 0
)
end)
end
end
local contest_data = cache.get_contest_data(platform, contest_id)
if not contest_data or not contest_data.problems then if not contest_data or not contest_data.problems then
logger.log('Fetching contests problems...', vim.log.levels.INFO, true) logger.log('Fetching contests problems...', vim.log.levels.INFO, true)
scraper.scrape_contest_metadata(platform, contest_id, function(result) scraper.scrape_contest_metadata(platform, contest_id, function(result)
local problems = result.problems or {} local problems = result.problems or {}
cache.set_contest_data(platform, contest_id, problems) cache.set_contest_data(platform, contest_id, problems, result.name, result.display_name)
logger.log(('Found %d problems for %s contest %s.'):format(#problems, platform, contest_id)) logger.log(('Found %d problems for %s contest %s.'):format(#problems, platform, contest_id))
local pid = problem_id or (problems[1] and problems[1].id) proceed(cache.get_contest_data(platform, contest_id))
if pid then
M.setup_problem(pid, language)
end
backfill_missing_tests(platform, contest_id, problems)
end) end)
else return
local problems = contest_data.problems
local pid = problem_id or (problems[1] and problems[1].id)
if pid then
M.setup_problem(pid, language)
end
backfill_missing_tests(platform, contest_id, problems)
end end
proceed(contest_data)
end end
---@param problem_id string ---@param problem_id string
@ -195,19 +195,9 @@ function M.navigate_problem(direction, language)
end end
local problems = contest_data.problems local problems = contest_data.problems
local current_index local index = contest_data.index_map[current_problem_id]
for i, prob in ipairs(problems) do
if prob.id == current_problem_id then
current_index = i
break
end
end
if not current_index then
M.setup_contest(platform, contest_id, language, problems[1].id)
return
end
local new_index = current_index + direction local new_index = index + direction
if new_index < 1 or new_index > #problems then if new_index < 1 or new_index > #problems then
return return
end end

View file

@ -57,7 +57,7 @@ local function find_gnu_time()
_time_cached = true _time_cached = true
_time_path = nil _time_path = nil
_time_reason = 'GNU time not found (install `time` on Linux or `brew install coreutils` on macOS)' _time_reason = 'GNU time not found'
return _time_path, _time_reason return _time_path, _time_reason
end end
@ -214,7 +214,7 @@ local function find_gnu_timeout()
_timeout_cached = true _timeout_cached = true
_timeout_path = nil _timeout_path = nil
_timeout_reason = 'GNU timeout not found (install `coreutils`; macOS: `brew install coreutils`)' _timeout_reason = 'GNU timeout not found'
return _timeout_path, _timeout_reason return _timeout_path, _timeout_reason
end end

View file

@ -8,10 +8,10 @@ dependencies = [
"backoff>=2.2.1", "backoff>=2.2.1",
"beautifulsoup4>=4.13.5", "beautifulsoup4>=4.13.5",
"curl-cffi>=0.13.0", "curl-cffi>=0.13.0",
"playwright>=1.55.0", "httpx>=0.28.1",
"ndjson>=0.3.1",
"requests>=2.32.5", "requests>=2.32.5",
"scrapling[fetchers]>=0.3.5", "scrapling[fetchers]>=0.3.5",
"scrapy>=2.13.3",
] ]
[dependency-groups] [dependency-groups]
@ -22,6 +22,7 @@ dev = [
"pytest>=8.0.0", "pytest>=8.0.0",
"pytest-mock>=3.12.0", "pytest-mock>=3.12.0",
"pre-commit>=4.3.0", "pre-commit>=4.3.0",
"basedpyright>=1.31.6",
] ]
[tool.pytest.ini_options] [tool.pytest.ini_options]

View file

@ -1,14 +1,19 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import concurrent.futures import asyncio
import json import json
import re import re
import sys import sys
import time
from dataclasses import asdict from dataclasses import asdict
from typing import Any
import backoff import backoff
import httpx
import requests import requests
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from .base import BaseScraper from .base import BaseScraper
from .models import ( from .models import (
@ -20,398 +25,352 @@ from .models import (
TestsResult, TestsResult,
) )
MIB_TO_MB = 1.048576
BASE_URL = "https://atcoder.jp"
ARCHIVE_URL = f"{BASE_URL}/contests/archive"
TIMEOUT_SECONDS = 30
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
RETRY_STATUS = {429, 502, 503, 504}
FATAL_STATUS = {400, 401, 403, 404, 410}
def _make_request(url: str, timeout: int = 10) -> requests.Response: _session = requests.Session()
headers = { _adapter = HTTPAdapter(
"User-Agent": ( pool_connections=100,
"Mozilla/5.0 (X11; Linux x86_64) " pool_maxsize=100,
"AppleWebKit/537.36 (KHTML, like Gecko) " max_retries=Retry(total=0),
"Chrome/120.0.0.0 Safari/537.36" )
) _session.mount("https://", _adapter)
} _session.mount("http://", _adapter)
@backoff.on_exception(
backoff.expo, def _give_up_requests(exc: Exception) -> bool:
(requests.exceptions.RequestException, requests.exceptions.HTTPError), if isinstance(exc, requests.HTTPError) and exc.response is not None:
max_tries=5, return exc.response.status_code in FATAL_STATUS
jitter=backoff.random_jitter, return False
on_backoff=lambda details: print(
f"Request error on {url} (attempt {details['tries']}), "
f"retrying in {details['wait']:.1f}s: {details['exception']}", def _retry_after_requests(details):
file=sys.stderr, exc = details.get("exception")
), if isinstance(exc, requests.HTTPError) and exc.response is not None:
ra = exc.response.headers.get("Retry-After")
if ra:
try:
time.sleep(max(0.0, float(ra)))
except ValueError:
pass
@backoff.on_exception(
backoff.expo,
(requests.ConnectionError, requests.Timeout, requests.HTTPError),
max_tries=5,
jitter=backoff.full_jitter,
giveup=_give_up_requests,
on_backoff=_retry_after_requests,
)
def _fetch(url: str) -> str:
r = _session.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
if r.status_code in RETRY_STATUS:
raise requests.HTTPError(response=r)
r.raise_for_status()
return r.text
def _giveup_httpx(exc: Exception) -> bool:
return (
isinstance(exc, httpx.HTTPStatusError)
and exc.response is not None
and (exc.response.status_code in FATAL_STATUS)
) )
@backoff.on_predicate(
backoff.expo,
lambda resp: resp.status_code == 429, @backoff.on_exception(
max_tries=5, backoff.expo,
jitter=backoff.random_jitter, (httpx.ConnectError, httpx.ReadTimeout, httpx.HTTPStatusError),
on_backoff=lambda details: print( max_tries=5,
f"Rate limited on {url}, retrying in {details['wait']:.1f}s", jitter=backoff.full_jitter,
file=sys.stderr, giveup=_giveup_httpx,
), )
async def _get_async(client: httpx.AsyncClient, url: str) -> str:
r = await client.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
r.raise_for_status()
return r.text
def _text_from_pre(pre: Tag) -> str:
return (
pre.get_text(separator="\n", strip=False)
.replace("\r", "")
.replace("\xa0", " ")
.rstrip("\n")
) )
def _req():
return requests.get(url, headers=headers, timeout=timeout)
resp = _req()
resp.raise_for_status()
return resp
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]: def _parse_last_page(html: str) -> int:
timeout_ms = None soup = BeautifulSoup(html, "html.parser")
memory_mb = None nav = soup.select_one("ul.pagination")
if not nav:
return 1
nums = []
for a in nav.select("a"):
s = a.get_text(strip=True)
if s.isdigit():
nums.append(int(s))
return max(nums) if nums else 1
paragraphs = soup.find_all("p")
for p in paragraphs:
text = p.get_text()
if "Time Limit:" in text and "Memory Limit:" in text:
time_match = re.search(r"Time Limit:\s*(\d+)\s*sec", text)
if time_match:
seconds = int(time_match.group(1))
timeout_ms = seconds * 1000
memory_match = re.search(r"Memory Limit:\s*(\d+)\s*MiB", text) def _parse_archive_contests(html: str) -> list[ContestSummary]:
if memory_match: soup = BeautifulSoup(html, "html.parser")
memory_mib = int(memory_match.group(1)) tbody = soup.select_one("table.table-default tbody") or soup.select_one("tbody")
memory_mb = round(memory_mib * 1.048576, 2) if not tbody:
break return []
out: list[ContestSummary] = []
for tr in tbody.select("tr"):
a = tr.select_one("a[href^='/contests/']")
if not a:
continue
href_attr = a.get("href")
if not isinstance(href_attr, str):
continue
m = re.search(r"/contests/([^/?#]+)", href_attr)
if not m:
continue
cid = m.group(1)
name = a.get_text(strip=True)
out.append(ContestSummary(id=cid, name=name, display_name=name))
return out
if timeout_ms is None:
raise ValueError("Could not find valid timeout in problem constraints")
if memory_mb is None: def _parse_tasks_list(html: str) -> list[dict[str, str]]:
raise ValueError("Could not find valid memory limit in problem constraints") soup = BeautifulSoup(html, "html.parser")
tbody = soup.select_one("table tbody")
if not tbody:
return []
rows: list[dict[str, str]] = []
for tr in tbody.select("tr"):
tds = tr.select("td")
if len(tds) < 2:
continue
letter = tds[0].get_text(strip=True)
a = tds[1].select_one("a[href*='/tasks/']")
if not a:
continue
href_attr = a.get("href")
if not isinstance(href_attr, str):
continue
m = re.search(r"/contests/[^/]+/tasks/([^/?#]+)", href_attr)
if not m:
continue
slug = m.group(1)
title = a.get_text(strip=True)
rows.append({"letter": letter, "title": title, "slug": slug})
return rows
def _extract_limits(html: str) -> tuple[int, float]:
soup = BeautifulSoup(html, "html.parser")
txt = soup.get_text(" ", strip=True)
timeout_ms = 0
memory_mb = 0.0
ts = re.search(r"Time\s*Limit:\s*([\d.]+)\s*sec", txt, flags=re.I)
if ts:
timeout_ms = int(float(ts.group(1)) * 1000)
ms = re.search(r"Memory\s*Limit:\s*(\d+)\s*MiB", txt, flags=re.I)
if ms:
memory_mb = float(ms.group(1)) * MIB_TO_MB
return timeout_ms, memory_mb return timeout_ms, memory_mb
def parse_problem_url(contest_id: str, problem_letter: str) -> str: def _extract_samples(html: str) -> list[TestCase]:
task_id: str = f"{contest_id}_{problem_letter}" soup = BeautifulSoup(html, "html.parser")
return f"https://atcoder.jp/contests/{contest_id}/tasks/{task_id}" root = soup.select_one("#task-statement") or soup
inputs: dict[str, str] = {}
outputs: dict[str, str] = {}
for h in root.find_all(re.compile(r"h[2-4]")):
title = h.get_text(" ", strip=True)
pre = h.find_next("pre")
if not pre:
continue
t = _text_from_pre(pre)
mi = re.search(r"Sample\s*Input\s*(\d+)", title, flags=re.I)
mo = re.search(r"Sample\s*Output\s*(\d+)", title, flags=re.I)
if mi:
inputs[mi.group(1)] = t
elif mo:
outputs[mo.group(1)] = t
cases: list[TestCase] = []
for k in sorted(set(inputs) & set(outputs), key=lambda s: int(s)):
cases.append(TestCase(input=inputs[k], expected=outputs[k]))
return cases
def extract_problem_from_row(row, contest_id: str) -> ProblemSummary | None: def _scrape_tasks_sync(contest_id: str) -> list[dict[str, str]]:
cells = row.find_all("td") html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks")
if len(cells) < 2: return _parse_tasks_list(html)
return None
task_link = cells[1].find("a")
if not task_link:
return None
task_name = task_link.get_text(strip=True)
task_href = task_link.get("href", "")
if not task_href:
return None
task_id = task_href.split("/")[-1]
if not task_id.startswith(contest_id + "_"):
return None
problem_letter = task_id[len(contest_id) + 1 :]
if not problem_letter or not task_name:
return None
return ProblemSummary(id=problem_letter.lower(), name=task_name)
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: def _scrape_problem_page_sync(contest_id: str, slug: str) -> dict[str, Any]:
try: html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks/{slug}")
contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks" tests = _extract_samples(html)
response = _make_request(contest_url) timeout_ms, memory_mb = _extract_limits(html)
return {
soup = BeautifulSoup(response.text, "html.parser") "tests": tests,
task_table = soup.find("table", class_="table") "timeout_ms": timeout_ms,
if not task_table or not isinstance(task_table, Tag): "memory_mb": memory_mb,
return [] "interactive": False,
}
rows = task_table.find_all("tr")[1:]
problems: list[ProblemSummary] = []
for row in rows:
problem = extract_problem_from_row(row, contest_id)
if problem:
problems.append(problem)
return problems
except Exception as e:
print(f"Failed to scrape AtCoder contest problems: {e}", file=sys.stderr)
return []
def extract_test_case_from_headers(sample_headers, i: int) -> tuple[str, str] | None: def _to_problem_summaries(rows: list[dict[str, str]]) -> list[ProblemSummary]:
if i >= len(sample_headers): out: list[ProblemSummary] = []
return None seen: set[str] = set()
for r in rows:
header = sample_headers[i] letter = (r.get("letter") or "").strip().upper()
if "input" not in header.get_text().lower(): title = r.get("title") or ""
return None if not letter:
continue
input_pre = header.find_next("pre") pid = letter.lower()
if not input_pre or i + 1 >= len(sample_headers): if pid in seen:
return None continue
seen.add(pid)
next_header = sample_headers[i + 1] out.append(ProblemSummary(id=pid, name=title))
if "output" not in next_header.get_text().lower(): return out
return None
output_pre = next_header.find_next("pre")
if not output_pre:
return None
input_text = input_pre.get_text().strip().replace("\r", "")
output_text = output_pre.get_text().strip().replace("\r", "")
if not input_text or not output_text:
return None
return (input_text, output_text)
def scrape(url: str) -> list[TestCase]: async def _fetch_all_contests_async() -> list[ContestSummary]:
try: async with httpx.AsyncClient(
response = _make_request(url) limits=httpx.Limits(max_connections=100, max_keepalive_connections=100)
) as client:
soup = BeautifulSoup(response.text, "html.parser") first_html = await _get_async(client, ARCHIVE_URL)
sample_headers = soup.find_all( last = _parse_last_page(first_html)
"h3", string=lambda x: x and "sample" in x.lower() if x else False out = _parse_archive_contests(first_html)
) if last <= 1:
return out
tests: list[TestCase] = [] tasks = [
i = 0 asyncio.create_task(_get_async(client, f"{ARCHIVE_URL}?page={p}"))
while i < len(sample_headers): for p in range(2, last + 1)
test_case = extract_test_case_from_headers(sample_headers, i) ]
if test_case: for coro in asyncio.as_completed(tasks):
input_text, output_text = test_case html = await coro
tests.append(TestCase(input=input_text, expected=output_text)) out.extend(_parse_archive_contests(html))
i += 2 return out
else:
i += 1
return tests
except Exception as e:
print(f"Error scraping AtCoder: {e}", file=sys.stderr)
return []
def scrape_contests() -> list[ContestSummary]: class AtcoderScraper(BaseScraper):
def get_max_pages() -> int:
try:
response = _make_request("https://atcoder.jp/contests/archive")
soup = BeautifulSoup(response.text, "html.parser")
pagination = soup.find("ul", class_="pagination")
if not pagination or not isinstance(pagination, Tag):
return 15
lis = pagination.find_all("li")
if lis and isinstance(lis[-1], Tag):
last_li_text = lis[-1].get_text().strip()
try:
return int(last_li_text)
except ValueError:
return 15
return 15
except Exception:
return 15
def scrape_page(page: int) -> list[ContestSummary]:
try:
response = _make_request(f"https://atcoder.jp/contests/archive?page={page}")
except Exception:
return []
soup = BeautifulSoup(response.text, "html.parser")
table = soup.find("table", class_="table")
if not table:
return []
tbody = table.find("tbody")
if not tbody or not isinstance(tbody, Tag):
return []
rows = tbody.find_all("tr")
if not rows:
return []
contests = []
for row in rows:
cells = row.find_all("td")
if len(cells) < 2:
continue
contest_cell = cells[1]
link = contest_cell.find("a")
if not link or not link.get("href"):
continue
href = link.get("href")
contest_id = href.split("/")[-1]
name = link.get_text().strip()
try:
name = name.encode().decode("unicode_escape")
except (UnicodeDecodeError, UnicodeEncodeError):
pass
name = (
name.replace("\uff08", "(")
.replace("\uff09", ")")
.replace("\u3000", " ")
)
name = re.sub(
r"[\uff01-\uff5e]", lambda m: chr(ord(m.group()) - 0xFEE0), name
)
if not (
contest_id.startswith("ahc") or name.lower().find("heuristic") != -1
):
contests.append(
ContestSummary(id=contest_id, name=name, display_name=name)
)
return contests
max_pages = get_max_pages()
page_results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_page = {
executor.submit(scrape_page, page): page for page in range(1, max_pages + 1)
}
for future in concurrent.futures.as_completed(future_to_page):
page = future_to_page[future]
page_contests = future.result()
page_results[page] = page_contests
all_contests = []
for page in sorted(page_results.keys()):
all_contests.extend(page_results[page])
return all_contests
class AtCoderScraper(BaseScraper):
@property @property
def platform_name(self) -> str: def platform_name(self) -> str:
return "atcoder" return "atcoder"
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id) async def impl(cid: str) -> MetadataResult:
rows = await asyncio.to_thread(_scrape_tasks_sync, cid)
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: problems = _to_problem_summaries(rows)
return self._safe_execute( if not problems:
"tests", self._scrape_tests_impl, contest_id, problem_id return self._create_metadata_error(
) f"No problems found for contest {cid}", cid
def scrape_contest_list(self) -> ContestListResult:
return self._safe_execute("contests", self._scrape_contests_impl)
def _safe_execute(self, operation: str, func, *args):
try:
return func(*args)
except Exception as e:
error_msg = f"{self.platform_name}: {str(e)}"
if operation == "metadata":
return MetadataResult(success=False, error=error_msg)
elif operation == "tests":
return TestsResult(
success=False,
error=error_msg,
problem_id="",
url="",
tests=[],
timeout_ms=0,
memory_mb=0,
) )
elif operation == "contests":
return ContestListResult(success=False, error=error_msg)
def _scrape_metadata_impl(self, contest_id: str) -> MetadataResult:
problems = scrape_contest_problems(contest_id)
if not problems:
return MetadataResult( return MetadataResult(
success=False, success=True, error="", contest_id=cid, problems=problems
error=f"{self.platform_name}: No problems found for contest {contest_id}",
)
return MetadataResult(
success=True, error="", contest_id=contest_id, problems=problems
)
def _scrape_tests_impl(self, contest_id: str, problem_id: str) -> TestsResult:
problem_letter = problem_id.upper()
url = parse_problem_url(contest_id, problem_letter)
tests = scrape(url)
response = _make_request(url)
soup = BeautifulSoup(response.text, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup)
if not tests:
return TestsResult(
success=False,
error=f"{self.platform_name}: No tests found for {contest_id} {problem_letter}",
problem_id=f"{contest_id}_{problem_id.lower()}",
url=url,
tests=[],
timeout_ms=timeout_ms,
memory_mb=memory_mb,
) )
return TestsResult( return await self._safe_execute("metadata", impl, contest_id)
success=True,
error="",
problem_id=f"{contest_id}_{problem_id.lower()}",
url=url,
tests=tests,
timeout_ms=timeout_ms,
memory_mb=memory_mb,
)
def _scrape_contests_impl(self) -> ContestListResult: async def scrape_contest_list(self) -> ContestListResult:
contests = scrape_contests() async def impl() -> ContestListResult:
if not contests: try:
return ContestListResult( contests = await _fetch_all_contests_async()
success=False, error=f"{self.platform_name}: No contests found" except Exception as e:
) return self._create_contests_error(str(e))
return ContestListResult(success=True, error="", contests=contests) if not contests:
return self._create_contests_error("No contests found")
return ContestListResult(success=True, error="", contests=contests)
return await self._safe_execute("contests", impl)
async def stream_tests_for_category_async(self, category_id: str) -> None:
rows = await asyncio.to_thread(_scrape_tasks_sync, category_id)
async def emit(row: dict[str, str]) -> None:
letter = (row.get("letter") or "").strip().lower()
slug = row.get("slug") or ""
if not letter or not slug:
return
try:
data = await asyncio.to_thread(
_scrape_problem_page_sync, category_id, slug
)
tests: list[TestCase] = data["tests"]
if not tests:
print(
json.dumps(
{
"problem_id": letter,
"error": f"{self.platform_name}: no tests found",
}
),
flush=True,
)
return
print(
json.dumps(
{
"problem_id": letter,
"tests": [
{"input": t.input, "expected": t.expected}
for t in tests
],
"timeout_ms": data["timeout_ms"],
"memory_mb": data["memory_mb"],
"interactive": bool(data["interactive"]),
}
),
flush=True,
)
except Exception as e:
print(
json.dumps(
{
"problem_id": letter,
"error": f"{self.platform_name}: {str(e)}",
}
),
flush=True,
)
await asyncio.gather(*(emit(r) for r in rows))
def main() -> None: async def main_async() -> int:
if len(sys.argv) < 2: if len(sys.argv) < 2:
result = MetadataResult( result = MetadataResult(
success=False, success=False,
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> <problem_letter> OR atcoder.py contests", error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> OR atcoder.py contests",
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
mode: str = sys.argv[1] mode: str = sys.argv[1]
scraper = AtCoderScraper() scraper = AtcoderScraper()
if mode == "metadata": if mode == "metadata":
if len(sys.argv) != 3: if len(sys.argv) != 3:
result = MetadataResult( result = MetadataResult(
success=False, success=False, error="Usage: atcoder.py metadata <contest_id>"
error="Usage: atcoder.py metadata <contest_id>",
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
contest_id = sys.argv[2]
contest_id: str = sys.argv[2] result = await scraper.scrape_contest_metadata(contest_id)
result = scraper.scrape_contest_metadata(contest_id)
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
if not result.success: return 0 if result.success else 1
sys.exit(1)
elif mode == "tests": if mode == "tests":
if len(sys.argv) != 4: if len(sys.argv) != 3:
tests_result = TestsResult( tests_result = TestsResult(
success=False, success=False,
error="Usage: atcoder.py tests <contest_id> <problem_letter>", error="Usage: atcoder.py tests <contest_id>",
problem_id="", problem_id="",
url="", url="",
tests=[], tests=[],
@ -419,35 +378,32 @@ def main() -> None:
memory_mb=0, memory_mb=0,
) )
print(json.dumps(asdict(tests_result))) print(json.dumps(asdict(tests_result)))
sys.exit(1) return 1
contest_id = sys.argv[2]
await scraper.stream_tests_for_category_async(contest_id)
return 0
test_contest_id: str = sys.argv[2] if mode == "contests":
problem_letter: str = sys.argv[3]
tests_result = scraper.scrape_problem_tests(test_contest_id, problem_letter)
print(json.dumps(asdict(tests_result)))
if not tests_result.success:
sys.exit(1)
elif mode == "contests":
if len(sys.argv) != 2: if len(sys.argv) != 2:
contest_result = ContestListResult( contest_result = ContestListResult(
success=False, error="Usage: atcoder.py contests" success=False, error="Usage: atcoder.py contests"
) )
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
sys.exit(1) return 1
contest_result = await scraper.scrape_contest_list()
contest_result = scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
if not contest_result.success: return 0 if contest_result.success else 1
sys.exit(1)
else: result = MetadataResult(
result = MetadataResult( success=False,
success=False, error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'", )
) print(json.dumps(asdict(result)))
print(json.dumps(asdict(result))) return 1
sys.exit(1)
def main() -> None:
sys.exit(asyncio.run(main_async()))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,8 +1,13 @@
from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Awaitable, Callable, ParamSpec, cast
from .models import ContestListResult, MetadataResult, TestsResult from .models import ContestListResult, MetadataResult, TestsResult
P = ParamSpec("P")
@dataclass @dataclass
class ScraperConfig: class ScraperConfig:
@ -13,21 +18,18 @@ class ScraperConfig:
class BaseScraper(ABC): class BaseScraper(ABC):
def __init__(self, config: ScraperConfig | None = None):
self.config = config or ScraperConfig()
@property @property
@abstractmethod @abstractmethod
def platform_name(self) -> str: ... def platform_name(self) -> str: ...
@abstractmethod @abstractmethod
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ... async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ...
@abstractmethod @abstractmethod
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: ... async def scrape_contest_list(self) -> ContestListResult: ...
@abstractmethod @abstractmethod
def scrape_contest_list(self) -> ContestListResult: ... async def stream_tests_for_category_async(self, category_id: str) -> None: ...
def _create_metadata_error( def _create_metadata_error(
self, error_msg: str, contest_id: str = "" self, error_msg: str, contest_id: str = ""
@ -56,15 +58,21 @@ class BaseScraper(ABC):
success=False, error=f"{self.platform_name}: {error_msg}" success=False, error=f"{self.platform_name}: {error_msg}"
) )
def _safe_execute(self, operation: str, func, *args, **kwargs): async def _safe_execute(
self,
operation: str,
func: Callable[P, Awaitable[Any]],
*args: P.args,
**kwargs: P.kwargs,
):
try: try:
return func(*args, **kwargs) return await func(*args, **kwargs)
except Exception as e: except Exception as e:
if operation == "metadata": if operation == "metadata":
contest_id = args[0] if args else "" contest_id = cast(str, args[0]) if args else ""
return self._create_metadata_error(str(e), contest_id) return self._create_metadata_error(str(e), contest_id)
elif operation == "tests": elif operation == "tests":
problem_id = args[1] if len(args) > 1 else "" problem_id = cast(str, args[1]) if len(args) > 1 else ""
return self._create_tests_error(str(e), problem_id) return self._create_tests_error(str(e), problem_id)
elif operation == "contests": elif operation == "contests":
return self._create_contests_error(str(e)) return self._create_contests_error(str(e))

View file

@ -1,9 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import asyncio
import json import json
import logging
import re import re
import sys import sys
from dataclasses import asdict from dataclasses import asdict
from typing import Any
import requests import requests
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
@ -19,224 +22,132 @@ from .models import (
TestsResult, TestsResult,
) )
# suppress scrapling logging - https://github.com/D4Vinci/Scrapling/issues/31)
def scrape(url: str) -> list[TestCase]: logging.getLogger("scrapling").setLevel(logging.CRITICAL)
try:
page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
html = page.html_content
soup = BeautifulSoup(html, "html.parser")
input_sections = soup.find_all("div", class_="input")
output_sections = soup.find_all("div", class_="output")
individual_inputs: dict[str, list[str]] = {}
individual_outputs: dict[str, list[str]] = {}
for inp_section in input_sections:
inp_pre = inp_section.find("pre")
if not inp_pre or not isinstance(inp_pre, Tag):
continue
test_line_divs = inp_pre.find_all(
"div", class_=lambda x: x and "test-example-line-" in x
)
if not test_line_divs:
continue
for div in test_line_divs:
classes = div.get("class", [])
class_name = next(
(
cls
for cls in classes
if "test-example-line-" in cls and cls.split("-")[-1].isdigit()
),
None,
)
if not class_name:
continue
test_num = class_name.replace("test-example-line-", "")
if test_num not in individual_inputs:
individual_inputs[test_num] = []
individual_inputs[test_num].append(div.get_text().strip())
for out_section in output_sections:
out_pre = out_section.find("pre")
if not out_pre or not isinstance(out_pre, Tag):
continue
test_line_divs = out_pre.find_all(
"div", class_=lambda x: x and "test-example-line-" in x
)
if not test_line_divs:
continue
for div in test_line_divs:
classes = div.get("class", [])
class_name = next(
(
cls
for cls in classes
if "test-example-line-" in cls and cls.split("-")[-1].isdigit()
),
None,
)
if not class_name:
continue
test_num = class_name.replace("test-example-line-", "")
if test_num not in individual_outputs:
individual_outputs[test_num] = []
individual_outputs[test_num].append(div.get_text().strip())
if individual_inputs and individual_outputs:
common_tests = set(individual_inputs.keys()) & set(
individual_outputs.keys()
)
if common_tests:
tests = []
for test_num in sorted(common_tests):
input_text = "\n".join(individual_inputs[test_num])
output_text = "\n".join(individual_outputs[test_num])
prefixed_input = "1\n" + input_text
tests.append(TestCase(input=prefixed_input, expected=output_text))
return tests
all_inputs = []
all_outputs = []
for inp_section in input_sections:
inp_pre = inp_section.find("pre")
if not inp_pre or not isinstance(inp_pre, Tag):
continue
divs = inp_pre.find_all("div")
if divs:
lines = [div.get_text().strip() for div in divs if isinstance(div, Tag)]
text = "\n".join(lines)
else:
text = inp_pre.get_text().replace("\r", "").strip()
all_inputs.append(text)
for out_section in output_sections:
out_pre = out_section.find("pre")
if not out_pre or not isinstance(out_pre, Tag):
continue
divs = out_pre.find_all("div")
if divs:
lines = [div.get_text().strip() for div in divs if isinstance(div, Tag)]
text = "\n".join(lines)
else:
text = out_pre.get_text().replace("\r", "").strip()
all_outputs.append(text)
if not all_inputs or not all_outputs:
return []
combined_input = "\n".join(all_inputs)
combined_output = "\n".join(all_outputs)
return [TestCase(input=combined_input, expected=combined_output)]
except Exception as e:
print(f"Scrapling failed: {e}", file=sys.stderr)
return []
def parse_problem_url(contest_id: str, problem_letter: str) -> str: BASE_URL = "https://codeforces.com"
API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
TIMEOUT_SECONDS = 30
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
def _text_from_pre(pre: Tag) -> str:
return ( return (
f"https://codeforces.com/contest/{contest_id}/problem/{problem_letter.upper()}" pre.get_text(separator="\n", strip=False)
.replace("\r", "")
.replace("\xa0", " ")
.rstrip("\n")
) )
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]: def _extract_limits(block: Tag) -> tuple[int, float]:
timeout_ms = None tdiv = block.find("div", class_="time-limit")
memory_mb = None mdiv = block.find("div", class_="memory-limit")
timeout_ms = 0
time_limit_div = soup.find("div", class_="time-limit") memory_mb = 0.0
if time_limit_div: if tdiv:
text = time_limit_div.get_text().strip() ttxt = tdiv.get_text(" ", strip=True)
match = re.search(r"(\d+) seconds?", text) ts = re.search(r"(\d+)\s*seconds?", ttxt)
if match: if ts:
seconds = int(match.group(1)) timeout_ms = int(ts.group(1)) * 1000
timeout_ms = seconds * 1000 if mdiv:
mtxt = mdiv.get_text(" ", strip=True)
if timeout_ms is None: ms = re.search(r"(\d+)\s*megabytes?", mtxt)
raise ValueError("Could not find valid timeout in time-limit section") if ms:
memory_mb = float(ms.group(1))
memory_limit_div = soup.find("div", class_="memory-limit")
if memory_limit_div:
text = memory_limit_div.get_text().strip()
match = re.search(r"(\d+) megabytes", text)
if match:
memory_mb = float(match.group(1))
if memory_mb is None:
raise ValueError("Could not find valid memory limit in memory-limit section")
return timeout_ms, memory_mb return timeout_ms, memory_mb
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: def _extract_title(block: Tag) -> tuple[str, str]:
try: t = block.find("div", class_="title")
contest_url: str = f"https://codeforces.com/contest/{contest_id}" if not t:
page = StealthyFetcher.fetch(contest_url, headless=True, solve_cloudflare=True) return "", ""
html = page.html_content s = t.get_text(" ", strip=True)
parts = s.split(".", 1)
if len(parts) != 2:
return "", s.strip()
return parts[0].strip().upper(), parts[1].strip()
soup = BeautifulSoup(html, "html.parser")
problems: list[ProblemSummary] = []
problem_links = soup.find_all( def _extract_samples(block: Tag) -> list[TestCase]:
"a", href=lambda x: x and f"/contest/{contest_id}/problem/" in x st = block.find("div", class_="sample-test")
if not st:
return []
inputs = [
_text_from_pre(pre)
for inp in st.find_all("div", class_="input") # type: ignore[union-attr]
for pre in [inp.find("pre")]
if isinstance(pre, Tag)
]
outputs = [
_text_from_pre(pre)
for out in st.find_all("div", class_="output") # type: ignore[union-attr]
for pre in [out.find("pre")]
if isinstance(pre, Tag)
]
n = min(len(inputs), len(outputs))
return [TestCase(input=inputs[i], expected=outputs[i]) for i in range(n)]
def _is_interactive(block: Tag) -> bool:
ps = block.find("div", class_="problem-statement")
txt = ps.get_text(" ", strip=True) if ps else block.get_text(" ", strip=True)
return "This is an interactive problem" in txt
def _fetch_problems_html(contest_id: str) -> str:
url = f"{BASE_URL}/contest/{contest_id}/problems"
page = StealthyFetcher.fetch(
url,
headless=True,
solve_cloudflare=True,
)
return page.html_content
def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
soup = BeautifulSoup(html, "html.parser")
blocks = soup.find_all("div", class_="problem-statement")
out: list[dict[str, Any]] = []
for b in blocks:
holder = b.find_parent("div", class_="problemindexholder")
letter = (holder.get("problemindex") if holder else "").strip().upper()
name = _extract_title(b)[1] # keep your name extraction
if not letter:
continue
tests = _extract_samples(b)
timeout_ms, memory_mb = _extract_limits(b)
interactive = _is_interactive(b)
out.append(
{
"letter": letter,
"name": name,
"tests": tests,
"timeout_ms": timeout_ms,
"memory_mb": memory_mb,
"interactive": interactive,
}
) )
return out
for link in problem_links:
if not isinstance(link, Tag):
continue
href: str = str(link.get("href", ""))
if f"/contest/{contest_id}/problem/" in href:
problem_letter: str = href.split("/")[-1].lower()
problem_name: str = link.get_text(strip=True)
if not (problem_letter and problem_name):
continue
problems.append(ProblemSummary(id=problem_letter, name=problem_name))
seen: set[str] = set()
unique_problems: list[ProblemSummary] = []
for p in problems:
if p.id not in seen:
seen.add(p.id)
unique_problems.append(p)
return unique_problems
except Exception as e:
print(f"Failed to scrape contest problems: {e}", file=sys.stderr)
return []
def scrape_sample_tests(url: str) -> list[TestCase]: def _scrape_contest_problems_sync(contest_id: str) -> list[ProblemSummary]:
print(f"Scraping: {url}", file=sys.stderr) html = _fetch_problems_html(contest_id)
return scrape(url) blocks = _parse_all_blocks(html)
problems: list[ProblemSummary] = []
seen: set[str] = set()
def scrape_contests() -> list[ContestSummary]: for b in blocks:
response = requests.get("https://codeforces.com/api/contest.list", timeout=10) pid = b["letter"].upper()
response.raise_for_status() if pid in seen:
continue
data = response.json() seen.add(pid)
if data["status"] != "OK": problems.append(ProblemSummary(id=pid.lower(), name=b["name"]))
return [] return problems
contests = []
for contest in data["result"]:
contest_id = str(contest["id"])
name = contest["name"]
contests.append(ContestSummary(id=contest_id, name=name, display_name=name))
return contests
class CodeforcesScraper(BaseScraper): class CodeforcesScraper(BaseScraper):
@ -244,81 +155,94 @@ class CodeforcesScraper(BaseScraper):
def platform_name(self) -> str: def platform_name(self) -> str:
return "codeforces" return "codeforces"
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
return self._safe_execute( async def impl(cid: str) -> MetadataResult:
"metadata", self._scrape_contest_metadata_impl, contest_id problems = await asyncio.to_thread(_scrape_contest_problems_sync, cid)
) if not problems:
return self._create_metadata_error(
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: f"No problems found for contest {cid}", cid
return self._safe_execute( )
"tests", self._scrape_problem_tests_impl, contest_id, problem_id return MetadataResult(
) success=True, error="", contest_id=cid, problems=problems
def scrape_contest_list(self) -> ContestListResult:
return self._safe_execute("contests", self._scrape_contest_list_impl)
def _scrape_contest_metadata_impl(self, contest_id: str) -> MetadataResult:
problems = scrape_contest_problems(contest_id)
if not problems:
return self._create_metadata_error(
f"No problems found for contest {contest_id}", contest_id
)
return MetadataResult(
success=True, error="", contest_id=contest_id, problems=problems
)
def _scrape_problem_tests_impl(
self, contest_id: str, problem_letter: str
) -> TestsResult:
problem_id = contest_id + problem_letter.lower()
url = parse_problem_url(contest_id, problem_letter)
tests = scrape_sample_tests(url)
page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
html = page.html_content
soup = BeautifulSoup(html, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup)
problem_statement_div = soup.find("div", class_="problem-statement")
interactive = bool(
problem_statement_div
and "This is an interactive problem" in problem_statement_div.get_text()
)
if not tests:
return self._create_tests_error(
f"No tests found for {contest_id} {problem_letter}", problem_id, url
) )
return TestsResult( return await self._safe_execute("metadata", impl, contest_id)
success=True,
error="",
problem_id=problem_id,
url=url,
tests=tests,
timeout_ms=timeout_ms,
memory_mb=memory_mb,
interactive=interactive,
)
def _scrape_contest_list_impl(self) -> ContestListResult: async def scrape_contest_list(self) -> ContestListResult:
contests = scrape_contests() async def impl() -> ContestListResult:
if not contests: try:
return self._create_contests_error("No contests found") r = requests.get(API_CONTEST_LIST_URL, timeout=TIMEOUT_SECONDS)
return ContestListResult(success=True, error="", contests=contests) r.raise_for_status()
data = r.json()
if data.get("status") != "OK":
return self._create_contests_error("Invalid API response")
contests: list[ContestSummary] = []
for c in data["result"]:
if c.get("phase") != "FINISHED":
continue
cid = str(c["id"])
name = c["name"]
contests.append(
ContestSummary(id=cid, name=name, display_name=name)
)
if not contests:
return self._create_contests_error("No contests found")
return ContestListResult(success=True, error="", contests=contests)
except Exception as e:
return self._create_contests_error(str(e))
return await self._safe_execute("contests", impl)
async def stream_tests_for_category_async(self, category_id: str) -> None:
html = await asyncio.to_thread(_fetch_problems_html, category_id)
blocks = await asyncio.to_thread(_parse_all_blocks, html)
for b in blocks:
pid = b["letter"].lower()
tests: list[TestCase] = b["tests"]
if not tests:
print(
json.dumps(
{
"problem_id": pid,
"error": f"{self.platform_name}: no tests found",
}
),
flush=True,
)
continue
print(
json.dumps(
{
"problem_id": pid,
"tests": [
{"input": t.input, "expected": t.expected} for t in tests
],
"timeout_ms": b["timeout_ms"],
"memory_mb": b["memory_mb"],
"interactive": bool(b["interactive"]),
}
),
flush=True,
)
def main() -> None: async def main_async() -> int:
if len(sys.argv) < 2: if len(sys.argv) < 2:
result = MetadataResult( result = MetadataResult(
success=False, success=False,
error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> <problem_letter> OR codeforces.py contests", error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> OR codeforces.py contests",
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
scraper = CodeforcesScraper()
mode: str = sys.argv[1] mode: str = sys.argv[1]
scraper = CodeforcesScraper()
if mode == "metadata": if mode == "metadata":
if len(sys.argv) != 3: if len(sys.argv) != 3:
@ -326,17 +250,17 @@ def main() -> None:
success=False, error="Usage: codeforces.py metadata <contest_id>" success=False, error="Usage: codeforces.py metadata <contest_id>"
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
contest_id = sys.argv[2]
contest_id: str = sys.argv[2] result = await scraper.scrape_contest_metadata(contest_id)
result = scraper.scrape_contest_metadata(contest_id)
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
return 0 if result.success else 1
elif mode == "tests": if mode == "tests":
if len(sys.argv) != 4: if len(sys.argv) != 3:
tests_result = TestsResult( tests_result = TestsResult(
success=False, success=False,
error="Usage: codeforces.py tests <contest_id> <problem_letter>", error="Usage: codeforces.py tests <contest_id>",
problem_id="", problem_id="",
url="", url="",
tests=[], tests=[],
@ -344,31 +268,32 @@ def main() -> None:
memory_mb=0, memory_mb=0,
) )
print(json.dumps(asdict(tests_result))) print(json.dumps(asdict(tests_result)))
sys.exit(1) return 1
contest_id = sys.argv[2]
await scraper.stream_tests_for_category_async(contest_id)
return 0
tests_contest_id: str = sys.argv[2] if mode == "contests":
problem_letter: str = sys.argv[3]
tests_result = scraper.scrape_problem_tests(tests_contest_id, problem_letter)
print(json.dumps(asdict(tests_result)))
elif mode == "contests":
if len(sys.argv) != 2: if len(sys.argv) != 2:
contest_result = ContestListResult( contest_result = ContestListResult(
success=False, error="Usage: codeforces.py contests" success=False, error="Usage: codeforces.py contests"
) )
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
sys.exit(1) return 1
contest_result = await scraper.scrape_contest_list()
contest_result = scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
return 0 if contest_result.success else 1
else: result = MetadataResult(
result = MetadataResult( success=False,
success=False, error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'", )
) print(json.dumps(asdict(result)))
print(json.dumps(asdict(result))) return 1
sys.exit(1)
def main() -> None:
sys.exit(asyncio.run(main_async()))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,13 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import asyncio
import json import json
import re import re
import sys import sys
from dataclasses import asdict from dataclasses import asdict
from typing import Any
import backoff import httpx
import requests
from bs4 import BeautifulSoup, Tag
from .base import BaseScraper from .base import BaseScraper
from .models import ( from .models import (
@ -19,6 +19,15 @@ from .models import (
TestsResult, TestsResult,
) )
BASE_URL = "https://cses.fi"
INDEX_PATH = "/problemset/list"
TASK_PATH = "/problemset/task/{id}"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
TIMEOUT_S = 15.0
CONNECTIONS = 8
def normalize_category_name(category_name: str) -> str: def normalize_category_name(category_name: str) -> str:
return category_name.lower().replace(" ", "_").replace("&", "and") return category_name.lower().replace(" ", "_").replace("&", "and")
@ -57,256 +66,114 @@ def snake_to_title(name: str) -> str:
return " ".join(map(fix_word, enumerate(words))) return " ".join(map(fix_word, enumerate(words)))
@backoff.on_exception( async def fetch_text(client: httpx.AsyncClient, path: str) -> str:
backoff.expo, r = await client.get(BASE_URL + path, headers=HEADERS, timeout=TIMEOUT_S)
(requests.exceptions.RequestException, requests.exceptions.HTTPError), r.raise_for_status()
max_tries=4, return r.text
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Request failed (attempt {details['tries']}), retrying in {details['wait']:.1f}s: {details['exception']}", CATEGORY_BLOCK_RE = re.compile(
file=sys.stderr, r'<h2>(?P<cat>[^<]+)</h2>\s*<ul class="task-list">(?P<body>.*?)</ul>',
), re.DOTALL,
) )
@backoff.on_predicate( TASK_LINK_RE = re.compile(
backoff.expo, r'<li class="task"><a href="/problemset/task/(?P<id>\d+)/?">(?P<title>[^<]+)</a>',
lambda response: response.status_code == 429, re.DOTALL,
max_tries=4,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Rate limited, retrying in {details['wait']:.1f}s", file=sys.stderr
),
) )
def make_request(url: str, headers: dict) -> requests.Response:
response = requests.get(url, headers=headers, timeout=10) TITLE_RE = re.compile(
response.raise_for_status() r'<div class="title-block">.*?<h1>(?P<title>[^<]+)</h1>', re.DOTALL
return response )
TIME_RE = re.compile(r"<li><b>Time limit:</b>\s*([0-9.]+)\s*s</li>")
MEM_RE = re.compile(r"<li><b>Memory limit:</b>\s*(\d+)\s*MB</li>")
SIDEBAR_CAT_RE = re.compile(
r'<div class="nav sidebar">.*?<h4>(?P<cat>[^<]+)</h4>', re.DOTALL
)
MD_BLOCK_RE = re.compile(r'<div class="md">(.*?)</div>', re.DOTALL | re.IGNORECASE)
EXAMPLE_SECTION_RE = re.compile(
r"<h[1-6][^>]*>\s*example[s]?:?\s*</h[1-6]>\s*(?P<section>.*?)(?=<h[1-6][^>]*>|$)",
re.DOTALL | re.IGNORECASE,
)
LABELED_IO_RE = re.compile(
r"input\s*:\s*</p>\s*<pre>(?P<input>.*?)</pre>.*?output\s*:\s*</p>\s*<pre>(?P<output>.*?)</pre>",
re.DOTALL | re.IGNORECASE,
)
PRE_RE = re.compile(r"<pre>(.*?)</pre>", re.DOTALL | re.IGNORECASE)
def scrape_category_problems(category_id: str) -> list[ProblemSummary]: def parse_categories(html: str) -> list[ContestSummary]:
category_name = snake_to_title(category_id) out: list[ContestSummary] = []
try: for m in CATEGORY_BLOCK_RE.finditer(html):
problemset_url = "https://cses.fi/problemset/" cat = m.group("cat").strip()
headers = { if cat == "General":
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" continue
} out.append(
response = make_request(problemset_url, headers) ContestSummary(
soup = BeautifulSoup(response.text, "html.parser") id=normalize_category_name(cat),
current_category = None name=cat,
problems = [] display_name=cat,
target_found = False
for element in soup.find_all(["h1", "h2", "ul"]):
if not isinstance(element, Tag):
continue
if element.name in ["h1", "h2"]:
text = element.get_text(strip=True)
if not text or text.startswith("CSES") or text == "CSES Problem Set":
continue
if target_found and current_category != text:
break
current_category = text
if text.lower() == category_name.lower():
target_found = True
elif element.name == "ul" and current_category and target_found:
problem_links = element.find_all(
"a", href=lambda x: x and "/problemset/task/" in x
)
for link in problem_links:
href = link.get("href", "")
if not href:
continue
problem_id = href.split("/")[-1]
problem_name = link.get_text(strip=True)
if not problem_id.isdigit() or not problem_name:
continue
problems.append(ProblemSummary(id=problem_id, name=problem_name))
return problems
except Exception as e:
print(f"Failed to scrape CSES category {category_id}: {e}", file=sys.stderr)
return []
def parse_problem_url(problem_input: str) -> str | None:
if problem_input.startswith("https://cses.fi/problemset/task/"):
return problem_input.rstrip("/")
elif problem_input.isdigit():
return f"https://cses.fi/problemset/task/{problem_input}"
return None
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
    """Pull the time limit (in ms) and memory limit (in MB) from a CSES task page.

    Reads the <ul class="task-constraints"> list items; raises ValueError when
    the section is missing or either limit cannot be parsed out of it.
    """
    constraints = soup.find("ul", class_="task-constraints")
    if not constraints or not isinstance(constraints, Tag):
        raise ValueError("Could not find task-constraints section")

    timeout_ms = None
    memory_mb = None
    for item in constraints.find_all("li"):
        text = item.get_text()
        if "Time limit:" in text:
            m = re.search(r"Time limit:\s*(\d+(?:\.\d+)?)\s*s", text)
            if m:
                # CSES reports seconds; callers expect integral milliseconds.
                timeout_ms = int(float(m.group(1)) * 1000)
        if "Memory limit:" in text:
            m = re.search(r"Memory limit:\s*(\d+)\s*MB", text)
            if m:
                memory_mb = float(m.group(1))

    if timeout_ms is None:
        raise ValueError("Could not find valid timeout in task-constraints section")
    if memory_mb is None:
        raise ValueError(
            "Could not find valid memory limit in task-constraints section"
        )
    return timeout_ms, memory_mb
def scrape_categories() -> list[ContestSummary]:
try:
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = make_request("https://cses.fi/problemset/", headers)
soup = BeautifulSoup(response.text, "html.parser")
categories = []
for h2 in soup.find_all("h2"):
category_name = h2.get_text().strip()
if category_name == "General":
continue
category_id = normalize_category_name(category_name)
display_name = category_name
categories.append(
ContestSummary(
id=category_id, name=category_name, display_name=display_name
)
) )
return categories
except Exception as e:
print(f"Failed to scrape CSES categories: {e}", file=sys.stderr)
return []
def process_problem_element(
    element,
    current_category: str | None,
    all_categories: dict[str, list[ProblemSummary]],
) -> str | None:
    """Fold one parsed HTML element into the category -> problems mapping.

    An <h1> opens a new category (registered in ``all_categories`` and made
    current); an <a> pointing at ``/problemset/task/<id>`` is appended to the
    current category when both id and name look valid. Returns the category
    that is current after processing this element.
    """
    if element.name == "h1":
        # New category header: register it (if unseen) and make it current.
        title = element.get_text().strip()
        if title not in all_categories:
            all_categories[title] = []
        return title

    if element.name != "a":
        return current_category
    href = element.get("href", "")
    if "/problemset/task/" not in href or not href:
        return current_category

    task_id = href.split("/")[-1]
    task_name = element.get_text(strip=True)
    if task_id.isdigit() and task_name and current_category:
        all_categories[current_category].append(
            ProblemSummary(id=task_id, name=task_name)
        )
    return current_category
def scrape_all_problems() -> dict[str, list[ProblemSummary]]:
    """Scrape every CSES problemset category and its problems in one pass.

    Walks the problemset index page: each heading that is not site chrome
    (anything starting with "CSES") opens a category, and every task link in
    the following <ul> elements is recorded under it. Progress is logged to
    stderr. Returns {} on any failure instead of raising.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        response = requests.get(
            "https://cses.fi/problemset/", headers=headers, timeout=10
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        all_categories: dict[str, list[ProblemSummary]] = {}
        current_category = None
        for node in soup.find_all(["h1", "h2", "ul"]):
            if not isinstance(node, Tag):
                continue
            if node.name in ("h1", "h2"):
                heading = node.get_text(strip=True)
                # Skip empty headings and the "CSES Problem Set" banner.
                if (
                    heading
                    and not heading.startswith("CSES")
                    and heading != "CSES Problem Set"
                ):
                    current_category = heading
                    if current_category not in all_categories:
                        all_categories[current_category] = []
                    print(f"Found category: {current_category}", file=sys.stderr)
            elif node.name == "ul" and current_category:
                links = node.find_all(
                    "a", href=lambda x: x and "/problemset/task/" in x
                )
                for link in links:
                    href = link.get("href", "")
                    if not href:
                        continue
                    task_id = href.split("/")[-1]
                    task_name = link.get_text(strip=True)
                    if task_id.isdigit() and task_name:
                        all_categories[current_category].append(
                            ProblemSummary(id=task_id, name=task_name)
                        )
        print(
            f"Found {len(all_categories)} categories with {sum(len(probs) for probs in all_categories.values())} problems",
            file=sys.stderr,
        )
        return all_categories
    except Exception as e:
        print(f"Failed to scrape CSES problems: {e}", file=sys.stderr)
        return {}
def _collect_section_after(header: Tag) -> list[Tag]:
out: list[Tag] = []
cur = header.find_next_sibling()
while cur and not (isinstance(cur, Tag) and cur.name in ("h1", "h2", "h3")):
if isinstance(cur, Tag):
out.append(cur)
cur = cur.find_next_sibling()
return out return out
def extract_example_test_cases(soup: BeautifulSoup) -> list[tuple[str, str]]: def parse_category_problems(category_id: str, html: str) -> list[ProblemSummary]:
example_headers = soup.find_all( want = snake_to_title(category_id)
lambda t: isinstance(t, Tag) for m in CATEGORY_BLOCK_RE.finditer(html):
and t.name in ("h1", "h2", "h3") cat = m.group("cat").strip()
and t.get_text(strip=True).lower().startswith("example") if cat != want:
) continue
cases: list[tuple[str, str]] = [] body = m.group("body")
for hdr in example_headers: return [
section = _collect_section_after(hdr) ProblemSummary(id=mm.group("id"), name=mm.group("title"))
for mm in TASK_LINK_RE.finditer(body)
def find_labeled(label: str) -> str | None: ]
for node in section: return []
if not isinstance(node, Tag):
continue
if node.name in ("p", "h4", "h5", "h6"):
txt = node.get_text(strip=True).lower().rstrip(":")
if txt == label:
pre = node.find_next_sibling("pre")
if pre:
return pre.get_text().strip()
return None
inp = find_labeled("input")
out = find_labeled("output")
if not inp or not out:
pres = [n for n in section if isinstance(n, Tag) and n.name == "pre"]
if len(pres) >= 2:
inp = inp or pres[0].get_text().strip()
out = out or pres[1].get_text().strip()
if inp and out:
cases.append((inp, out))
return cases
def scrape(url: str) -> list[TestCase]: def parse_limits(html: str) -> tuple[int, int]:
try: tm = TIME_RE.search(html)
headers = { mm = MEM_RE.search(html)
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" t = int(round(float(tm.group(1)) * 1000)) if tm else 0
} m = int(mm.group(1)) if mm else 0
response = make_request(url, headers) return t, m
soup = BeautifulSoup(response.text, "html.parser")
pairs = extract_example_test_cases(soup)
return [TestCase(input=inp, expected=out) for (inp, out) in pairs] def parse_title(html: str) -> str:
except Exception as e: mt = TITLE_RE.search(html)
print(f"Error scraping CSES: {e}", file=sys.stderr) return mt.group("title").strip() if mt else ""
def parse_category_from_sidebar(html: str) -> str | None:
m = SIDEBAR_CAT_RE.search(html)
return m.group("cat").strip() if m else None
def parse_tests(html: str) -> list[TestCase]:
md = MD_BLOCK_RE.search(html)
if not md:
return [] return []
block = md.group(1)
msec = EXAMPLE_SECTION_RE.search(block)
section = msec.group("section") if msec else block
mlabel = LABELED_IO_RE.search(section)
if mlabel:
a = mlabel.group("input").strip()
b = mlabel.group("output").strip()
return [TestCase(input=a, expected=b)]
pres = PRE_RE.findall(section)
if len(pres) >= 2:
return [TestCase(input=pres[0].strip(), expected=pres[1].strip())]
return []
def task_path(problem_id: str | int) -> str:
return TASK_PATH.format(id=str(problem_id))
class CSESScraper(BaseScraper): class CSESScraper(BaseScraper):
@ -314,129 +181,99 @@ class CSESScraper(BaseScraper):
def platform_name(self) -> str: def platform_name(self) -> str:
return "cses" return "cses"
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id) async with httpx.AsyncClient() as client:
html = await fetch_text(client, INDEX_PATH)
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: problems = parse_category_problems(contest_id, html)
return self._safe_execute(
"tests", self._scrape_tests_impl, contest_id, problem_id
)
def scrape_contest_list(self) -> ContestListResult:
return self._safe_execute("contests", self._scrape_contests_impl)
def _safe_execute(self, operation: str, func, *args):
try:
return func(*args)
except Exception as e:
error_msg = f"{self.platform_name}: {str(e)}"
if operation == "metadata":
return MetadataResult(success=False, error=error_msg)
elif operation == "tests":
return TestsResult(
success=False,
error=error_msg,
problem_id="",
url="",
tests=[],
timeout_ms=0,
memory_mb=0,
)
elif operation == "contests":
return ContestListResult(success=False, error=error_msg)
def _scrape_metadata_impl(self, category_id: str) -> MetadataResult:
problems = scrape_category_problems(category_id)
if not problems: if not problems:
return MetadataResult( return MetadataResult(
success=False, success=False,
error=f"{self.platform_name}: No problems found for category: {category_id}", error=f"{self.platform_name}: No problems found for category: {contest_id}",
) )
return MetadataResult( return MetadataResult(
success=True, error="", contest_id=category_id, problems=problems success=True, error="", contest_id=contest_id, problems=problems
) )
def _scrape_tests_impl(self, category: str, problem_id: str) -> TestsResult: async def scrape_contest_list(self) -> ContestListResult:
url = parse_problem_url(problem_id) async with httpx.AsyncClient() as client:
if not url: html = await fetch_text(client, INDEX_PATH)
return TestsResult( cats = parse_categories(html)
success=False, if not cats:
error=f"{self.platform_name}: Invalid problem input: {problem_id}. Use either problem ID (e.g., 1068) or full URL",
problem_id=problem_id if problem_id.isdigit() else "",
url="",
tests=[],
timeout_ms=0,
memory_mb=0,
)
tests = scrape(url)
m = re.search(r"/task/(\d+)", url)
actual_problem_id = (
problem_id if problem_id.isdigit() else (m.group(1) if m else "")
)
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup)
if not tests:
return TestsResult(
success=False,
error=f"{self.platform_name}: No tests found for {problem_id}",
problem_id=actual_problem_id,
url=url,
tests=[],
timeout_ms=timeout_ms,
memory_mb=memory_mb,
)
return TestsResult(
success=True,
error="",
problem_id=actual_problem_id,
url=url,
tests=tests,
timeout_ms=timeout_ms,
memory_mb=memory_mb,
)
def _scrape_contests_impl(self) -> ContestListResult:
categories = scrape_categories()
if not categories:
return ContestListResult( return ContestListResult(
success=False, error=f"{self.platform_name}: No contests found" success=False, error=f"{self.platform_name}: No contests found"
) )
return ContestListResult(success=True, error="", contests=categories) return ContestListResult(success=True, error="", contests=cats)
async def stream_tests_for_category_async(self, category_id: str) -> None:
async with httpx.AsyncClient(
limits=httpx.Limits(max_connections=CONNECTIONS)
) as client:
index_html = await fetch_text(client, INDEX_PATH)
problems = parse_category_problems(category_id, index_html)
if not problems:
return
sem = asyncio.Semaphore(CONNECTIONS)
async def run_one(pid: str) -> dict[str, Any]:
async with sem:
try:
html = await fetch_text(client, task_path(pid))
tests = parse_tests(html)
timeout_ms, memory_mb = parse_limits(html)
if not tests:
return {
"problem_id": pid,
"error": f"{self.platform_name}: no tests found",
}
return {
"problem_id": pid,
"tests": [
{"input": t.input, "expected": t.expected}
for t in tests
],
"timeout_ms": timeout_ms,
"memory_mb": memory_mb,
"interactive": False,
}
except Exception as e:
return {"problem_id": pid, "error": str(e)}
tasks = [run_one(p.id) for p in problems]
for coro in asyncio.as_completed(tasks):
payload = await coro
print(json.dumps(payload), flush=True)
def main() -> None: async def main_async() -> int:
if len(sys.argv) < 2: if len(sys.argv) < 2:
result = MetadataResult( result = MetadataResult(
success=False, success=False,
error="Usage: cses.py metadata <category_id> OR cses.py tests <category> <problem_id> OR cses.py contests", error="Usage: cses.py metadata <category_id> OR cses.py tests <category> OR cses.py contests",
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
mode: str = sys.argv[1] mode: str = sys.argv[1]
scraper = CSESScraper() scraper = CSESScraper()
if mode == "metadata": if mode == "metadata":
if len(sys.argv) != 3: if len(sys.argv) != 3:
result = MetadataResult( result = MetadataResult(
success=False, success=False, error="Usage: cses.py metadata <category_id>"
error="Usage: cses.py metadata <category_id>",
) )
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
sys.exit(1) return 1
category_id = sys.argv[2] category_id = sys.argv[2]
result = scraper.scrape_contest_metadata(category_id) result = await scraper.scrape_contest_metadata(category_id)
print(json.dumps(asdict(result))) print(json.dumps(asdict(result)))
if not result.success: return 0 if result.success else 1
sys.exit(1)
elif mode == "tests": if mode == "tests":
if len(sys.argv) != 4: if len(sys.argv) != 3:
tests_result = TestsResult( tests_result = TestsResult(
success=False, success=False,
error="Usage: cses.py tests <category> <problem_id>", error="Usage: cses.py tests <category>",
problem_id="", problem_id="",
url="", url="",
tests=[], tests=[],
@ -444,31 +281,32 @@ def main() -> None:
memory_mb=0, memory_mb=0,
) )
print(json.dumps(asdict(tests_result))) print(json.dumps(asdict(tests_result)))
sys.exit(1) return 1
category = sys.argv[2] category = sys.argv[2]
problem_id = sys.argv[3] await scraper.stream_tests_for_category_async(category)
tests_result = scraper.scrape_problem_tests(category, problem_id) return 0
print(json.dumps(asdict(tests_result)))
if not tests_result.success: if mode == "contests":
sys.exit(1)
elif mode == "contests":
if len(sys.argv) != 2: if len(sys.argv) != 2:
contest_result = ContestListResult( contest_result = ContestListResult(
success=False, error="Usage: cses.py contests" success=False, error="Usage: cses.py contests"
) )
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
sys.exit(1) return 1
contest_result = scraper.scrape_contest_list() contest_result = await scraper.scrape_contest_list()
print(json.dumps(asdict(contest_result))) print(json.dumps(asdict(contest_result)))
if not contest_result.success: return 0 if contest_result.success else 1
sys.exit(1)
else: result = MetadataResult(
result = MetadataResult( success=False,
success=False, error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category>', or 'contests'",
error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category> <problem_id>', or 'contests'", )
) print(json.dumps(asdict(result)))
print(json.dumps(asdict(result))) return 1
sys.exit(1)
def main() -> None:
sys.exit(asyncio.run(main_async()))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,43 +0,0 @@
import pytest
@pytest.fixture
def mock_codeforces_html():
    # Minimal Codeforces problem-page snippet: the time/memory limit divs plus
    # a single sample test rendered with the per-line "test-example-line"
    # markup that the scraper is expected to parse.
    return """
<div class="time-limit">Time limit: 1 seconds</div>
<div class="memory-limit">Memory limit: 256 megabytes</div>
<div class="input">
<pre>
<div class="test-example-line-1">3</div>
<div class="test-example-line-1">1 2 3</div>
</pre>
</div>
<div class="output">
<pre>
<div class="test-example-line-1">6</div>
</pre>
</div>
"""
@pytest.fixture
def mock_atcoder_html():
    # Minimal AtCoder task snippet: one "Sample Input 1"/"Sample Output 1"
    # heading pair with <pre> bodies, the shape the scraper looks for.
    return """
<h3>Sample Input 1</h3>
<pre>3
1 2 3</pre>
<h3>Sample Output 1</h3>
<pre>6</pre>
"""
@pytest.fixture
def mock_cses_html():
    # Minimal CSES task snippet: an Example heading followed by labeled
    # "Input:"/"Output:" paragraphs and their <pre> blocks.
    return """
<h1>Example</h1>
<p>Input:</p>
<pre>3
1 2 3</pre>
<p>Output:</p>
<pre>6</pre>
"""

View file

@ -1,199 +0,0 @@
from unittest.mock import Mock
from scrapers.atcoder import scrape, scrape_contest_problems, scrape_contests
from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_atcoder_html):
    """scrape() extracts the single sample input/output pair from a task page."""
    fake_response = Mock()
    fake_response.text = mock_atcoder_html
    mocker.patch("scrapers.atcoder.requests.get", return_value=fake_response)

    cases = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")

    assert len(cases) == 1
    first = cases[0]
    assert first.input == "3\n1 2 3"
    assert first.expected == "6"
def test_scrape_contest_problems(mocker):
mock_response = Mock()
mock_response.text = """
<table class="table">
<tr><th>Task</th><th>Name</th></tr>
<tr>
<td></td>
<td><a href="/contests/abc350/tasks/abc350_a">A - Water Tank</a></td>
</tr>
<tr>
<td></td>
<td><a href="/contests/abc350/tasks/abc350_b">B - Dentist Aoki</a></td>
</tr>
</table>
"""
mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
result = scrape_contest_problems("abc350")
assert len(result) == 2
assert result[0] == ProblemSummary(id="a", name="A - Water Tank")
assert result[1] == ProblemSummary(id="b", name="B - Dentist Aoki")
def test_scrape_network_error(mocker):
    """A failing HTTP request makes scrape() return an empty list, not raise."""
    mocker.patch(
        "scrapers.atcoder.requests.get", side_effect=Exception("Network error")
    )
    assert scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a") == []
def test_scrape_contests_success(mocker):
def mock_get_side_effect(url, **kwargs):
if url == "https://atcoder.jp/contests/archive":
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = """
<html>
<ul class="pagination">
<li>1</li>
</ul>
</html>
"""
return mock_response
elif "page=1" in url:
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = """
<table class="table">
<tbody>
<tr>
<td>2025-01-15 21:00:00+0900</td>
<td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
<td>01:40</td>
<td> - 1999</td>
</tr>
<tr>
<td>2025-01-14 21:00:00+0900</td>
<td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
<td>02:00</td>
<td>1000 - 2799</td>
</tr>
</tbody>
</table>
"""
return mock_response
else:
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = "<html></html>"
return mock_response
mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
result = scrape_contests()
assert len(result) == 2
assert result[0] == ContestSummary(
id="abc350",
name="AtCoder Beginner Contest 350",
display_name="AtCoder Beginner Contest 350",
)
assert result[1] == ContestSummary(
id="arc170",
name="AtCoder Regular Contest 170",
display_name="AtCoder Regular Contest 170",
)
def test_scrape_contests_no_table(mocker):
mock_response = Mock()
mock_response.text = "<html><body>No table found</body></html>"
mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
result = scrape_contests()
assert result == []
def test_scrape_contests_network_error(mocker):
mocker.patch(
"scrapers.atcoder.requests.get", side_effect=Exception("Network error")
)
result = scrape_contests()
assert result == []
def test_scrape_contests_filters_ahc(mocker):
def mock_get_side_effect(url, **kwargs):
if url == "https://atcoder.jp/contests/archive":
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = """
<html>
<ul class="pagination">
<li>1</li>
</ul>
</html>
"""
return mock_response
elif "page=1" in url:
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = """
<table class="table">
<tbody>
<tr>
<td>2025-01-15 21:00:00+0900</td>
<td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
<td>01:40</td>
<td> - 1999</td>
</tr>
<tr>
<td>2025-01-14 21:00:00+0900</td>
<td><a href="/contests/ahc044">AtCoder Heuristic Contest 044</a></td>
<td>05:00</td>
<td>-</td>
</tr>
<tr>
<td>2025-01-13 21:00:00+0900</td>
<td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
<td>02:00</td>
<td>1000 - 2799</td>
</tr>
</tbody>
</table>
"""
return mock_response
else:
mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.text = "<html></html>"
return mock_response
mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
result = scrape_contests()
assert len(result) == 2
assert result[0] == ContestSummary(
id="abc350",
name="AtCoder Beginner Contest 350",
display_name="AtCoder Beginner Contest 350",
)
assert result[1] == ContestSummary(
id="arc170",
name="AtCoder Regular Contest 170",
display_name="AtCoder Regular Contest 170",
)
# Ensure ahc044 is filtered out
contest_ids = [contest.id for contest in result]
assert "ahc044" not in contest_ids

View file

@ -1,97 +0,0 @@
from unittest.mock import Mock
from scrapers.codeforces import CodeforcesScraper
from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_codeforces_html):
mock_page = Mock()
mock_page.html_content = mock_codeforces_html
mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
scraper = CodeforcesScraper()
result = scraper.scrape_problem_tests("1900", "A")
assert result.success
assert len(result.tests) == 1
assert result.tests[0].input == "1\n3\n1 2 3"
assert result.tests[0].expected == "6"
def test_scrape_contest_problems(mocker):
html = """
<a href="/contest/1900/problem/A">A. Problem A</a>
<a href="/contest/1900/problem/B">B. Problem B</a>
"""
mock_page = Mock()
mock_page.html_content = html
mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
scraper = CodeforcesScraper()
result = scraper.scrape_contest_metadata("1900")
assert result.success
assert len(result.problems) == 2
assert result.problems[0] == ProblemSummary(id="a", name="A. Problem A")
assert result.problems[1] == ProblemSummary(id="b", name="B. Problem B")
def test_scrape_network_error(mocker):
mocker.patch(
"scrapers.codeforces.StealthyFetcher.fetch",
side_effect=Exception("Network error"),
)
scraper = CodeforcesScraper()
result = scraper.scrape_problem_tests("1900", "A")
assert not result.success
assert "network error" in result.error.lower()
def test_scrape_contests_success(mocker):
mock_response = Mock()
mock_response.json.return_value = {
"status": "OK",
"result": [
{"id": 1951, "name": "Educational Codeforces Round 168 (Rated for Div. 2)"},
{"id": 1950, "name": "Codeforces Round 936 (Div. 2)"},
{"id": 1949, "name": "Codeforces Global Round 26"},
],
}
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
scraper = CodeforcesScraper()
result = scraper.scrape_contest_list()
assert result.success
assert len(result.contests) == 3
assert result.contests[0] == ContestSummary(
id="1951",
name="Educational Codeforces Round 168 (Rated for Div. 2)",
display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
)
def test_scrape_contests_api_error(mocker):
mock_response = Mock()
mock_response.json.return_value = {"status": "FAILED", "result": []}
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
scraper = CodeforcesScraper()
result = scraper.scrape_contest_list()
assert not result.success
assert "no contests found" in result.error.lower()
def test_scrape_contests_network_error(mocker):
mocker.patch(
"scrapers.codeforces.requests.get", side_effect=Exception("Network error")
)
scraper = CodeforcesScraper()
result = scraper.scrape_contest_list()
assert not result.success
assert "network error" in result.error.lower()

View file

@ -1,185 +0,0 @@
from unittest.mock import Mock
from scrapers.cses import (
normalize_category_name,
scrape,
scrape_all_problems,
scrape_categories,
scrape_category_problems,
snake_to_title,
)
from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_cses_html):
    """scrape() pulls the labeled Input/Output example from a CSES task page."""
    fake_response = Mock()
    fake_response.text = mock_cses_html
    mocker.patch("scrapers.cses.requests.get", return_value=fake_response)

    cases = scrape("https://cses.fi/problemset/task/1068")

    assert len(cases) == 1
    assert (cases[0].input, cases[0].expected) == ("3\n1 2 3", "6")
def test_scrape_all_problems(mocker):
mock_response = Mock()
mock_response.text = """
<div class="content">
<h1>Introductory Problems</h1>
<ul>
<li><a href="/problemset/task/1068">Weird Algorithm</a></li>
<li><a href="/problemset/task/1083">Missing Number</a></li>
</ul>
<h1>Sorting and Searching</h1>
<ul>
<li><a href="/problemset/task/1084">Apartments</a></li>
</ul>
</div>
"""
mock_response.raise_for_status = Mock()
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
result = scrape_all_problems()
assert "Introductory Problems" in result
assert "Sorting and Searching" in result
assert len(result["Introductory Problems"]) == 2
assert result["Introductory Problems"][0] == ProblemSummary(
id="1068",
name="Weird Algorithm",
)
def test_scrape_network_error(mocker):
    """Network failures are swallowed and reported as an empty result."""
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    assert scrape("https://cses.fi/problemset/task/1068") == []
def test_normalize_category_name():
    """normalize_category_name lower-cases and snake_cases category titles."""
    cases = {
        "Sorting and Searching": "sorting_and_searching",
        "Dynamic Programming": "dynamic_programming",
        "Graph Algorithms": "graph_algorithms",
    }
    for title, expected in cases.items():
        assert normalize_category_name(title) == expected
def test_snake_to_title():
    """snake_to_title is the display-name inverse of normalize_category_name."""
    cases = {
        "sorting_and_searching": "Sorting and Searching",
        "dynamic_programming": "Dynamic Programming",
        "graph_algorithms": "Graph Algorithms",
    }
    for snake, expected in cases.items():
        assert snake_to_title(snake) == expected
def test_scrape_category_problems_success(mocker):
mock_response = Mock()
mock_response.text = """
<div class="content">
<h1>General</h1>
<ul>
<li><a href="/problemset/task/1000">Test Problem</a></li>
</ul>
<h1>Sorting and Searching</h1>
<ul>
<li><a href="/problemset/task/1640">Sum of Two Values</a></li>
<li><a href="/problemset/task/1643">Maximum Subarray Sum</a></li>
</ul>
<h1>Dynamic Programming</h1>
<ul>
<li><a href="/problemset/task/1633">Dice Combinations</a></li>
</ul>
</div>
"""
mock_response.raise_for_status = Mock()
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
result = scrape_category_problems("sorting_and_searching")
assert len(result) == 2
assert result[0].id == "1640"
assert result[0].name == "Sum of Two Values"
assert result[1].id == "1643"
assert result[1].name == "Maximum Subarray Sum"
def test_scrape_category_problems_not_found(mocker):
mock_response = Mock()
mock_response.text = """
<div class="content">
<h1>Some Other Category</h1>
<ul>
<li><a href="/problemset/task/1000">Test Problem</a></li>
</ul>
</div>
"""
mock_response.raise_for_status = Mock()
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
result = scrape_category_problems("nonexistent_category")
assert result == []
def test_scrape_category_problems_network_error(mocker):
    """A failing request yields an empty problem list rather than raising."""
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    assert scrape_category_problems("sorting_and_searching") == []
def test_scrape_categories_success(mocker):
mock_response = Mock()
mock_response.text = """
<html>
<body>
<h2>General</h2>
<ul class="task-list">
<li class="link"><a href="/register">Register</a></li>
</ul>
<h2>Introductory Problems</h2>
<ul class="task-list">
<li class="task"><a href="/problemset/task/1068">Weird Algorithm</a></li>
<li class="task"><a href="/problemset/task/1083">Missing Number</a></li>
</ul>
<h2>Sorting and Searching</h2>
<ul class="task-list">
<li class="task"><a href="/problemset/task/1621">Distinct Numbers</a></li>
<li class="task"><a href="/problemset/task/1084">Apartments</a></li>
<li class="task"><a href="/problemset/task/1090">Ferris Wheel</a></li>
</ul>
</body>
</html>
"""
mock_response.raise_for_status = Mock()
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
result = scrape_categories()
assert len(result) == 2
assert result[0] == ContestSummary(
id="introductory_problems",
name="Introductory Problems",
display_name="Introductory Problems",
)
assert result[1] == ContestSummary(
id="sorting_and_searching",
name="Sorting and Searching",
display_name="Sorting and Searching",
)
def test_scrape_categories_network_error(mocker):
    """A failing request yields an empty category list rather than raising."""
    mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
    assert scrape_categories() == []

View file

@ -0,0 +1,2 @@
def test():
    # Placeholder smoke test: keeps the suite non-empty and verifies the
    # test runner itself executes.
    assert 5 == 5

415
uv.lock generated
View file

@ -92,6 +92,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
] ]
[[package]]
name = "anyio"
version = "4.11.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
{ name = "sniffio" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" },
]
[[package]] [[package]]
name = "attrs" name = "attrs"
version = "25.3.0" version = "25.3.0"
@ -101,15 +115,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
] ]
[[package]]
name = "automat"
version = "25.4.16"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e3/0f/d40bbe294bbf004d436a8bcbcfaadca8b5140d39ad0ad3d73d1a8ba15f14/automat-25.4.16.tar.gz", hash = "sha256:0017591a5477066e90d26b0e696ddc143baafd87b588cfac8100bc6be9634de0", size = 129977, upload-time = "2025-04-16T20:12:16.002Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
]
[[package]] [[package]]
name = "backoff" name = "backoff"
version = "2.2.1" version = "2.2.1"
@ -119,6 +124,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
] ]
[[package]]
name = "basedpyright"
version = "1.31.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nodejs-wheel-binaries" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/f6/c5657b1e464d04757cde2db76922a88091fe16854bd3d12e470c23b0dcf1/basedpyright-1.31.6.tar.gz", hash = "sha256:07f3602ba1582218dfd1db25b8b69cd3493e1f4367f46a44fd57bb9034b52ea9", size = 22683901, upload-time = "2025-10-01T13:11:21.317Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/2b/34f338b4c04fe965fd209ed872d9fdd893dacc1a06feb6c9fec13ff535c1/basedpyright-1.31.6-py3-none-any.whl", hash = "sha256:620968ee69c14eee6682f29ffd6f813a30966afb1083ecfa4caf155c5d24f2d5", size = 11805295, upload-time = "2025-10-01T13:11:18.308Z" },
]
[[package]] [[package]]
name = "beautifulsoup4" name = "beautifulsoup4"
version = "4.13.5" version = "4.13.5"
@ -332,77 +349,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
] ]
[[package]]
name = "constantly"
version = "23.10.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/6f/cb2a94494ff74aa9528a36c5b1422756330a75a8367bf20bd63171fc324d/constantly-23.10.4.tar.gz", hash = "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd", size = 13300, upload-time = "2023-10-28T23:18:24.316Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b8/40/c199d095151addf69efdb4b9ca3a4f20f70e20508d6222bffb9b76f58573/constantly-23.10.4-py3-none-any.whl", hash = "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9", size = 13547, upload-time = "2023-10-28T23:18:23.038Z" },
]
[[package]]
name = "cryptography"
version = "46.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/62/e3664e6ffd7743e1694b244dde70b43a394f6f7fbcacf7014a8ff5197c73/cryptography-46.0.1.tar.gz", hash = "sha256:ed570874e88f213437f5cf758f9ef26cbfc3f336d889b1e592ee11283bb8d1c7", size = 749198, upload-time = "2025-09-17T00:10:35.797Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4c/8c/44ee01267ec01e26e43ebfdae3f120ec2312aa72fa4c0507ebe41a26739f/cryptography-46.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:1cd6d50c1a8b79af1a6f703709d8973845f677c8e97b1268f5ff323d38ce8475", size = 7285044, upload-time = "2025-09-17T00:08:36.807Z" },
{ url = "https://files.pythonhosted.org/packages/22/59/9ae689a25047e0601adfcb159ec4f83c0b4149fdb5c3030cc94cd218141d/cryptography-46.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0ff483716be32690c14636e54a1f6e2e1b7bf8e22ca50b989f88fa1b2d287080", size = 4308182, upload-time = "2025-09-17T00:08:39.388Z" },
{ url = "https://files.pythonhosted.org/packages/c4/ee/ca6cc9df7118f2fcd142c76b1da0f14340d77518c05b1ebfbbabca6b9e7d/cryptography-46.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9873bf7c1f2a6330bdfe8621e7ce64b725784f9f0c3a6a55c3047af5849f920e", size = 4572393, upload-time = "2025-09-17T00:08:41.663Z" },
{ url = "https://files.pythonhosted.org/packages/7f/a3/0f5296f63815d8e985922b05c31f77ce44787b3127a67c0b7f70f115c45f/cryptography-46.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0dfb7c88d4462a0cfdd0d87a3c245a7bc3feb59de101f6ff88194f740f72eda6", size = 4308400, upload-time = "2025-09-17T00:08:43.559Z" },
{ url = "https://files.pythonhosted.org/packages/5d/8c/74fcda3e4e01be1d32775d5b4dd841acaac3c1b8fa4d0774c7ac8d52463d/cryptography-46.0.1-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e22801b61613ebdebf7deb18b507919e107547a1d39a3b57f5f855032dd7cfb8", size = 4015786, upload-time = "2025-09-17T00:08:45.758Z" },
{ url = "https://files.pythonhosted.org/packages/dc/b8/85d23287baeef273b0834481a3dd55bbed3a53587e3b8d9f0898235b8f91/cryptography-46.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:757af4f6341ce7a1e47c326ca2a81f41d236070217e5fbbad61bbfe299d55d28", size = 4982606, upload-time = "2025-09-17T00:08:47.602Z" },
{ url = "https://files.pythonhosted.org/packages/e5/d3/de61ad5b52433b389afca0bc70f02a7a1f074651221f599ce368da0fe437/cryptography-46.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f7a24ea78de345cfa7f6a8d3bde8b242c7fac27f2bd78fa23474ca38dfaeeab9", size = 4604234, upload-time = "2025-09-17T00:08:49.879Z" },
{ url = "https://files.pythonhosted.org/packages/dc/1f/dbd4d6570d84748439237a7478d124ee0134bf166ad129267b7ed8ea6d22/cryptography-46.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e8776dac9e660c22241b6587fae51a67b4b0147daa4d176b172c3ff768ad736", size = 4307669, upload-time = "2025-09-17T00:08:52.321Z" },
{ url = "https://files.pythonhosted.org/packages/ec/fd/ca0a14ce7f0bfe92fa727aacaf2217eb25eb7e4ed513b14d8e03b26e63ed/cryptography-46.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9f40642a140c0c8649987027867242b801486865277cbabc8c6059ddef16dc8b", size = 4947579, upload-time = "2025-09-17T00:08:54.697Z" },
{ url = "https://files.pythonhosted.org/packages/89/6b/09c30543bb93401f6f88fce556b3bdbb21e55ae14912c04b7bf355f5f96c/cryptography-46.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:449ef2b321bec7d97ef2c944173275ebdab78f3abdd005400cc409e27cd159ab", size = 4603669, upload-time = "2025-09-17T00:08:57.16Z" },
{ url = "https://files.pythonhosted.org/packages/23/9a/38cb01cb09ce0adceda9fc627c9cf98eb890fc8d50cacbe79b011df20f8a/cryptography-46.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2dd339ba3345b908fa3141ddba4025568fa6fd398eabce3ef72a29ac2d73ad75", size = 4435828, upload-time = "2025-09-17T00:08:59.606Z" },
{ url = "https://files.pythonhosted.org/packages/0f/53/435b5c36a78d06ae0bef96d666209b0ecd8f8181bfe4dda46536705df59e/cryptography-46.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7411c910fb2a412053cf33cfad0153ee20d27e256c6c3f14d7d7d1d9fec59fd5", size = 4709553, upload-time = "2025-09-17T00:09:01.832Z" },
{ url = "https://files.pythonhosted.org/packages/f5/c4/0da6e55595d9b9cd3b6eb5dc22f3a07ded7f116a3ea72629cab595abb804/cryptography-46.0.1-cp311-abi3-win32.whl", hash = "sha256:cbb8e769d4cac884bb28e3ff620ef1001b75588a5c83c9c9f1fdc9afbe7f29b0", size = 3058327, upload-time = "2025-09-17T00:09:03.726Z" },
{ url = "https://files.pythonhosted.org/packages/95/0f/cd29a35e0d6e78a0ee61793564c8cff0929c38391cb0de27627bdc7525aa/cryptography-46.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:92e8cfe8bd7dd86eac0a677499894862cd5cc2fd74de917daa881d00871ac8e7", size = 3523893, upload-time = "2025-09-17T00:09:06.272Z" },
{ url = "https://files.pythonhosted.org/packages/f2/dd/eea390f3e78432bc3d2f53952375f8b37cb4d37783e626faa6a51e751719/cryptography-46.0.1-cp311-abi3-win_arm64.whl", hash = "sha256:db5597a4c7353b2e5fb05a8e6cb74b56a4658a2b7bf3cb6b1821ae7e7fd6eaa0", size = 2932145, upload-time = "2025-09-17T00:09:08.568Z" },
{ url = "https://files.pythonhosted.org/packages/0a/fb/c73588561afcd5e24b089952bd210b14676c0c5bf1213376350ae111945c/cryptography-46.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:4c49eda9a23019e11d32a0eb51a27b3e7ddedde91e099c0ac6373e3aacc0d2ee", size = 7193928, upload-time = "2025-09-17T00:09:10.595Z" },
{ url = "https://files.pythonhosted.org/packages/26/34/0ff0bb2d2c79f25a2a63109f3b76b9108a906dd2a2eb5c1d460b9938adbb/cryptography-46.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9babb7818fdd71394e576cf26c5452df77a355eac1a27ddfa24096665a27f8fd", size = 4293515, upload-time = "2025-09-17T00:09:12.861Z" },
{ url = "https://files.pythonhosted.org/packages/df/b7/d4f848aee24ecd1be01db6c42c4a270069a4f02a105d9c57e143daf6cf0f/cryptography-46.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9f2c4cc63be3ef43c0221861177cee5d14b505cd4d4599a89e2cd273c4d3542a", size = 4545619, upload-time = "2025-09-17T00:09:15.397Z" },
{ url = "https://files.pythonhosted.org/packages/44/a5/42fedefc754fd1901e2d95a69815ea4ec8a9eed31f4c4361fcab80288661/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:41c281a74df173876da1dc9a9b6953d387f06e3d3ed9284e3baae3ab3f40883a", size = 4299160, upload-time = "2025-09-17T00:09:17.155Z" },
{ url = "https://files.pythonhosted.org/packages/86/a1/cd21174f56e769c831fbbd6399a1b7519b0ff6280acec1b826d7b072640c/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0a17377fa52563d730248ba1f68185461fff36e8bc75d8787a7dd2e20a802b7a", size = 3994491, upload-time = "2025-09-17T00:09:18.971Z" },
{ url = "https://files.pythonhosted.org/packages/8d/2f/a8cbfa1c029987ddc746fd966711d4fa71efc891d37fbe9f030fe5ab4eec/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:0d1922d9280e08cde90b518a10cd66831f632960a8d08cb3418922d83fce6f12", size = 4960157, upload-time = "2025-09-17T00:09:20.923Z" },
{ url = "https://files.pythonhosted.org/packages/67/ae/63a84e6789e0d5a2502edf06b552bcb0fa9ff16147265d5c44a211942abe/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:af84e8e99f1a82cea149e253014ea9dc89f75b82c87bb6c7242203186f465129", size = 4577263, upload-time = "2025-09-17T00:09:23.356Z" },
{ url = "https://files.pythonhosted.org/packages/ef/8f/1b9fa8e92bd9cbcb3b7e1e593a5232f2c1e6f9bd72b919c1a6b37d315f92/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ef648d2c690703501714588b2ba640facd50fd16548133b11b2859e8655a69da", size = 4298703, upload-time = "2025-09-17T00:09:25.566Z" },
{ url = "https://files.pythonhosted.org/packages/c3/af/bb95db070e73fea3fae31d8a69ac1463d89d1c084220f549b00dd01094a8/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:e94eb5fa32a8a9f9bf991f424f002913e3dd7c699ef552db9b14ba6a76a6313b", size = 4926363, upload-time = "2025-09-17T00:09:27.451Z" },
{ url = "https://files.pythonhosted.org/packages/f5/3b/d8fb17ffeb3a83157a1cc0aa5c60691d062aceecba09c2e5e77ebfc1870c/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:534b96c0831855e29fc3b069b085fd185aa5353033631a585d5cd4dd5d40d657", size = 4576958, upload-time = "2025-09-17T00:09:29.924Z" },
{ url = "https://files.pythonhosted.org/packages/d9/46/86bc3a05c10c8aa88c8ae7e953a8b4e407c57823ed201dbcba55c4d655f4/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9b55038b5c6c47559aa33626d8ecd092f354e23de3c6975e4bb205df128a2a0", size = 4422507, upload-time = "2025-09-17T00:09:32.222Z" },
{ url = "https://files.pythonhosted.org/packages/a8/4e/387e5a21dfd2b4198e74968a541cfd6128f66f8ec94ed971776e15091ac3/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ec13b7105117dbc9afd023300fb9954d72ca855c274fe563e72428ece10191c0", size = 4683964, upload-time = "2025-09-17T00:09:34.118Z" },
{ url = "https://files.pythonhosted.org/packages/25/a3/f9f5907b166adb8f26762071474b38bbfcf89858a5282f032899075a38a1/cryptography-46.0.1-cp314-cp314t-win32.whl", hash = "sha256:504e464944f2c003a0785b81668fe23c06f3b037e9cb9f68a7c672246319f277", size = 3029705, upload-time = "2025-09-17T00:09:36.381Z" },
{ url = "https://files.pythonhosted.org/packages/12/66/4d3a4f1850db2e71c2b1628d14b70b5e4c1684a1bd462f7fffb93c041c38/cryptography-46.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c52fded6383f7e20eaf70a60aeddd796b3677c3ad2922c801be330db62778e05", size = 3502175, upload-time = "2025-09-17T00:09:38.261Z" },
{ url = "https://files.pythonhosted.org/packages/52/c7/9f10ad91435ef7d0d99a0b93c4360bea3df18050ff5b9038c489c31ac2f5/cryptography-46.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:9495d78f52c804b5ec8878b5b8c7873aa8e63db9cd9ee387ff2db3fffe4df784", size = 2912354, upload-time = "2025-09-17T00:09:40.078Z" },
{ url = "https://files.pythonhosted.org/packages/98/e5/fbd632385542a3311915976f88e0dfcf09e62a3fc0aff86fb6762162a24d/cryptography-46.0.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:d84c40bdb8674c29fa192373498b6cb1e84f882889d21a471b45d1f868d8d44b", size = 7255677, upload-time = "2025-09-17T00:09:42.407Z" },
{ url = "https://files.pythonhosted.org/packages/56/3e/13ce6eab9ad6eba1b15a7bd476f005a4c1b3f299f4c2f32b22408b0edccf/cryptography-46.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ed64e5083fa806709e74fc5ea067dfef9090e5b7a2320a49be3c9df3583a2d8", size = 4301110, upload-time = "2025-09-17T00:09:45.614Z" },
{ url = "https://files.pythonhosted.org/packages/a2/67/65dc233c1ddd688073cf7b136b06ff4b84bf517ba5529607c9d79720fc67/cryptography-46.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:341fb7a26bc9d6093c1b124b9f13acc283d2d51da440b98b55ab3f79f2522ead", size = 4562369, upload-time = "2025-09-17T00:09:47.601Z" },
{ url = "https://files.pythonhosted.org/packages/17/db/d64ae4c6f4e98c3dac5bf35dd4d103f4c7c345703e43560113e5e8e31b2b/cryptography-46.0.1-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6ef1488967e729948d424d09c94753d0167ce59afba8d0f6c07a22b629c557b2", size = 4302126, upload-time = "2025-09-17T00:09:49.335Z" },
{ url = "https://files.pythonhosted.org/packages/3d/19/5f1eea17d4805ebdc2e685b7b02800c4f63f3dd46cfa8d4c18373fea46c8/cryptography-46.0.1-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7823bc7cdf0b747ecfb096d004cc41573c2f5c7e3a29861603a2871b43d3ef32", size = 4009431, upload-time = "2025-09-17T00:09:51.239Z" },
{ url = "https://files.pythonhosted.org/packages/81/b5/229ba6088fe7abccbfe4c5edb96c7a5ad547fac5fdd0d40aa6ea540b2985/cryptography-46.0.1-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:f736ab8036796f5a119ff8211deda416f8c15ce03776db704a7a4e17381cb2ef", size = 4980739, upload-time = "2025-09-17T00:09:54.181Z" },
{ url = "https://files.pythonhosted.org/packages/3a/9c/50aa38907b201e74bc43c572f9603fa82b58e831bd13c245613a23cff736/cryptography-46.0.1-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:e46710a240a41d594953012213ea8ca398cd2448fbc5d0f1be8160b5511104a0", size = 4592289, upload-time = "2025-09-17T00:09:56.731Z" },
{ url = "https://files.pythonhosted.org/packages/5a/33/229858f8a5bb22f82468bb285e9f4c44a31978d5f5830bb4ea1cf8a4e454/cryptography-46.0.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:84ef1f145de5aee82ea2447224dc23f065ff4cc5791bb3b506615957a6ba8128", size = 4301815, upload-time = "2025-09-17T00:09:58.548Z" },
{ url = "https://files.pythonhosted.org/packages/52/cb/b76b2c87fbd6ed4a231884bea3ce073406ba8e2dae9defad910d33cbf408/cryptography-46.0.1-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9394c7d5a7565ac5f7d9ba38b2617448eba384d7b107b262d63890079fad77ca", size = 4943251, upload-time = "2025-09-17T00:10:00.475Z" },
{ url = "https://files.pythonhosted.org/packages/94/0f/f66125ecf88e4cb5b8017ff43f3a87ede2d064cb54a1c5893f9da9d65093/cryptography-46.0.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ed957044e368ed295257ae3d212b95456bd9756df490e1ac4538857f67531fcc", size = 4591247, upload-time = "2025-09-17T00:10:02.874Z" },
{ url = "https://files.pythonhosted.org/packages/f6/22/9f3134ae436b63b463cfdf0ff506a0570da6873adb4bf8c19b8a5b4bac64/cryptography-46.0.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f7de12fa0eee6234de9a9ce0ffcfa6ce97361db7a50b09b65c63ac58e5f22fc7", size = 4428534, upload-time = "2025-09-17T00:10:04.994Z" },
{ url = "https://files.pythonhosted.org/packages/89/39/e6042bcb2638650b0005c752c38ea830cbfbcbb1830e4d64d530000aa8dc/cryptography-46.0.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7fab1187b6c6b2f11a326f33b036f7168f5b996aedd0c059f9738915e4e8f53a", size = 4699541, upload-time = "2025-09-17T00:10:06.925Z" },
{ url = "https://files.pythonhosted.org/packages/68/46/753d457492d15458c7b5a653fc9a84a1c9c7a83af6ebdc94c3fc373ca6e8/cryptography-46.0.1-cp38-abi3-win32.whl", hash = "sha256:45f790934ac1018adeba46a0f7289b2b8fe76ba774a88c7f1922213a56c98bc1", size = 3043779, upload-time = "2025-09-17T00:10:08.951Z" },
{ url = "https://files.pythonhosted.org/packages/2f/50/b6f3b540c2f6ee712feeb5fa780bb11fad76634e71334718568e7695cb55/cryptography-46.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:7176a5ab56fac98d706921f6416a05e5aff7df0e4b91516f450f8627cda22af3", size = 3517226, upload-time = "2025-09-17T00:10:10.769Z" },
{ url = "https://files.pythonhosted.org/packages/ff/e8/77d17d00981cdd27cc493e81e1749a0b8bbfb843780dbd841e30d7f50743/cryptography-46.0.1-cp38-abi3-win_arm64.whl", hash = "sha256:efc9e51c3e595267ff84adf56e9b357db89ab2279d7e375ffcaf8f678606f3d9", size = 2923149, upload-time = "2025-09-17T00:10:13.236Z" },
{ url = "https://files.pythonhosted.org/packages/27/27/077e09fd92075dd1338ea0ffaf5cfee641535545925768350ad90d8c36ca/cryptography-46.0.1-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b9c79af2c3058430d911ff1a5b2b96bbfe8da47d5ed961639ce4681886614e70", size = 3722319, upload-time = "2025-09-17T00:10:20.273Z" },
{ url = "https://files.pythonhosted.org/packages/db/32/6fc7250280920418651640d76cee34d91c1e0601d73acd44364570cf041f/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0ca4be2af48c24df689a150d9cd37404f689e2968e247b6b8ff09bff5bcd786f", size = 4249030, upload-time = "2025-09-17T00:10:22.396Z" },
{ url = "https://files.pythonhosted.org/packages/32/33/8d5398b2da15a15110b2478480ab512609f95b45ead3a105c9a9c76f9980/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:13e67c4d3fb8b6bc4ef778a7ccdd8df4cd15b4bcc18f4239c8440891a11245cc", size = 4528009, upload-time = "2025-09-17T00:10:24.418Z" },
{ url = "https://files.pythonhosted.org/packages/fd/1c/4012edad2a8977ab386c36b6e21f5065974d37afa3eade83a9968cba4855/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:15b5fd9358803b0d1cc42505a18d8bca81dabb35b5cfbfea1505092e13a9d96d", size = 4248902, upload-time = "2025-09-17T00:10:26.255Z" },
{ url = "https://files.pythonhosted.org/packages/58/a3/257cd5ae677302de8fa066fca9de37128f6729d1e63c04dd6a15555dd450/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:e34da95e29daf8a71cb2841fd55df0511539a6cdf33e6f77c1e95e44006b9b46", size = 4527150, upload-time = "2025-09-17T00:10:28.28Z" },
{ url = "https://files.pythonhosted.org/packages/6a/cd/fe6b65e1117ec7631f6be8951d3db076bac3e1b096e3e12710ed071ffc3c/cryptography-46.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:34f04b7311174469ab3ac2647469743720f8b6c8b046f238e5cb27905695eb2a", size = 3448210, upload-time = "2025-09-17T00:10:30.145Z" },
]
[[package]] [[package]]
name = "cssselect" name = "cssselect"
version = "1.3.0" version = "1.3.0"
@ -450,15 +396,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/24/f7351052cf9db771fe4f32fca47fd66e6d9b53d8613b17faf7d130a9d553/cython-3.1.4-py3-none-any.whl", hash = "sha256:d194d95e4fa029a3f6c7d46bdd16d973808c7ea4797586911fdb67cb98b1a2c6", size = 1227541, upload-time = "2025-09-16T07:20:29.595Z" }, { url = "https://files.pythonhosted.org/packages/7c/24/f7351052cf9db771fe4f32fca47fd66e6d9b53d8613b17faf7d130a9d553/cython-3.1.4-py3-none-any.whl", hash = "sha256:d194d95e4fa029a3f6c7d46bdd16d973808c7ea4797586911fdb67cb98b1a2c6", size = 1227541, upload-time = "2025-09-16T07:20:29.595Z" },
] ]
[[package]]
name = "defusedxml"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
]
[[package]] [[package]]
name = "distlib" name = "distlib"
version = "0.4.0" version = "0.4.0"
@ -611,15 +548,40 @@ wheels = [
] ]
[[package]] [[package]]
name = "hyperlink" name = "h11"
version = "21.0.0" version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "certifi" },
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
[[package]]
name = "httpx"
version = "0.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "certifi" },
{ name = "httpcore" },
{ name = "idna" }, { name = "idna" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/3a/51/1947bd81d75af87e3bb9e34593a4cf118115a8feb451ce7a69044ef1412e/hyperlink-21.0.0.tar.gz", hash = "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", size = 140743, upload-time = "2021-01-08T05:51:20.972Z" } sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/aa/8caf6a0a3e62863cbb9dab27135660acba46903b703e224f14f447e57934/hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4", size = 74638, upload-time = "2021-01-08T05:51:22.906Z" }, { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
] ]
[[package]] [[package]]
@ -640,18 +602,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
] ]
[[package]]
name = "incremental"
version = "24.7.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/27/87/156b374ff6578062965afe30cc57627d35234369b3336cf244b240c8d8e6/incremental-24.7.2.tar.gz", hash = "sha256:fb4f1d47ee60efe87d4f6f0ebb5f70b9760db2b2574c59c8e8912be4ebd464c9", size = 28157, upload-time = "2024-07-29T20:03:55.441Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0d/38/221e5b2ae676a3938c2c1919131410c342b6efc2baffeda395dd66eeca8f/incremental-24.7.2-py3-none-any.whl", hash = "sha256:8cb2c3431530bec48ad70513931a760f446ad6c25e8333ca5d95e24b0ed7b8fe", size = 20516, upload-time = "2024-07-29T20:03:53.677Z" },
]
[[package]] [[package]]
name = "iniconfig" name = "iniconfig"
version = "2.1.0" version = "2.1.0"
@ -661,38 +611,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
] ]
[[package]]
name = "itemadapter"
version = "0.12.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e9/50/2fd91416acfbd316b58de909cfc2a5c2daaa4ced67fb76cb0dedcbd13197/itemadapter-0.12.2.tar.gz", hash = "sha256:8e05c07cea966a7a8c4f096150ee2c91d9b4104a76f9afd029b235e1b564a61f", size = 32089, upload-time = "2025-09-02T12:15:19.751Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/ce/b2d995ddf3d493849f5608c7eab92c24cc50933503c645de3e4843aa7800/itemadapter-0.12.2-py3-none-any.whl", hash = "sha256:17ff8acb169fb11dbed8af83e805c19c3b890bde4653761b4d3c1544142e04b6", size = 18480, upload-time = "2025-09-02T12:15:18.259Z" },
]
[[package]]
name = "itemloaders"
version = "1.3.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "itemadapter" },
{ name = "jmespath" },
{ name = "parsel" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b6/3e/c549370e95c9dc7ec5e155c075e2700fa75abe5625608a4ce5009eabe0bf/itemloaders-1.3.2.tar.gz", hash = "sha256:4faf5b3abe83bf014476e3fd9ccf66867282971d9f1d4e96d9a61b60c3786770", size = 19707, upload-time = "2024-09-30T13:48:49.417Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/68/9592dcfd9c24467b545fac17b098a171e372bf0d775400fa1971712bca57/itemloaders-1.3.2-py3-none-any.whl", hash = "sha256:6a91465f721c7bad8b07e1fbb0560cf99f4845156ed9f7bf2ca424336c6a677c", size = 12194, upload-time = "2024-09-30T13:48:47.82Z" },
]
[[package]]
name = "jmespath"
version = "1.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" },
]
[[package]] [[package]]
name = "language-tags" name = "language-tags"
version = "1.2.0" version = "1.2.0"
@ -1030,6 +948,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
] ]
[[package]]
name = "ndjson"
version = "0.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b4/d5/209b6ca94566f9c94c0ec41cee1681c0a3b92a306a84a9b0fcd662088dc3/ndjson-0.3.1.tar.gz", hash = "sha256:bf9746cb6bb1cb53d172cda7f154c07c786d665ff28341e4e689b796b229e5d6", size = 6448, upload-time = "2020-02-25T05:01:07.873Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/c9/04ba0056011ba96a58163ebfd666d8385300bd12da1afe661a5a147758d7/ndjson-0.3.1-py2.py3-none-any.whl", hash = "sha256:839c22275e6baa3040077b83c005ac24199b94973309a8a1809be962c753a410", size = 5305, upload-time = "2020-02-25T05:01:06.39Z" },
]
[[package]] [[package]]
name = "nodeenv" name = "nodeenv"
version = "1.9.1" version = "1.9.1"
@ -1039,6 +966,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
] ]
[[package]]
name = "nodejs-wheel-binaries"
version = "22.20.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/54/02f58c8119e2f1984e2572cc77a7b469dbaf4f8d171ad376e305749ef48e/nodejs_wheel_binaries-22.20.0.tar.gz", hash = "sha256:a62d47c9fd9c32191dff65bbe60261504f26992a0a19fe8b4d523256a84bd351", size = 8058, upload-time = "2025-09-26T09:48:00.906Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/24/6d/333e5458422f12318e3c3e6e7f194353aa68b0d633217c7e89833427ca01/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:455add5ac4f01c9c830ab6771dbfad0fdf373f9b040d3aabe8cca9b6c56654fb", size = 53246314, upload-time = "2025-09-26T09:47:32.536Z" },
{ url = "https://files.pythonhosted.org/packages/56/30/dcd6879d286a35b3c4c8f9e5e0e1bcf4f9e25fe35310fc77ecf97f915a23/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:5d8c12f97eea7028b34a84446eb5ca81829d0c428dfb4e647e09ac617f4e21fa", size = 53644391, upload-time = "2025-09-26T09:47:36.093Z" },
{ url = "https://files.pythonhosted.org/packages/58/be/c7b2e7aa3bb281d380a1c531f84d0ccfe225832dfc3bed1ca171753b9630/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a2b0989194148f66e9295d8f11bc463bde02cbe276517f4d20a310fb84780ae", size = 60282516, upload-time = "2025-09-26T09:47:39.88Z" },
{ url = "https://files.pythonhosted.org/packages/3e/c5/8befacf4190e03babbae54cb0809fb1a76e1600ec3967ab8ee9f8fc85b65/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5c500aa4dc046333ecb0a80f183e069e5c30ce637f1c1a37166b2c0b642dc21", size = 60347290, upload-time = "2025-09-26T09:47:43.712Z" },
{ url = "https://files.pythonhosted.org/packages/c0/bd/cfffd1e334277afa0714962c6ec432b5fe339340a6bca2e5fa8e678e7590/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3279eb1b99521f0d20a850bbfc0159a658e0e85b843b3cf31b090d7da9f10dfc", size = 62178798, upload-time = "2025-09-26T09:47:47.752Z" },
{ url = "https://files.pythonhosted.org/packages/08/14/10b83a9c02faac985b3e9f5e65d63a34fc0f46b48d8a2c3e4caa3e1e7318/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d29705797b33bade62d79d8f106c2453c8a26442a9b2a5576610c0f7e7c351ed", size = 62772957, upload-time = "2025-09-26T09:47:51.266Z" },
{ url = "https://files.pythonhosted.org/packages/b4/a9/c6a480259aa0d6b270aac2c6ba73a97444b9267adde983a5b7e34f17e45a/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_amd64.whl", hash = "sha256:4bd658962f24958503541963e5a6f2cc512a8cb301e48a69dc03c879f40a28ae", size = 40120431, upload-time = "2025-09-26T09:47:54.363Z" },
{ url = "https://files.pythonhosted.org/packages/42/b1/6a4eb2c6e9efa028074b0001b61008c9d202b6b46caee9e5d1b18c088216/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_arm64.whl", hash = "sha256:1fccac931faa210d22b6962bcdbc99269d16221d831b9a118bbb80fe434a60b8", size = 38844133, upload-time = "2025-09-26T09:47:57.357Z" },
]
[[package]] [[package]]
name = "numpy" name = "numpy"
version = "2.3.3" version = "2.3.3"
@ -1193,22 +1136,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
] ]
[[package]]
name = "parsel"
version = "1.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cssselect" },
{ name = "jmespath" },
{ name = "lxml" },
{ name = "packaging" },
{ name = "w3lib" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f6/df/acd504c154c0b9028b0d8491a77fdd5f86e9c06ee04f986abf85e36d9a5f/parsel-1.10.0.tar.gz", hash = "sha256:14f17db9559f51b43357b9dfe43cec870a8efb5ea4857abb624ec6ff80d8a080", size = 51421, upload-time = "2025-01-17T15:38:31.941Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/18/35d1d947553d24909dca37e2ff11720eecb601360d1bac8d7a9a1bc7eb08/parsel-1.10.0-py2.py3-none-any.whl", hash = "sha256:6a0c28bd81f9df34ba665884c88efa0b18b8d2c44c81f64e27f2f0cb37d46169", size = 17266, upload-time = "2025-01-17T15:38:27.83Z" },
]
[[package]] [[package]]
name = "patchright" name = "patchright"
version = "1.55.2" version = "1.55.2"
@ -1363,36 +1290,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" }, { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" },
] ]
[[package]]
name = "protego"
version = "0.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/19/9b/9c3a649167c7e43a0818df515d515e66d95a261fdfdf2a6afd45be9db696/protego-0.5.0.tar.gz", hash = "sha256:225dee0acfcc71de8c6f7cef9c618e5a9d3e7baa7ae1470b8d076a064033c463", size = 3137494, upload-time = "2025-06-24T13:58:45.31Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/cb/4347985f89ca3e4beb5d0cb85f8b951c9e339564bd2a3f388d6fb78382cc/protego-0.5.0-py3-none-any.whl", hash = "sha256:4237227840a67fdeec289a9b89652455b5657806388c17e1a556e160435f8fc5", size = 10356, upload-time = "2025-06-24T13:58:44.08Z" },
]
[[package]]
name = "pyasn1"
version = "0.6.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" },
]
[[package]]
name = "pyasn1-modules"
version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyasn1" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
]
[[package]] [[package]]
name = "pycparser" name = "pycparser"
version = "2.23" version = "2.23"
@ -1402,15 +1299,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" },
] ]
[[package]]
name = "pydispatcher"
version = "2.0.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/21/db/030d0700ae90d2f9d52c2f3c1f864881e19cef8cba3b0a08759c8494c19c/PyDispatcher-2.0.7.tar.gz", hash = "sha256:b777c6ad080dc1bad74a4c29d6a46914fa6701ac70f94b0d66fbcfde62f5be31", size = 38891, upload-time = "2023-02-17T20:11:13.106Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/0e/9ee7bc0b48ec45d93b302fa2d787830dca4dc454d31a237faa5815995988/PyDispatcher-2.0.7-py3-none-any.whl", hash = "sha256:96543bea04115ffde08f851e1d45cacbfd1ee866ac42127d9b476dc5aefa7de0", size = 12040, upload-time = "2023-02-17T20:11:11.991Z" },
]
[[package]] [[package]]
name = "pyee" name = "pyee"
version = "13.0.0" version = "13.0.0"
@ -1463,25 +1351,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/7c/54afe9ffee547c41e1161691e72067a37ed27466ac71c089bfdcd07ca70d/pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71", size = 396742, upload-time = "2025-06-14T20:46:57.64Z" }, { url = "https://files.pythonhosted.org/packages/c1/7c/54afe9ffee547c41e1161691e72067a37ed27466ac71c089bfdcd07ca70d/pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71", size = 396742, upload-time = "2025-06-14T20:46:57.64Z" },
] ]
[[package]]
name = "pyopenssl"
version = "25.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
]
[[package]]
name = "pypydispatcher"
version = "2.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d5/7b/65f55513d3c769fd677f90032d8d8703e3dc17e88a41b6074d2177548bca/PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2", size = 23224, upload-time = "2017-07-03T14:20:51.806Z" }
[[package]] [[package]]
name = "pysocks" name = "pysocks"
version = "1.7.1" version = "1.7.1"
@ -1554,15 +1423,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
] ]
[[package]]
name = "queuelib"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/78/9ace6888cf6d390c9aec3ba93020838b08934959b544a7f10b15db815d29/queuelib-1.8.0.tar.gz", hash = "sha256:582bc65514481100b0539bd671da6b355b878869cfc77d92c63b75fcc9cf8e27", size = 11675, upload-time = "2025-03-31T12:18:46.193Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/44/542f4e702fafc477260d3463ae1bcdd113faac9d42336601af50985af914/queuelib-1.8.0-py3-none-any.whl", hash = "sha256:599468c5589716e63d3bb753dae7bf32cc94838ade1e7b450a061faec4a2015d", size = 13615, upload-time = "2025-03-31T12:18:43.526Z" },
]
[[package]] [[package]]
name = "requests" name = "requests"
version = "2.32.5" version = "2.32.5"
@ -1598,14 +1458,15 @@ dependencies = [
{ name = "backoff" }, { name = "backoff" },
{ name = "beautifulsoup4" }, { name = "beautifulsoup4" },
{ name = "curl-cffi" }, { name = "curl-cffi" },
{ name = "playwright" }, { name = "httpx" },
{ name = "ndjson" },
{ name = "requests" }, { name = "requests" },
{ name = "scrapling", extra = ["fetchers"] }, { name = "scrapling", extra = ["fetchers"] },
{ name = "scrapy" },
] ]
[package.dev-dependencies] [package.dev-dependencies]
dev = [ dev = [
{ name = "basedpyright" },
{ name = "mypy" }, { name = "mypy" },
{ name = "pre-commit" }, { name = "pre-commit" },
{ name = "pytest" }, { name = "pytest" },
@ -1619,14 +1480,15 @@ requires-dist = [
{ name = "backoff", specifier = ">=2.2.1" }, { name = "backoff", specifier = ">=2.2.1" },
{ name = "beautifulsoup4", specifier = ">=4.13.5" }, { name = "beautifulsoup4", specifier = ">=4.13.5" },
{ name = "curl-cffi", specifier = ">=0.13.0" }, { name = "curl-cffi", specifier = ">=0.13.0" },
{ name = "playwright", specifier = ">=1.55.0" }, { name = "httpx", specifier = ">=0.28.1" },
{ name = "ndjson", specifier = ">=0.3.1" },
{ name = "requests", specifier = ">=2.32.5" }, { name = "requests", specifier = ">=2.32.5" },
{ name = "scrapling", extras = ["fetchers"], specifier = ">=0.3.5" }, { name = "scrapling", extras = ["fetchers"], specifier = ">=0.3.5" },
{ name = "scrapy", specifier = ">=2.13.3" },
] ]
[package.metadata.requires-dev] [package.metadata.requires-dev]
dev = [ dev = [
{ name = "basedpyright", specifier = ">=1.31.6" },
{ name = "mypy", specifier = ">=1.18.2" }, { name = "mypy", specifier = ">=1.18.2" },
{ name = "pre-commit", specifier = ">=4.3.0" }, { name = "pre-commit", specifier = ">=4.3.0" },
{ name = "pytest", specifier = ">=8.0.0" }, { name = "pytest", specifier = ">=8.0.0" },
@ -1661,35 +1523,6 @@ fetchers = [
{ name = "playwright" }, { name = "playwright" },
] ]
[[package]]
name = "scrapy"
version = "2.13.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "cssselect" },
{ name = "defusedxml" },
{ name = "itemadapter" },
{ name = "itemloaders" },
{ name = "lxml" },
{ name = "packaging" },
{ name = "parsel" },
{ name = "protego" },
{ name = "pydispatcher", marker = "platform_python_implementation == 'CPython'" },
{ name = "pyopenssl" },
{ name = "pypydispatcher", marker = "platform_python_implementation == 'PyPy'" },
{ name = "queuelib" },
{ name = "service-identity" },
{ name = "tldextract" },
{ name = "twisted" },
{ name = "w3lib" },
{ name = "zope-interface" },
]
sdist = { url = "https://files.pythonhosted.org/packages/be/6c/bab0c01c5c50842548f0b5e936dfd2520a1ce84c171472c2cfe4d0599841/scrapy-2.13.3.tar.gz", hash = "sha256:bf17588c10e46a9d70c49a05380b749e3c7fba58204a367a5747ce6da2bd204d", size = 1220051, upload-time = "2025-07-02T15:41:15.776Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/53/cb/474b56910b9fb823298008444790a6d5fb9c8dfb936101136932d586287a/scrapy-2.13.3-py3-none-any.whl", hash = "sha256:9c16a482e1474b501f7b7121a4071ddc5cec4c0c7c0320217ed678d4fb8a3e9e", size = 321805, upload-time = "2025-07-02T15:41:13.782Z" },
]
[[package]] [[package]]
name = "screeninfo" name = "screeninfo"
version = "0.8.1" version = "0.8.1"
@ -1704,27 +1537,12 @@ wheels = [
] ]
[[package]] [[package]]
name = "service-identity" name = "sniffio"
version = "24.2.0" version = "1.3.1"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
{ name = "attrs" },
{ name = "cryptography" },
{ name = "pyasn1" },
{ name = "pyasn1-modules" },
]
sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245, upload-time = "2024-10-26T07:21:57.736Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364, upload-time = "2024-10-26T07:21:56.302Z" }, { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]
[[package]]
name = "setuptools"
version = "80.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
] ]
[[package]] [[package]]
@ -1763,24 +1581,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
] ]
[[package]]
name = "twisted"
version = "25.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "automat" },
{ name = "constantly" },
{ name = "hyperlink" },
{ name = "incremental" },
{ name = "typing-extensions" },
{ name = "zope-interface" },
]
sdist = { url = "https://files.pythonhosted.org/packages/13/0f/82716ed849bf7ea4984c21385597c949944f0f9b428b5710f79d0afc084d/twisted-25.5.0.tar.gz", hash = "sha256:1deb272358cb6be1e3e8fc6f9c8b36f78eb0fa7c2233d2dbe11ec6fee04ea316", size = 3545725, upload-time = "2025-06-07T09:52:24.858Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/eb/66/ab7efd8941f0bc7b2bd555b0f0471bff77df4c88e0cc31120c82737fec77/twisted-25.5.0-py3-none-any.whl", hash = "sha256:8559f654d01a54a8c3efe66d533d43f383531ebf8d81d9f9ab4769d91ca15df7", size = 3204767, upload-time = "2025-06-07T09:52:21.428Z" },
]
[[package]] [[package]]
name = "types-beautifulsoup4" name = "types-beautifulsoup4"
version = "4.12.0.20250516" version = "4.12.0.20250516"
@ -1866,15 +1666,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" }, { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" },
] ]
[[package]]
name = "w3lib"
version = "2.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bf/7d/1172cfaa1e29beb9bf938e484c122b3bdc82e8e37b17a4f753ba6d6e009f/w3lib-2.3.1.tar.gz", hash = "sha256:5c8ac02a3027576174c2b61eb9a2170ba1b197cae767080771b6f1febda249a4", size = 49531, upload-time = "2025-01-27T14:22:10.453Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/58/dd/56f0d8af71e475ed194d702f8b4cf9cea812c95e82ad823d239023c6558c/w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b", size = 21751, upload-time = "2025-01-27T14:22:09.421Z" },
]
[[package]] [[package]]
name = "yarl" name = "yarl"
version = "1.20.1" version = "1.20.1"
@ -1956,29 +1747,3 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" }, { url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" },
{ url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" },
] ]
[[package]]
name = "zope-interface"
version = "8.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/88/3a/7fcf02178b8fad0a51e67e32765cd039ae505d054d744d76b8c2bbcba5ba/zope_interface-8.0.1.tar.gz", hash = "sha256:eba5610d042c3704a48222f7f7c6ab5b243ed26f917e2bc69379456b115e02d1", size = 253746, upload-time = "2025-09-25T05:55:51.285Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f2/2f/c10c739bcb9b072090c97c2e08533777497190daa19d190d72b4cce9c7cb/zope_interface-8.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4bd01022d2e1bce4a4a4ed9549edb25393c92e607d7daa6deff843f1f68b479d", size = 207903, upload-time = "2025-09-25T05:58:21.671Z" },
{ url = "https://files.pythonhosted.org/packages/b5/e1/9845ac3697f108d9a1af6912170c59a23732090bbfb35955fe77e5544955/zope_interface-8.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:29be8db8b712d94f1c05e24ea230a879271d787205ba1c9a6100d1d81f06c69a", size = 208345, upload-time = "2025-09-25T05:58:24.217Z" },
{ url = "https://files.pythonhosted.org/packages/f2/49/6573bc8b841cfab18e80c8e8259f1abdbbf716140011370de30231be79ad/zope_interface-8.0.1-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:51ae1b856565b30455b7879fdf0a56a88763b401d3f814fa9f9542d7410dbd7e", size = 255027, upload-time = "2025-09-25T05:58:19.975Z" },
{ url = "https://files.pythonhosted.org/packages/e2/fd/908b0fd4b1ab6e412dfac9bd2b606f2893ef9ba3dd36d643f5e5b94c57b3/zope_interface-8.0.1-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d2e7596149cb1acd1d4d41b9f8fe2ffc0e9e29e2e91d026311814181d0d9efaf", size = 259800, upload-time = "2025-09-25T05:58:11.487Z" },
{ url = "https://files.pythonhosted.org/packages/dc/78/8419a2b4e88410520ed4b7f93bbd25a6d4ae66c4e2b131320f2b90f43077/zope_interface-8.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2737c11c34fb9128816759864752d007ec4f987b571c934c30723ed881a7a4f", size = 260978, upload-time = "2025-09-25T06:26:24.483Z" },
{ url = "https://files.pythonhosted.org/packages/e5/90/caf68152c292f1810e2bd3acd2177badf08a740aa8a348714617d6c9ad0b/zope_interface-8.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:cf66e4bf731aa7e0ced855bb3670e8cda772f6515a475c6a107bad5cb6604103", size = 212155, upload-time = "2025-09-25T05:59:40.318Z" },
{ url = "https://files.pythonhosted.org/packages/dc/a6/0f08713ddda834c428ebf97b2a7fd8dea50c0100065a8955924dbd94dae8/zope_interface-8.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:115f27c1cc95ce7a517d960ef381beedb0a7ce9489645e80b9ab3cbf8a78799c", size = 208609, upload-time = "2025-09-25T05:58:53.698Z" },
{ url = "https://files.pythonhosted.org/packages/e9/5e/d423045f54dc81e0991ec655041e7a0eccf6b2642535839dd364b35f4d7f/zope_interface-8.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af655c573b84e3cb6a4f6fd3fbe04e4dc91c63c6b6f99019b3713ef964e589bc", size = 208797, upload-time = "2025-09-25T05:58:56.258Z" },
{ url = "https://files.pythonhosted.org/packages/c6/43/39d4bb3f7a80ebd261446792493cfa4e198badd47107224f5b6fe1997ad9/zope_interface-8.0.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:23f82ef9b2d5370750cc1bf883c3b94c33d098ce08557922a3fbc7ff3b63dfe1", size = 259242, upload-time = "2025-09-25T05:58:21.602Z" },
{ url = "https://files.pythonhosted.org/packages/da/29/49effcff64ef30731e35520a152a9dfcafec86cf114b4c2aff942e8264ba/zope_interface-8.0.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35a1565d5244997f2e629c5c68715b3d9d9036e8df23c4068b08d9316dcb2822", size = 264696, upload-time = "2025-09-25T05:58:13.351Z" },
{ url = "https://files.pythonhosted.org/packages/c7/39/b947673ec9a258eeaa20208dd2f6127d9fbb3e5071272a674ebe02063a78/zope_interface-8.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:029ea1db7e855a475bf88d9910baab4e94d007a054810e9007ac037a91c67c6f", size = 264229, upload-time = "2025-09-25T06:26:26.226Z" },
{ url = "https://files.pythonhosted.org/packages/8f/ee/eed6efd1fc3788d1bef7a814e0592d8173b7fe601c699b935009df035fc2/zope_interface-8.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0beb3e7f7dc153944076fcaf717a935f68d39efa9fce96ec97bafcc0c2ea6cab", size = 212270, upload-time = "2025-09-25T05:58:53.584Z" },
{ url = "https://files.pythonhosted.org/packages/5f/dc/3c12fca01c910c793d636ffe9c0984e0646abaf804e44552070228ed0ede/zope_interface-8.0.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:c7cc027fc5c61c5d69e5080c30b66382f454f43dc379c463a38e78a9c6bab71a", size = 208992, upload-time = "2025-09-25T05:58:40.712Z" },
{ url = "https://files.pythonhosted.org/packages/46/71/6127b7282a3e380ca927ab2b40778a9c97935a4a57a2656dadc312db5f30/zope_interface-8.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fcf9097ff3003b7662299f1c25145e15260ec2a27f9a9e69461a585d79ca8552", size = 209051, upload-time = "2025-09-25T05:58:42.182Z" },
{ url = "https://files.pythonhosted.org/packages/56/86/4387a9f951ee18b0e41fda77da77d59c33e59f04660578e2bad688703e64/zope_interface-8.0.1-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6d965347dd1fb9e9a53aa852d4ded46b41ca670d517fd54e733a6b6a4d0561c2", size = 259223, upload-time = "2025-09-25T05:58:23.191Z" },
{ url = "https://files.pythonhosted.org/packages/61/08/ce60a114466abc067c68ed41e2550c655f551468ae17b4b17ea360090146/zope_interface-8.0.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9a3b8bb77a4b89427a87d1e9eb969ab05e38e6b4a338a9de10f6df23c33ec3c2", size = 264690, upload-time = "2025-09-25T05:58:15.052Z" },
{ url = "https://files.pythonhosted.org/packages/36/9a/62a9ba3a919594605a07c34eee3068659bbd648e2fa0c4a86d876810b674/zope_interface-8.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:87e6b089002c43231fb9afec89268391bcc7a3b66e76e269ffde19a8112fb8d5", size = 264201, upload-time = "2025-09-25T06:26:27.797Z" },
{ url = "https://files.pythonhosted.org/packages/da/06/8fe88bd7edef60566d21ef5caca1034e10f6b87441ea85de4bbf9ea74768/zope_interface-8.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:64a43f5280aa770cbafd0307cb3d1ff430e2a1001774e8ceb40787abe4bb6658", size = 212273, upload-time = "2025-09-25T06:00:25.398Z" },
]