Merge pull request #140 from barrett-ruth/feat/async-scrapers
Asynchronous Scrapers
This commit is contained in:
commit
8f466f135a
18 changed files with 1034 additions and 2079 deletions
|
|
@ -79,29 +79,22 @@ end
|
|||
|
||||
---@param platform string
|
||||
---@param contest_id string
|
||||
---@return ContestData?
|
||||
---@return ContestData
|
||||
function M.get_contest_data(platform, contest_id)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
contest_id = { contest_id, 'string' },
|
||||
})
|
||||
|
||||
if not cache_data[platform] then
|
||||
return nil
|
||||
end
|
||||
|
||||
local contest_data = cache_data[platform][contest_id]
|
||||
if not contest_data or vim.tbl_isempty(contest_data) then
|
||||
return nil
|
||||
end
|
||||
|
||||
return contest_data
|
||||
return cache_data[platform][contest_id] or {}
|
||||
end
|
||||
|
||||
---@param platform string
|
||||
---@param contest_id string
|
||||
---@param problems Problem[]
|
||||
function M.set_contest_data(platform, contest_id, problems)
|
||||
---@param contest_name? string
|
||||
---@param display_name? string
|
||||
function M.set_contest_data(platform, contest_id, problems, contest_name, display_name)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
contest_id = { contest_id, 'string' },
|
||||
|
|
@ -109,36 +102,17 @@ function M.set_contest_data(platform, contest_id, problems)
|
|||
})
|
||||
|
||||
cache_data[platform] = cache_data[platform] or {}
|
||||
local existing = cache_data[platform][contest_id] or {}
|
||||
|
||||
local existing_by_id = {}
|
||||
if existing.problems then
|
||||
for _, p in ipairs(existing.problems) do
|
||||
existing_by_id[p.id] = p
|
||||
end
|
||||
local out = {
|
||||
name = contest_name,
|
||||
display_name = display_name,
|
||||
problems = vim.deepcopy(problems),
|
||||
index_map = {},
|
||||
}
|
||||
for i, p in ipairs(out.problems) do
|
||||
out.index_map[p.id] = i
|
||||
end
|
||||
|
||||
local merged = {}
|
||||
for _, p in ipairs(problems) do
|
||||
local prev = existing_by_id[p.id] or {}
|
||||
local merged_p = {
|
||||
id = p.id,
|
||||
name = p.name or prev.name,
|
||||
test_cases = prev.test_cases,
|
||||
timeout_ms = prev.timeout_ms,
|
||||
memory_mb = prev.memory_mb,
|
||||
interactive = prev.interactive,
|
||||
}
|
||||
table.insert(merged, merged_p)
|
||||
end
|
||||
|
||||
existing.problems = merged
|
||||
existing.index_map = {}
|
||||
for i, p in ipairs(merged) do
|
||||
existing.index_map[p.id] = i
|
||||
end
|
||||
|
||||
cache_data[platform][contest_id] = existing
|
||||
cache_data[platform][contest_id] = out
|
||||
M.save()
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -36,9 +36,8 @@ function M.get_platforms()
|
|||
return result
|
||||
end
|
||||
|
||||
---Get list of contests for a specific platform
|
||||
---@param platform string Platform identifier (e.g. "codeforces", "atcoder")
|
||||
---@param refresh? boolean Whether to skip caching and append new contests
|
||||
---@param platform string
|
||||
---@param refresh? boolean
|
||||
---@return cp.ContestItem[]
|
||||
function M.get_platform_contests(platform, refresh)
|
||||
logger.log(
|
||||
|
|
@ -48,24 +47,21 @@ function M.get_platform_contests(platform, refresh)
|
|||
)
|
||||
|
||||
cache.load()
|
||||
|
||||
local picker_contests = cache.get_contest_summaries(platform)
|
||||
|
||||
if refresh or vim.tbl_isempty(picker_contests) then
|
||||
logger.log(('Cache miss on %s contests'):format(platform))
|
||||
local contests = scraper.scrape_contest_list(platform)
|
||||
|
||||
local contests = scraper.scrape_contest_list(platform) -- sync
|
||||
cache.set_contest_summaries(platform, contests)
|
||||
picker_contests = cache.get_contest_summaries(platform) -- <-- reload after write
|
||||
end
|
||||
|
||||
logger.log(
|
||||
('Loaded %s %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]),
|
||||
('Loaded %d %s contests.'):format(#picker_contests, constants.PLATFORM_DISPLAY_NAMES[platform]),
|
||||
vim.log.levels.INFO,
|
||||
true
|
||||
)
|
||||
|
||||
picker_contests = cache.get_contest_summaries(platform)
|
||||
|
||||
return picker_contests
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ local function substitute_template(cmd_template, substitutions)
|
|||
return out
|
||||
end
|
||||
|
||||
local function build_command(cmd_template, executable, substitutions)
|
||||
function M.build_command(cmd_template, executable, substitutions)
|
||||
local cmd = substitute_template(cmd_template, substitutions)
|
||||
if executable then
|
||||
table.insert(cmd, 1, executable)
|
||||
|
|
@ -198,10 +198,4 @@ function M.compile_problem(contest_config, is_debug)
|
|||
return { success = true, output = nil }
|
||||
end
|
||||
|
||||
M._util = {
|
||||
get_language_from_file = get_language_from_file,
|
||||
substitute_template = substitute_template,
|
||||
build_command = build_command,
|
||||
}
|
||||
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -78,8 +78,8 @@ end
|
|||
---@param substitutions table<string, string>
|
||||
---@return string[]
|
||||
local function build_command(language_config, substitutions)
|
||||
local exec_util = require('cp.runner.execute')._util
|
||||
return exec_util.build_command(language_config.test, language_config.executable, substitutions)
|
||||
local execute = require('cp.runner.execute')
|
||||
return execute.build_command(language_config.test, language_config.executable, substitutions)
|
||||
end
|
||||
|
||||
---@param contest_config ContestConfig
|
||||
|
|
@ -98,28 +98,6 @@ local function run_single_test_case(contest_config, cp_config, test_case)
|
|||
local binary_file = state.get_binary_file()
|
||||
local substitutions = { source = source_file, binary = binary_file }
|
||||
|
||||
if language_config.compile and binary_file and vim.fn.filereadable(binary_file) == 0 then
|
||||
local cr = exec.compile(language_config, substitutions)
|
||||
local ansi = require('cp.ui.ansi')
|
||||
local clean = ansi.bytes_to_string(cr.stdout or '')
|
||||
if cr.code ~= 0 then
|
||||
return {
|
||||
status = 'fail',
|
||||
actual = clean,
|
||||
actual_highlights = {},
|
||||
error = 'Compilation failed',
|
||||
stderr = clean,
|
||||
time_ms = 0,
|
||||
rss_mb = 0,
|
||||
code = cr.code,
|
||||
ok = false,
|
||||
signal = nil,
|
||||
tled = false,
|
||||
mled = false,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
local cmd = build_command(language_config, substitutions)
|
||||
local stdin_content = (test_case.input or '') .. '\n'
|
||||
local timeout_ms = (run_panel_state.constraints and run_panel_state.constraints.timeout_ms) or 0
|
||||
|
|
|
|||
|
|
@ -1,67 +1,110 @@
|
|||
local M = {}
|
||||
local utils = require('cp.utils')
|
||||
|
||||
local logger = require('cp.log')
|
||||
local utils = require('cp.utils')
|
||||
|
||||
local function syshandle(result)
|
||||
if result.code ~= 0 then
|
||||
local msg = 'Scraper failed: ' .. (result.stderr or 'Unknown error')
|
||||
logger.log(msg, vim.log.levels.ERROR)
|
||||
return {
|
||||
success = false,
|
||||
error = msg,
|
||||
}
|
||||
return { success = false, error = msg }
|
||||
end
|
||||
|
||||
local ok, data = pcall(vim.json.decode, result.stdout)
|
||||
if not ok then
|
||||
local msg = 'Failed to parse scraper output: ' .. tostring(data)
|
||||
logger.log(msg, vim.log.levels.ERROR)
|
||||
return {
|
||||
success = false,
|
||||
error = msg,
|
||||
}
|
||||
return { success = false, error = msg }
|
||||
end
|
||||
|
||||
return {
|
||||
success = true,
|
||||
data = data,
|
||||
}
|
||||
return { success = true, data = data }
|
||||
end
|
||||
|
||||
---@param platform string
|
||||
---@param subcommand string
|
||||
---@param args string[]
|
||||
---@param opts { sync?: boolean, ndjson?: boolean, on_event?: fun(ev: table), on_exit?: fun(result: table) }
|
||||
local function run_scraper(platform, subcommand, args, opts)
|
||||
if not utils.setup_python_env() then
|
||||
local msg = 'Python environment setup failed'
|
||||
logger.log(msg, vim.log.levels.ERROR)
|
||||
return {
|
||||
success = false,
|
||||
message = msg,
|
||||
}
|
||||
end
|
||||
|
||||
local plugin_path = utils.get_plugin_path()
|
||||
local cmd = {
|
||||
'uv',
|
||||
'run',
|
||||
'--directory',
|
||||
plugin_path,
|
||||
'-m',
|
||||
'scrapers.' .. platform,
|
||||
subcommand,
|
||||
}
|
||||
local cmd = { 'uv', 'run', '--directory', plugin_path, '-m', 'scrapers.' .. platform, subcommand }
|
||||
vim.list_extend(cmd, args)
|
||||
|
||||
local sysopts = {
|
||||
text = true,
|
||||
timeout = 30000,
|
||||
}
|
||||
if opts and opts.ndjson then
|
||||
local uv = vim.loop
|
||||
local stdout = uv.new_pipe(false)
|
||||
local stderr = uv.new_pipe(false)
|
||||
local buf = ''
|
||||
|
||||
if opts.sync then
|
||||
local handle
|
||||
handle = uv.spawn(
|
||||
cmd[1],
|
||||
{ args = vim.list_slice(cmd, 2), stdio = { nil, stdout, stderr } },
|
||||
function(code, signal)
|
||||
if buf ~= '' and opts.on_event then
|
||||
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
|
||||
if ok_tail then
|
||||
opts.on_event(ev_tail)
|
||||
end
|
||||
buf = ''
|
||||
end
|
||||
if opts.on_exit then
|
||||
opts.on_exit({ success = (code == 0), code = code, signal = signal })
|
||||
end
|
||||
if not stdout:is_closing() then
|
||||
stdout:close()
|
||||
end
|
||||
if not stderr:is_closing() then
|
||||
stderr:close()
|
||||
end
|
||||
if handle and not handle:is_closing() then
|
||||
handle:close()
|
||||
end
|
||||
end
|
||||
)
|
||||
|
||||
if not handle then
|
||||
logger.log('Failed to start scraper process', vim.log.levels.ERROR)
|
||||
return { success = false, error = 'spawn failed' }
|
||||
end
|
||||
|
||||
uv.read_start(stdout, function(_, data)
|
||||
if data == nil then
|
||||
if buf ~= '' and opts.on_event then
|
||||
local ok_tail, ev_tail = pcall(vim.json.decode, buf)
|
||||
if ok_tail then
|
||||
opts.on_event(ev_tail)
|
||||
end
|
||||
buf = ''
|
||||
end
|
||||
return
|
||||
end
|
||||
buf = buf .. data
|
||||
while true do
|
||||
local s, e = buf:find('\n', 1, true)
|
||||
if not s then
|
||||
break
|
||||
end
|
||||
local line = buf:sub(1, s - 1)
|
||||
buf = buf:sub(e + 1)
|
||||
local ok, ev = pcall(vim.json.decode, line)
|
||||
if ok and opts.on_event then
|
||||
opts.on_event(ev)
|
||||
end
|
||||
end
|
||||
end)
|
||||
|
||||
uv.read_start(stderr, function(_, _) end)
|
||||
return
|
||||
end
|
||||
|
||||
local sysopts = { text = true, timeout = 30000 }
|
||||
if opts and opts.sync then
|
||||
local result = vim.system(cmd, sysopts):wait()
|
||||
return syshandle(result)
|
||||
else
|
||||
vim.system(cmd, sysopts, function(result)
|
||||
return opts.on_exit(syshandle(result))
|
||||
if opts and opts.on_exit then
|
||||
return opts.on_exit(syshandle(result))
|
||||
end
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
|
@ -93,50 +136,59 @@ end
|
|||
|
||||
function M.scrape_contest_list(platform)
|
||||
local result = run_scraper(platform, 'contests', {}, { sync = true })
|
||||
if not result.success or not result.data.contests then
|
||||
if not result or not result.success or not (result.data and result.data.contests) then
|
||||
logger.log(
|
||||
('Could not scrape contests list for platform %s: %s'):format(platform, result.msg),
|
||||
('Could not scrape contests list for platform %s: %s'):format(
|
||||
platform,
|
||||
(result and result.error) or 'unknown'
|
||||
),
|
||||
vim.log.levels.ERROR
|
||||
)
|
||||
return {}
|
||||
end
|
||||
|
||||
return result.data.contests
|
||||
end
|
||||
|
||||
function M.scrape_problem_tests(platform, contest_id, problem_id, callback)
|
||||
run_scraper(platform, 'tests', { contest_id, problem_id }, {
|
||||
on_exit = function(result)
|
||||
if not result.success or not result.data.tests then
|
||||
logger.log(
|
||||
'Failed to load tests: ' .. (result.msg or 'unknown error'),
|
||||
vim.log.levels.ERROR
|
||||
)
|
||||
|
||||
return {}
|
||||
---@param platform string
|
||||
---@param contest_id string
|
||||
---@param callback fun(data: table)|nil
|
||||
function M.scrape_all_tests(platform, contest_id, callback)
|
||||
run_scraper(platform, 'tests', { contest_id }, {
|
||||
ndjson = true,
|
||||
on_event = function(ev)
|
||||
if ev.done then
|
||||
return
|
||||
end
|
||||
if ev.error and ev.problem_id then
|
||||
logger.log(
|
||||
('Failed to load tests for %s/%s: %s'):format(contest_id, ev.problem_id, ev.error),
|
||||
vim.log.levels.WARN
|
||||
)
|
||||
return
|
||||
end
|
||||
if not ev.problem_id or not ev.tests then
|
||||
return
|
||||
end
|
||||
|
||||
vim.schedule(function()
|
||||
vim.system({ 'mkdir', '-p', 'build', 'io' }):wait()
|
||||
local config = require('cp.config')
|
||||
local base_name = config.default_filename(contest_id, problem_id)
|
||||
|
||||
for i, test_case in ipairs(result.data.tests) do
|
||||
local base_name = config.default_filename(contest_id, ev.problem_id)
|
||||
for i, t in ipairs(ev.tests) do
|
||||
local input_file = 'io/' .. base_name .. '.' .. i .. '.cpin'
|
||||
local expected_file = 'io/' .. base_name .. '.' .. i .. '.cpout'
|
||||
|
||||
local input_content = test_case.input:gsub('\r', '')
|
||||
local expected_content = test_case.expected:gsub('\r', '')
|
||||
|
||||
pcall(vim.fn.writefile, vim.split(input_content, '\n', { trimempty = true }), input_file)
|
||||
pcall(
|
||||
vim.fn.writefile,
|
||||
vim.split(expected_content, '\n', { trimempty = true }),
|
||||
expected_file
|
||||
)
|
||||
local input_content = t.input:gsub('\r', '')
|
||||
local expected_content = t.expected:gsub('\r', '')
|
||||
vim.fn.writefile(vim.split(input_content, '\n', { trimempty = true }), input_file)
|
||||
vim.fn.writefile(vim.split(expected_content, '\n', { trimempty = true }), expected_file)
|
||||
end
|
||||
if type(callback) == 'function' then
|
||||
callback(result.data)
|
||||
callback({
|
||||
tests = ev.tests,
|
||||
timeout_ms = ev.timeout_ms or 0,
|
||||
memory_mb = ev.memory_mb or 0,
|
||||
interactive = ev.interactive or false,
|
||||
problem_id = ev.problem_id,
|
||||
})
|
||||
end
|
||||
end)
|
||||
end,
|
||||
|
|
|
|||
116
lua/cp/setup.lua
116
lua/cp/setup.lua
|
|
@ -28,45 +28,26 @@ function M.set_platform(platform)
|
|||
return true
|
||||
end
|
||||
|
||||
local function backfill_missing_tests(platform, contest_id, problems)
|
||||
cache.load()
|
||||
local missing = {}
|
||||
for _, prob in ipairs(problems) do
|
||||
if not cache.get_test_cases(platform, contest_id, prob.id) then
|
||||
table.insert(missing, prob.id)
|
||||
end
|
||||
end
|
||||
if #missing == 0 then
|
||||
logger.log(('All problems already cached for %s contest %s.'):format(platform, contest_id))
|
||||
return
|
||||
end
|
||||
for _, pid in ipairs(missing) do
|
||||
local captured = pid
|
||||
scraper.scrape_problem_tests(platform, contest_id, captured, function(result)
|
||||
local cached_tests = {}
|
||||
if result.tests then
|
||||
for i, t in ipairs(result.tests) do
|
||||
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
|
||||
end
|
||||
end
|
||||
cache.set_test_cases(
|
||||
platform,
|
||||
contest_id,
|
||||
captured,
|
||||
cached_tests,
|
||||
result.timeout_ms,
|
||||
result.memory_mb
|
||||
)
|
||||
end)
|
||||
end
|
||||
end
|
||||
---@class TestCaseLite
|
||||
---@field input string
|
||||
---@field expected string
|
||||
|
||||
---@class ScrapeEvent
|
||||
---@field problem_id string
|
||||
---@field tests TestCaseLite[]|nil
|
||||
---@field timeout_ms integer|nil
|
||||
---@field memory_mb integer|nil
|
||||
---@field interactive boolean|nil
|
||||
---@field error string|nil
|
||||
---@field done boolean|nil
|
||||
---@field succeeded integer|nil
|
||||
---@field failed integer|nil
|
||||
|
||||
---@param platform string
|
||||
---@param contest_id string
|
||||
---@param language string|nil
|
||||
---@param problem_id string|nil
|
||||
function M.setup_contest(platform, contest_id, language, problem_id)
|
||||
if not platform then
|
||||
logger.log('No platform configured. Use :CP <platform> <contest> [--{lang=<lang>,debug} first.')
|
||||
return
|
||||
end
|
||||
|
||||
local config = config_module.get_config()
|
||||
if not vim.tbl_contains(config.scrapers, platform) then
|
||||
logger.log(('Scraping disabled for %s.'):format(platform), vim.log.levels.WARN)
|
||||
|
|
@ -75,28 +56,47 @@ function M.setup_contest(platform, contest_id, language, problem_id)
|
|||
|
||||
state.set_contest_id(contest_id)
|
||||
cache.load()
|
||||
local contest_data = cache.get_contest_data(platform, contest_id)
|
||||
|
||||
local function proceed(contest_data)
|
||||
local problems = contest_data.problems
|
||||
local pid = problems[(problem_id and contest_data.index_map[problem_id] or 1)].id
|
||||
M.setup_problem(pid, language)
|
||||
|
||||
local cached_len = #vim.tbl_filter(function(p)
|
||||
return cache.get_test_cases(platform, contest_id, p.id) ~= nil
|
||||
end, problems)
|
||||
|
||||
if cached_len ~= #problems then
|
||||
scraper.scrape_all_tests(platform, contest_id, function(ev)
|
||||
local cached_tests = {}
|
||||
for i, t in ipairs(ev.tests) do
|
||||
cached_tests[i] = { index = i, input = t.input, expected = t.expected }
|
||||
end
|
||||
cache.set_test_cases(
|
||||
platform,
|
||||
contest_id,
|
||||
ev.problem_id,
|
||||
cached_tests,
|
||||
ev.timeout_ms or 0,
|
||||
ev.memory_mb or 0
|
||||
)
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
||||
local contest_data = cache.get_contest_data(platform, contest_id)
|
||||
if not contest_data or not contest_data.problems then
|
||||
logger.log('Fetching contests problems...', vim.log.levels.INFO, true)
|
||||
scraper.scrape_contest_metadata(platform, contest_id, function(result)
|
||||
local problems = result.problems or {}
|
||||
cache.set_contest_data(platform, contest_id, problems)
|
||||
cache.set_contest_data(platform, contest_id, problems, result.name, result.display_name)
|
||||
logger.log(('Found %d problems for %s contest %s.'):format(#problems, platform, contest_id))
|
||||
local pid = problem_id or (problems[1] and problems[1].id)
|
||||
if pid then
|
||||
M.setup_problem(pid, language)
|
||||
end
|
||||
backfill_missing_tests(platform, contest_id, problems)
|
||||
proceed(cache.get_contest_data(platform, contest_id))
|
||||
end)
|
||||
else
|
||||
local problems = contest_data.problems
|
||||
local pid = problem_id or (problems[1] and problems[1].id)
|
||||
if pid then
|
||||
M.setup_problem(pid, language)
|
||||
end
|
||||
backfill_missing_tests(platform, contest_id, problems)
|
||||
return
|
||||
end
|
||||
|
||||
proceed(contest_data)
|
||||
end
|
||||
|
||||
---@param problem_id string
|
||||
|
|
@ -195,19 +195,9 @@ function M.navigate_problem(direction, language)
|
|||
end
|
||||
|
||||
local problems = contest_data.problems
|
||||
local current_index
|
||||
for i, prob in ipairs(problems) do
|
||||
if prob.id == current_problem_id then
|
||||
current_index = i
|
||||
break
|
||||
end
|
||||
end
|
||||
if not current_index then
|
||||
M.setup_contest(platform, contest_id, language, problems[1].id)
|
||||
return
|
||||
end
|
||||
local index = contest_data.index_map[current_problem_id]
|
||||
|
||||
local new_index = current_index + direction
|
||||
local new_index = index + direction
|
||||
if new_index < 1 or new_index > #problems then
|
||||
return
|
||||
end
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ local function find_gnu_time()
|
|||
|
||||
_time_cached = true
|
||||
_time_path = nil
|
||||
_time_reason = 'GNU time not found (install `time` on Linux or `brew install coreutils` on macOS)'
|
||||
_time_reason = 'GNU time not found'
|
||||
return _time_path, _time_reason
|
||||
end
|
||||
|
||||
|
|
@ -214,7 +214,7 @@ local function find_gnu_timeout()
|
|||
|
||||
_timeout_cached = true
|
||||
_timeout_path = nil
|
||||
_timeout_reason = 'GNU timeout not found (install `coreutils`; macOS: `brew install coreutils`)'
|
||||
_timeout_reason = 'GNU timeout not found'
|
||||
return _timeout_path, _timeout_reason
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@ dependencies = [
|
|||
"backoff>=2.2.1",
|
||||
"beautifulsoup4>=4.13.5",
|
||||
"curl-cffi>=0.13.0",
|
||||
"playwright>=1.55.0",
|
||||
"httpx>=0.28.1",
|
||||
"ndjson>=0.3.1",
|
||||
"requests>=2.32.5",
|
||||
"scrapling[fetchers]>=0.3.5",
|
||||
"scrapy>=2.13.3",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
@ -22,6 +22,7 @@ dev = [
|
|||
"pytest>=8.0.0",
|
||||
"pytest-mock>=3.12.0",
|
||||
"pre-commit>=4.3.0",
|
||||
"basedpyright>=1.31.6",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
|
|
|
|||
|
|
@ -1,14 +1,19 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import concurrent.futures
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
import backoff
|
||||
import httpx
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@ -20,398 +25,352 @@ from .models import (
|
|||
TestsResult,
|
||||
)
|
||||
|
||||
MIB_TO_MB = 1.048576
|
||||
BASE_URL = "https://atcoder.jp"
|
||||
ARCHIVE_URL = f"{BASE_URL}/contests/archive"
|
||||
TIMEOUT_SECONDS = 30
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||
}
|
||||
RETRY_STATUS = {429, 502, 503, 504}
|
||||
FATAL_STATUS = {400, 401, 403, 404, 410}
|
||||
|
||||
def _make_request(url: str, timeout: int = 10) -> requests.Response:
|
||||
headers = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
}
|
||||
_session = requests.Session()
|
||||
_adapter = HTTPAdapter(
|
||||
pool_connections=100,
|
||||
pool_maxsize=100,
|
||||
max_retries=Retry(total=0),
|
||||
)
|
||||
_session.mount("https://", _adapter)
|
||||
_session.mount("http://", _adapter)
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
|
||||
max_tries=5,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Request error on {url} (attempt {details['tries']}), "
|
||||
f"retrying in {details['wait']:.1f}s: {details['exception']}",
|
||||
file=sys.stderr,
|
||||
),
|
||||
|
||||
def _give_up_requests(exc: Exception) -> bool:
|
||||
if isinstance(exc, requests.HTTPError) and exc.response is not None:
|
||||
return exc.response.status_code in FATAL_STATUS
|
||||
return False
|
||||
|
||||
|
||||
def _retry_after_requests(details):
|
||||
exc = details.get("exception")
|
||||
if isinstance(exc, requests.HTTPError) and exc.response is not None:
|
||||
ra = exc.response.headers.get("Retry-After")
|
||||
if ra:
|
||||
try:
|
||||
time.sleep(max(0.0, float(ra)))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
(requests.ConnectionError, requests.Timeout, requests.HTTPError),
|
||||
max_tries=5,
|
||||
jitter=backoff.full_jitter,
|
||||
giveup=_give_up_requests,
|
||||
on_backoff=_retry_after_requests,
|
||||
)
|
||||
def _fetch(url: str) -> str:
|
||||
r = _session.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
|
||||
if r.status_code in RETRY_STATUS:
|
||||
raise requests.HTTPError(response=r)
|
||||
r.raise_for_status()
|
||||
return r.text
|
||||
|
||||
|
||||
def _giveup_httpx(exc: Exception) -> bool:
|
||||
return (
|
||||
isinstance(exc, httpx.HTTPStatusError)
|
||||
and exc.response is not None
|
||||
and (exc.response.status_code in FATAL_STATUS)
|
||||
)
|
||||
@backoff.on_predicate(
|
||||
backoff.expo,
|
||||
lambda resp: resp.status_code == 429,
|
||||
max_tries=5,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Rate limited on {url}, retrying in {details['wait']:.1f}s",
|
||||
file=sys.stderr,
|
||||
),
|
||||
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
(httpx.ConnectError, httpx.ReadTimeout, httpx.HTTPStatusError),
|
||||
max_tries=5,
|
||||
jitter=backoff.full_jitter,
|
||||
giveup=_giveup_httpx,
|
||||
)
|
||||
async def _get_async(client: httpx.AsyncClient, url: str) -> str:
|
||||
r = await client.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
|
||||
r.raise_for_status()
|
||||
return r.text
|
||||
|
||||
|
||||
def _text_from_pre(pre: Tag) -> str:
|
||||
return (
|
||||
pre.get_text(separator="\n", strip=False)
|
||||
.replace("\r", "")
|
||||
.replace("\xa0", " ")
|
||||
.rstrip("\n")
|
||||
)
|
||||
def _req():
|
||||
return requests.get(url, headers=headers, timeout=timeout)
|
||||
|
||||
resp = _req()
|
||||
resp.raise_for_status()
|
||||
return resp
|
||||
|
||||
|
||||
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
||||
timeout_ms = None
|
||||
memory_mb = None
|
||||
def _parse_last_page(html: str) -> int:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
nav = soup.select_one("ul.pagination")
|
||||
if not nav:
|
||||
return 1
|
||||
nums = []
|
||||
for a in nav.select("a"):
|
||||
s = a.get_text(strip=True)
|
||||
if s.isdigit():
|
||||
nums.append(int(s))
|
||||
return max(nums) if nums else 1
|
||||
|
||||
paragraphs = soup.find_all("p")
|
||||
for p in paragraphs:
|
||||
text = p.get_text()
|
||||
if "Time Limit:" in text and "Memory Limit:" in text:
|
||||
time_match = re.search(r"Time Limit:\s*(\d+)\s*sec", text)
|
||||
if time_match:
|
||||
seconds = int(time_match.group(1))
|
||||
timeout_ms = seconds * 1000
|
||||
|
||||
memory_match = re.search(r"Memory Limit:\s*(\d+)\s*MiB", text)
|
||||
if memory_match:
|
||||
memory_mib = int(memory_match.group(1))
|
||||
memory_mb = round(memory_mib * 1.048576, 2)
|
||||
break
|
||||
def _parse_archive_contests(html: str) -> list[ContestSummary]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
tbody = soup.select_one("table.table-default tbody") or soup.select_one("tbody")
|
||||
if not tbody:
|
||||
return []
|
||||
out: list[ContestSummary] = []
|
||||
for tr in tbody.select("tr"):
|
||||
a = tr.select_one("a[href^='/contests/']")
|
||||
if not a:
|
||||
continue
|
||||
href_attr = a.get("href")
|
||||
if not isinstance(href_attr, str):
|
||||
continue
|
||||
m = re.search(r"/contests/([^/?#]+)", href_attr)
|
||||
if not m:
|
||||
continue
|
||||
cid = m.group(1)
|
||||
name = a.get_text(strip=True)
|
||||
out.append(ContestSummary(id=cid, name=name, display_name=name))
|
||||
return out
|
||||
|
||||
if timeout_ms is None:
|
||||
raise ValueError("Could not find valid timeout in problem constraints")
|
||||
|
||||
if memory_mb is None:
|
||||
raise ValueError("Could not find valid memory limit in problem constraints")
|
||||
def _parse_tasks_list(html: str) -> list[dict[str, str]]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
tbody = soup.select_one("table tbody")
|
||||
if not tbody:
|
||||
return []
|
||||
rows: list[dict[str, str]] = []
|
||||
for tr in tbody.select("tr"):
|
||||
tds = tr.select("td")
|
||||
if len(tds) < 2:
|
||||
continue
|
||||
letter = tds[0].get_text(strip=True)
|
||||
a = tds[1].select_one("a[href*='/tasks/']")
|
||||
if not a:
|
||||
continue
|
||||
href_attr = a.get("href")
|
||||
if not isinstance(href_attr, str):
|
||||
continue
|
||||
m = re.search(r"/contests/[^/]+/tasks/([^/?#]+)", href_attr)
|
||||
if not m:
|
||||
continue
|
||||
slug = m.group(1)
|
||||
title = a.get_text(strip=True)
|
||||
rows.append({"letter": letter, "title": title, "slug": slug})
|
||||
return rows
|
||||
|
||||
|
||||
def _extract_limits(html: str) -> tuple[int, float]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
txt = soup.get_text(" ", strip=True)
|
||||
timeout_ms = 0
|
||||
memory_mb = 0.0
|
||||
ts = re.search(r"Time\s*Limit:\s*([\d.]+)\s*sec", txt, flags=re.I)
|
||||
if ts:
|
||||
timeout_ms = int(float(ts.group(1)) * 1000)
|
||||
ms = re.search(r"Memory\s*Limit:\s*(\d+)\s*MiB", txt, flags=re.I)
|
||||
if ms:
|
||||
memory_mb = float(ms.group(1)) * MIB_TO_MB
|
||||
return timeout_ms, memory_mb
|
||||
|
||||
|
||||
def parse_problem_url(contest_id: str, problem_letter: str) -> str:
|
||||
task_id: str = f"{contest_id}_{problem_letter}"
|
||||
return f"https://atcoder.jp/contests/{contest_id}/tasks/{task_id}"
|
||||
def _extract_samples(html: str) -> list[TestCase]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
root = soup.select_one("#task-statement") or soup
|
||||
inputs: dict[str, str] = {}
|
||||
outputs: dict[str, str] = {}
|
||||
for h in root.find_all(re.compile(r"h[2-4]")):
|
||||
title = h.get_text(" ", strip=True)
|
||||
pre = h.find_next("pre")
|
||||
if not pre:
|
||||
continue
|
||||
t = _text_from_pre(pre)
|
||||
mi = re.search(r"Sample\s*Input\s*(\d+)", title, flags=re.I)
|
||||
mo = re.search(r"Sample\s*Output\s*(\d+)", title, flags=re.I)
|
||||
if mi:
|
||||
inputs[mi.group(1)] = t
|
||||
elif mo:
|
||||
outputs[mo.group(1)] = t
|
||||
cases: list[TestCase] = []
|
||||
for k in sorted(set(inputs) & set(outputs), key=lambda s: int(s)):
|
||||
cases.append(TestCase(input=inputs[k], expected=outputs[k]))
|
||||
return cases
|
||||
|
||||
|
||||
def extract_problem_from_row(row, contest_id: str) -> ProblemSummary | None:
|
||||
cells = row.find_all("td")
|
||||
if len(cells) < 2:
|
||||
return None
|
||||
|
||||
task_link = cells[1].find("a")
|
||||
if not task_link:
|
||||
return None
|
||||
|
||||
task_name = task_link.get_text(strip=True)
|
||||
task_href = task_link.get("href", "")
|
||||
if not task_href:
|
||||
return None
|
||||
|
||||
task_id = task_href.split("/")[-1]
|
||||
if not task_id.startswith(contest_id + "_"):
|
||||
return None
|
||||
|
||||
problem_letter = task_id[len(contest_id) + 1 :]
|
||||
if not problem_letter or not task_name:
|
||||
return None
|
||||
|
||||
return ProblemSummary(id=problem_letter.lower(), name=task_name)
|
||||
def _scrape_tasks_sync(contest_id: str) -> list[dict[str, str]]:
|
||||
html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks")
|
||||
return _parse_tasks_list(html)
|
||||
|
||||
|
||||
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
||||
try:
|
||||
contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks"
|
||||
response = _make_request(contest_url)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
task_table = soup.find("table", class_="table")
|
||||
if not task_table or not isinstance(task_table, Tag):
|
||||
return []
|
||||
|
||||
rows = task_table.find_all("tr")[1:]
|
||||
problems: list[ProblemSummary] = []
|
||||
for row in rows:
|
||||
problem = extract_problem_from_row(row, contest_id)
|
||||
if problem:
|
||||
problems.append(problem)
|
||||
|
||||
return problems
|
||||
|
||||
except Exception as e:
|
||||
print(f"Failed to scrape AtCoder contest problems: {e}", file=sys.stderr)
|
||||
return []
|
||||
def _scrape_problem_page_sync(contest_id: str, slug: str) -> dict[str, Any]:
|
||||
html = _fetch(f"{BASE_URL}/contests/{contest_id}/tasks/{slug}")
|
||||
tests = _extract_samples(html)
|
||||
timeout_ms, memory_mb = _extract_limits(html)
|
||||
return {
|
||||
"tests": tests,
|
||||
"timeout_ms": timeout_ms,
|
||||
"memory_mb": memory_mb,
|
||||
"interactive": False,
|
||||
}
|
||||
|
||||
|
||||
def extract_test_case_from_headers(sample_headers, i: int) -> tuple[str, str] | None:
|
||||
if i >= len(sample_headers):
|
||||
return None
|
||||
|
||||
header = sample_headers[i]
|
||||
if "input" not in header.get_text().lower():
|
||||
return None
|
||||
|
||||
input_pre = header.find_next("pre")
|
||||
if not input_pre or i + 1 >= len(sample_headers):
|
||||
return None
|
||||
|
||||
next_header = sample_headers[i + 1]
|
||||
if "output" not in next_header.get_text().lower():
|
||||
return None
|
||||
|
||||
output_pre = next_header.find_next("pre")
|
||||
if not output_pre:
|
||||
return None
|
||||
|
||||
input_text = input_pre.get_text().strip().replace("\r", "")
|
||||
output_text = output_pre.get_text().strip().replace("\r", "")
|
||||
if not input_text or not output_text:
|
||||
return None
|
||||
|
||||
return (input_text, output_text)
|
||||
def _to_problem_summaries(rows: list[dict[str, str]]) -> list[ProblemSummary]:
    """Convert scraped task rows into de-duplicated problem summaries.

    Args:
        rows: Dicts read through their ``letter`` and ``title`` keys.

    Returns:
        One ``ProblemSummary`` per distinct lower-cased letter, in order of
        first appearance. Rows with a blank or missing letter are dropped;
        a missing title becomes an empty name.
    """
    # Dict insertion order doubles as the "first occurrence wins" ordering.
    by_id: dict[str, ProblemSummary] = {}
    for row in rows:
        upper_letter = (row.get("letter") or "").strip().upper()
        if not upper_letter:
            continue
        problem_id = upper_letter.lower()
        if problem_id not in by_id:
            by_id[problem_id] = ProblemSummary(
                id=problem_id, name=row.get("title") or ""
            )
    return list(by_id.values())
|
||||
|
||||
|
||||
def scrape(url: str) -> list[TestCase]:
    """Download an AtCoder task page and extract its sample test cases.

    Args:
        url: Address of the task page.

    Returns:
        The test cases built from consecutive "sample" ``<h3>`` header pairs.
        Any exception is reported on stderr and yields an empty list.
    """

    def mentions_sample(heading_text) -> bool:
        # Headers without string content are never treated as samples.
        return "sample" in heading_text.lower() if heading_text else False

    try:
        page = _make_request(url)
        document = BeautifulSoup(page.text, "html.parser")
        headers = document.find_all("h3", string=mentions_sample)

        collected: list[TestCase] = []
        cursor = 0
        while cursor < len(headers):
            pair = extract_test_case_from_headers(headers, cursor)
            if not pair:
                # Not an input/output pair starting here; slide by one header.
                cursor += 1
                continue
            sample_in, sample_out = pair
            collected.append(TestCase(input=sample_in, expected=sample_out))
            # Both headers of the pair were consumed.
            cursor += 2

        return collected

    except Exception as e:
        print(f"Error scraping AtCoder: {e}", file=sys.stderr)
        return []
|
||||
async def _fetch_all_contests_async() -> list[ContestSummary]:
    """Download every archive page concurrently and return all contests.

    The first archive page is fetched alone to learn the last page number;
    pages 2..last are then requested concurrently over one shared client.

    Returns:
        Contests from page 1 followed by those of pages 2..last, in page
        order. The earlier ``asyncio.as_completed`` loop appended pages in
        arrival order, so the result order changed from run to run.

    Raises:
        Whatever ``_get_async`` or the parsers raise. On failure the
        outstanding page requests are cancelled before the error propagates,
        so no task keeps using the client after it is closed.
    """
    async with httpx.AsyncClient(
        limits=httpx.Limits(max_connections=100, max_keepalive_connections=100)
    ) as client:
        first_html = await _get_async(client, ARCHIVE_URL)
        last = _parse_last_page(first_html)
        out = _parse_archive_contests(first_html)
        if last <= 1:
            return out

        tasks = [
            asyncio.create_task(_get_async(client, f"{ARCHIVE_URL}?page={p}"))
            for p in range(2, last + 1)
        ]
        try:
            # gather() returns results in submission order, i.e. page order.
            pages = await asyncio.gather(*tasks)
        except BaseException:
            for task in tasks:
                task.cancel()
            # Drain the cancelled tasks so their exceptions are retrieved.
            await asyncio.gather(*tasks, return_exceptions=True)
            raise

        for html in pages:
            out.extend(_parse_archive_contests(html))
        return out
|
||||
|
||||
|
||||
def _normalize_contest_name(raw: str) -> str:
    """Resolve literal backslash escapes and fold full-width characters.

    The earlier code used ``raw.encode().decode("unicode_escape")``. That
    re-reads the UTF-8 bytes of every non-ASCII character as Latin-1, turning
    Japanese contest names into mojibake, after which the full-width
    replacements below can never match. Encoding with ``latin-1`` plus
    ``backslashreplace`` keeps every character recoverable while still
    expanding escape sequences that appear literally in the text.
    """
    try:
        name = raw.encode("latin-1", "backslashreplace").decode("unicode_escape")
    except (UnicodeDecodeError, UnicodeEncodeError):
        # Malformed escape sequence in the text: keep the name as scraped.
        name = raw

    name = name.replace("\uff08", "(").replace("\uff09", ")").replace("\u3000", " ")
    # Shift the remaining full-width ASCII variants down to plain ASCII.
    return re.sub(r"[\uff01-\uff5e]", lambda m: chr(ord(m.group()) - 0xFEE0), name)


def scrape_contests() -> list[ContestSummary]:
    """Scrape the AtCoder contest archive, skipping heuristic contests.

    Returns:
        Contest summaries in archive page order. Contests whose id starts
        with ``ahc`` or whose name contains "heuristic" are excluded. A page
        whose request fails, or that lacks the expected table, contributes
        nothing.
    """
    fallback_pages = 15  # used whenever the pagination widget cannot be read

    def get_max_pages() -> int:
        """Read the last page number from the archive's pagination list."""
        try:
            response = _make_request("https://atcoder.jp/contests/archive")
            soup = BeautifulSoup(response.text, "html.parser")
            pagination = soup.find("ul", class_="pagination")
            if not pagination or not isinstance(pagination, Tag):
                return fallback_pages

            lis = pagination.find_all("li")
            if lis and isinstance(lis[-1], Tag):
                try:
                    return int(lis[-1].get_text().strip())
                except ValueError:
                    return fallback_pages
            return fallback_pages
        except Exception:
            return fallback_pages

    def scrape_page(page: int) -> list[ContestSummary]:
        """Parse one archive page into contest summaries."""
        try:
            response = _make_request(f"https://atcoder.jp/contests/archive?page={page}")
        except Exception:
            return []

        soup = BeautifulSoup(response.text, "html.parser")
        table = soup.find("table", class_="table")
        if not table:
            return []

        tbody = table.find("tbody")
        if not tbody or not isinstance(tbody, Tag):
            return []

        contests = []
        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 2:
                continue

            # The contest link is read from the second cell of the row.
            link = cells[1].find("a")
            if not link or not link.get("href"):
                continue

            contest_id = link.get("href").split("/")[-1]
            name = _normalize_contest_name(link.get_text().strip())

            if contest_id.startswith("ahc") or "heuristic" in name.lower():
                continue
            contests.append(ContestSummary(id=contest_id, name=name, display_name=name))

        return contests

    max_pages = get_max_pages()

    all_contests = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # map() yields results in submission order, i.e. page order.
        for page_contests in executor.map(scrape_page, range(1, max_pages + 1)):
            all_contests.extend(page_contests)

    return all_contests
|
||||
|
||||
|
||||
class AtCoderScraper(BaseScraper):
|
||||
class AtcoderScraper(BaseScraper):
|
||||
@property
|
||||
def platform_name(self) -> str:
|
||||
return "atcoder"
|
||||
|
||||
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id)
|
||||
|
||||
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
|
||||
return self._safe_execute(
|
||||
"tests", self._scrape_tests_impl, contest_id, problem_id
|
||||
)
|
||||
|
||||
def scrape_contest_list(self) -> ContestListResult:
|
||||
return self._safe_execute("contests", self._scrape_contests_impl)
|
||||
|
||||
def _safe_execute(self, operation: str, func, *args):
|
||||
try:
|
||||
return func(*args)
|
||||
except Exception as e:
|
||||
error_msg = f"{self.platform_name}: {str(e)}"
|
||||
|
||||
if operation == "metadata":
|
||||
return MetadataResult(success=False, error=error_msg)
|
||||
elif operation == "tests":
|
||||
return TestsResult(
|
||||
success=False,
|
||||
error=error_msg,
|
||||
problem_id="",
|
||||
url="",
|
||||
tests=[],
|
||||
timeout_ms=0,
|
||||
memory_mb=0,
|
||||
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
async def impl(cid: str) -> MetadataResult:
|
||||
rows = await asyncio.to_thread(_scrape_tasks_sync, cid)
|
||||
problems = _to_problem_summaries(rows)
|
||||
if not problems:
|
||||
return self._create_metadata_error(
|
||||
f"No problems found for contest {cid}", cid
|
||||
)
|
||||
elif operation == "contests":
|
||||
return ContestListResult(success=False, error=error_msg)
|
||||
|
||||
def _scrape_metadata_impl(self, contest_id: str) -> MetadataResult:
    """Build the metadata result for a contest from its scraped problems.

    Args:
        contest_id: Contest whose task list is scraped.

    Returns:
        A successful result carrying the problems, or a failed result with a
        "No problems found" message when scraping yields nothing.
    """
    found = scrape_contest_problems(contest_id)
    if found:
        return MetadataResult(
            success=True, error="", contest_id=contest_id, problems=found
        )
    return MetadataResult(
        success=False,
        error=f"{self.platform_name}: No problems found for contest {contest_id}",
    )
|
||||
|
||||
def _scrape_tests_impl(self, contest_id: str, problem_id: str) -> TestsResult:
|
||||
problem_letter = problem_id.upper()
|
||||
url = parse_problem_url(contest_id, problem_letter)
|
||||
tests = scrape(url)
|
||||
|
||||
response = _make_request(url)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
||||
|
||||
if not tests:
|
||||
return TestsResult(
|
||||
success=False,
|
||||
error=f"{self.platform_name}: No tests found for {contest_id} {problem_letter}",
|
||||
problem_id=f"{contest_id}_{problem_id.lower()}",
|
||||
url=url,
|
||||
tests=[],
|
||||
timeout_ms=timeout_ms,
|
||||
memory_mb=memory_mb,
|
||||
success=True, error="", contest_id=cid, problems=problems
|
||||
)
|
||||
|
||||
return TestsResult(
|
||||
success=True,
|
||||
error="",
|
||||
problem_id=f"{contest_id}_{problem_id.lower()}",
|
||||
url=url,
|
||||
tests=tests,
|
||||
timeout_ms=timeout_ms,
|
||||
memory_mb=memory_mb,
|
||||
)
|
||||
return await self._safe_execute("metadata", impl, contest_id)
|
||||
|
||||
def _scrape_contests_impl(self) -> ContestListResult:
    """Wrap the scraped contest archive in a ``ContestListResult``.

    Returns:
        A successful result with the contests, or a failed result with a
        "No contests found" message when the scrape returns nothing.
    """
    listing = scrape_contests()
    if listing:
        return ContestListResult(success=True, error="", contests=listing)
    return ContestListResult(
        success=False, error=f"{self.platform_name}: No contests found"
    )
|
||||
async def scrape_contest_list(self) -> ContestListResult:
    """Fetch the full contest archive and wrap it in a result object.

    Returns:
        A successful ``ContestListResult`` when at least one contest was
        fetched; otherwise a contests-error result carrying either the
        exception text or "No contests found".
    """

    async def impl() -> ContestListResult:
        try:
            fetched = await _fetch_all_contests_async()
        except Exception as e:
            return self._create_contests_error(str(e))

        if fetched:
            return ContestListResult(success=True, error="", contests=fetched)
        return self._create_contests_error("No contests found")

    return await self._safe_execute("contests", impl)
|
||||
|
||||
async def stream_tests_for_category_async(self, category_id: str) -> None:
    """Scrape every task of a contest and print one JSON line per task.

    The task list is scraped in a worker thread, then every task page is
    scraped concurrently (each in its own worker thread). Each task prints
    either its tests and limits, or an ``error`` entry when no tests were
    found or an exception occurred.

    Args:
        category_id: Contest identifier handed to the task and page scrapers.
    """
    task_rows = await asyncio.to_thread(_scrape_tasks_sync, category_id)

    def report(payload: dict) -> None:
        # One JSON document per line, flushed immediately.
        print(json.dumps(payload), flush=True)

    async def emit(row: dict[str, str]) -> None:
        problem_id = (row.get("letter") or "").strip().lower()
        task_slug = row.get("slug") or ""
        if not (problem_id and task_slug):
            return

        try:
            scraped = await asyncio.to_thread(
                _scrape_problem_page_sync, category_id, task_slug
            )
            samples: list[TestCase] = scraped["tests"]
            if samples:
                report(
                    {
                        "problem_id": problem_id,
                        "tests": [
                            {"input": case.input, "expected": case.expected}
                            for case in samples
                        ],
                        "timeout_ms": scraped["timeout_ms"],
                        "memory_mb": scraped["memory_mb"],
                        "interactive": bool(scraped["interactive"]),
                    }
                )
            else:
                report(
                    {
                        "problem_id": problem_id,
                        "error": f"{self.platform_name}: no tests found",
                    }
                )
        except Exception as e:
            report(
                {
                    "problem_id": problem_id,
                    "error": f"{self.platform_name}: {str(e)}",
                }
            )

    await asyncio.gather(*(emit(task_row) for task_row in task_rows))
|
||||
|
||||
|
||||
def main() -> None:
|
||||
async def main_async() -> int:
|
||||
if len(sys.argv) < 2:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> <problem_letter> OR atcoder.py contests",
|
||||
error="Usage: atcoder.py metadata <contest_id> OR atcoder.py tests <contest_id> OR atcoder.py contests",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
|
||||
mode: str = sys.argv[1]
|
||||
scraper = AtCoderScraper()
|
||||
scraper = AtcoderScraper()
|
||||
|
||||
if mode == "metadata":
|
||||
if len(sys.argv) != 3:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Usage: atcoder.py metadata <contest_id>",
|
||||
success=False, error="Usage: atcoder.py metadata <contest_id>"
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
|
||||
contest_id: str = sys.argv[2]
|
||||
result = scraper.scrape_contest_metadata(contest_id)
|
||||
return 1
|
||||
contest_id = sys.argv[2]
|
||||
result = await scraper.scrape_contest_metadata(contest_id)
|
||||
print(json.dumps(asdict(result)))
|
||||
if not result.success:
|
||||
sys.exit(1)
|
||||
return 0 if result.success else 1
|
||||
|
||||
elif mode == "tests":
|
||||
if len(sys.argv) != 4:
|
||||
if mode == "tests":
|
||||
if len(sys.argv) != 3:
|
||||
tests_result = TestsResult(
|
||||
success=False,
|
||||
error="Usage: atcoder.py tests <contest_id> <problem_letter>",
|
||||
error="Usage: atcoder.py tests <contest_id>",
|
||||
problem_id="",
|
||||
url="",
|
||||
tests=[],
|
||||
|
|
@ -419,35 +378,32 @@ def main() -> None:
|
|||
memory_mb=0,
|
||||
)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
contest_id = sys.argv[2]
|
||||
await scraper.stream_tests_for_category_async(contest_id)
|
||||
return 0
|
||||
|
||||
test_contest_id: str = sys.argv[2]
|
||||
problem_letter: str = sys.argv[3]
|
||||
tests_result = scraper.scrape_problem_tests(test_contest_id, problem_letter)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
if not tests_result.success:
|
||||
sys.exit(1)
|
||||
|
||||
elif mode == "contests":
|
||||
if mode == "contests":
|
||||
if len(sys.argv) != 2:
|
||||
contest_result = ContestListResult(
|
||||
success=False, error="Usage: atcoder.py contests"
|
||||
)
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
sys.exit(1)
|
||||
|
||||
contest_result = scraper.scrape_contest_list()
|
||||
return 1
|
||||
contest_result = await scraper.scrape_contest_list()
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
if not contest_result.success:
|
||||
sys.exit(1)
|
||||
return 0 if contest_result.success else 1
|
||||
|
||||
else:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
return 1
|
||||
|
||||
|
||||
def main() -> None:
|
||||
sys.exit(asyncio.run(main_async()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Awaitable, Callable, ParamSpec, cast
|
||||
|
||||
from .models import ContestListResult, MetadataResult, TestsResult
|
||||
|
||||
P = ParamSpec("P")
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScraperConfig:
|
||||
|
|
@ -13,21 +18,18 @@ class ScraperConfig:
|
|||
|
||||
|
||||
class BaseScraper(ABC):
|
||||
def __init__(self, config: ScraperConfig | None = None):
|
||||
self.config = config or ScraperConfig()
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def platform_name(self) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ...
|
||||
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult: ...
|
||||
|
||||
@abstractmethod
|
||||
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult: ...
|
||||
async def scrape_contest_list(self) -> ContestListResult: ...
|
||||
|
||||
@abstractmethod
|
||||
def scrape_contest_list(self) -> ContestListResult: ...
|
||||
async def stream_tests_for_category_async(self, category_id: str) -> None: ...
|
||||
|
||||
def _create_metadata_error(
|
||||
self, error_msg: str, contest_id: str = ""
|
||||
|
|
@ -56,15 +58,21 @@ class BaseScraper(ABC):
|
|||
success=False, error=f"{self.platform_name}: {error_msg}"
|
||||
)
|
||||
|
||||
def _safe_execute(self, operation: str, func, *args, **kwargs):
|
||||
async def _safe_execute(
|
||||
self,
|
||||
operation: str,
|
||||
func: Callable[P, Awaitable[Any]],
|
||||
*args: P.args,
|
||||
**kwargs: P.kwargs,
|
||||
):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
return await func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
if operation == "metadata":
|
||||
contest_id = args[0] if args else ""
|
||||
contest_id = cast(str, args[0]) if args else ""
|
||||
return self._create_metadata_error(str(e), contest_id)
|
||||
elif operation == "tests":
|
||||
problem_id = args[1] if len(args) > 1 else ""
|
||||
problem_id = cast(str, args[1]) if len(args) > 1 else ""
|
||||
return self._create_tests_error(str(e), problem_id)
|
||||
elif operation == "contests":
|
||||
return self._create_contests_error(str(e))
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
|
@ -19,224 +22,132 @@ from .models import (
|
|||
TestsResult,
|
||||
)
|
||||
|
||||
|
||||
def _example_line_groups(sections) -> dict[str, list[str]]:
    """Group ``test-example-line-N`` divs by their test marker.

    Returns a mapping from the marker left after removing the
    ``test-example-line-`` prefix to the stripped text of every matching div,
    in document order.
    """
    groups: dict[str, list[str]] = {}
    for section in sections:
        pre = section.find("pre")
        if not pre or not isinstance(pre, Tag):
            continue

        line_divs = pre.find_all(
            "div", class_=lambda x: x and "test-example-line-" in x
        )
        for div in line_divs:
            marker = next(
                (
                    cls
                    for cls in div.get("class", [])
                    if "test-example-line-" in cls and cls.split("-")[-1].isdigit()
                ),
                None,
            )
            if not marker:
                continue
            test_num = marker.replace("test-example-line-", "")
            groups.setdefault(test_num, []).append(div.get_text().strip())
    return groups


def _section_texts(sections) -> list[str]:
    """Return the text of each section's ``<pre>``, one entry per section."""
    texts: list[str] = []
    for section in sections:
        pre = section.find("pre")
        if not pre or not isinstance(pre, Tag):
            continue

        divs = pre.find_all("div")
        if divs:
            # Line-per-div markup: rebuild the block from the individual lines.
            text = "\n".join(
                div.get_text().strip() for div in divs if isinstance(div, Tag)
            )
        else:
            text = pre.get_text().replace("\r", "").strip()
        texts.append(text)
    return texts


def _example_sort_key(test_num: str) -> tuple[float, str]:
    """Order test markers by their numeric suffix rather than as strings."""
    tail = test_num.rsplit("-", 1)[-1]
    return (int(tail) if tail.isdecimal() else float("inf"), test_num)


def scrape(url: str) -> list[TestCase]:
    """Fetch a Codeforces problem page and extract its sample tests.

    When the samples are marked up line by line with ``test-example-line-N``
    classes, one test case is produced per N, and each input is prefixed with
    a ``1`` line (presumably the test-count line of a multi-test problem;
    verify against callers). Otherwise all inputs and all outputs are
    concatenated into a single test case.

    The per-example cases are ordered numerically. The previous plain
    ``sorted()`` compared the ids as strings and placed "10" before "2".

    Args:
        url: Address of the problem page.

    Returns:
        The extracted test cases, or an empty list when nothing was found or
        an exception occurred (the error is reported on stderr).
    """
    try:
        page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
        soup = BeautifulSoup(page.html_content, "html.parser")
        input_sections = soup.find_all("div", class_="input")
        output_sections = soup.find_all("div", class_="output")

        individual_inputs = _example_line_groups(input_sections)
        individual_outputs = _example_line_groups(output_sections)

        # Only tests that have both an input and an output side are usable.
        common_tests = individual_inputs.keys() & individual_outputs.keys()
        if common_tests:
            return [
                TestCase(
                    input="1\n" + "\n".join(individual_inputs[test_num]),
                    expected="\n".join(individual_outputs[test_num]),
                )
                for test_num in sorted(common_tests, key=_example_sort_key)
            ]

        all_inputs = _section_texts(input_sections)
        all_outputs = _section_texts(output_sections)
        if not all_inputs or not all_outputs:
            return []

        return [
            TestCase(input="\n".join(all_inputs), expected="\n".join(all_outputs))
        ]

    except Exception as e:
        print(f"Scrapling failed: {e}", file=sys.stderr)
        return []
|
||||
# suppress scrapling logging - see https://github.com/D4Vinci/Scrapling/issues/31
|
||||
logging.getLogger("scrapling").setLevel(logging.CRITICAL)
|
||||
|
||||
|
||||
def parse_problem_url(contest_id: str, problem_letter: str) -> str:
|
||||
BASE_URL = "https://codeforces.com"
|
||||
API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
|
||||
TIMEOUT_SECONDS = 30
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
|
||||
def _text_from_pre(pre: Tag) -> str:
|
||||
return (
|
||||
f"https://codeforces.com/contest/{contest_id}/problem/{problem_letter.upper()}"
|
||||
pre.get_text(separator="\n", strip=False)
|
||||
.replace("\r", "")
|
||||
.replace("\xa0", " ")
|
||||
.rstrip("\n")
|
||||
)
|
||||
|
||||
|
||||
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
||||
timeout_ms = None
|
||||
memory_mb = None
|
||||
|
||||
time_limit_div = soup.find("div", class_="time-limit")
|
||||
if time_limit_div:
|
||||
text = time_limit_div.get_text().strip()
|
||||
match = re.search(r"(\d+) seconds?", text)
|
||||
if match:
|
||||
seconds = int(match.group(1))
|
||||
timeout_ms = seconds * 1000
|
||||
|
||||
if timeout_ms is None:
|
||||
raise ValueError("Could not find valid timeout in time-limit section")
|
||||
|
||||
memory_limit_div = soup.find("div", class_="memory-limit")
|
||||
if memory_limit_div:
|
||||
text = memory_limit_div.get_text().strip()
|
||||
match = re.search(r"(\d+) megabytes", text)
|
||||
if match:
|
||||
memory_mb = float(match.group(1))
|
||||
|
||||
if memory_mb is None:
|
||||
raise ValueError("Could not find valid memory limit in memory-limit section")
|
||||
|
||||
def _extract_limits(block: Tag) -> tuple[int, float]:
    """Read the time and memory limits from a problem-statement block.

    Fractional values are honoured. The previous integer-only pattern
    ``(\\d+)\\s*seconds?`` matched only the digits after the decimal point,
    so "2.5 seconds" was read as 5000 ms.

    Args:
        block: Element searched for ``div.time-limit`` and ``div.memory-limit``.

    Returns:
        ``(timeout_ms, memory_mb)``. Each value stays at ``0`` / ``0.0`` when
        its div is absent or its text does not match.
    """
    number = r"(\d+(?:\.\d+)?)"
    timeout_ms = 0
    memory_mb = 0.0

    tdiv = block.find("div", class_="time-limit")
    if tdiv:
        seconds = re.search(number + r"\s*seconds?", tdiv.get_text(" ", strip=True))
        if seconds:
            # round() keeps the declared int return type for values like 2.5.
            timeout_ms = round(float(seconds.group(1)) * 1000)

    mdiv = block.find("div", class_="memory-limit")
    if mdiv:
        megabytes = re.search(
            number + r"\s*megabytes?", mdiv.get_text(" ", strip=True)
        )
        if megabytes:
            memory_mb = float(megabytes.group(1))

    return timeout_ms, memory_mb
|
||||
|
||||
|
||||
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
||||
try:
|
||||
contest_url: str = f"https://codeforces.com/contest/{contest_id}"
|
||||
page = StealthyFetcher.fetch(contest_url, headless=True, solve_cloudflare=True)
|
||||
html = page.html_content
|
||||
def _extract_title(block: Tag) -> tuple[str, str]:
    """Split a problem block's title into ``(index, name)``.

    Args:
        block: Element searched for a ``div.title`` child.

    Returns:
        ``("", "")`` when there is no title div; ``("", title)`` when the
        title contains no period; otherwise the upper-cased text before the
        first period and the stripped text after it.
    """
    title_div = block.find("div", class_="title")
    if not title_div:
        return "", ""

    title_text = title_div.get_text(" ", strip=True)
    index_part, dot, name_part = title_text.partition(".")
    if not dot:
        return "", title_text.strip()
    return index_part.strip().upper(), name_part.strip()
|
||||
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
problems: list[ProblemSummary] = []
|
||||
|
||||
problem_links = soup.find_all(
|
||||
"a", href=lambda x: x and f"/contest/{contest_id}/problem/" in x
|
||||
def _extract_samples(block: Tag) -> list[TestCase]:
    """Pair up the sample inputs and outputs found in a problem block.

    Args:
        block: Element searched for a ``div.sample-test`` child.

    Returns:
        Test cases built by pairing the i-th input with the i-th output;
        surplus inputs or outputs are dropped. Empty when the block has no
        sample-test div.
    """
    sample_root = block.find("div", class_="sample-test")
    if not sample_root:
        return []

    def pre_texts(css_class: str) -> list[str]:
        # Sections without a <pre> Tag are skipped.
        texts: list[str] = []
        for section in sample_root.find_all("div", class_=css_class):  # type: ignore[union-attr]
            pre = section.find("pre")
            if isinstance(pre, Tag):
                texts.append(_text_from_pre(pre))
        return texts

    sample_inputs = pre_texts("input")
    sample_outputs = pre_texts("output")
    return [
        TestCase(input=given, expected=wanted)
        for given, wanted in zip(sample_inputs, sample_outputs)
    ]
|
||||
|
||||
|
||||
def _is_interactive(block: Tag) -> bool:
    """Tell whether a problem block declares itself interactive.

    The text of the nested ``div.problem-statement`` is inspected when there
    is one; otherwise the text of ``block`` itself is used.
    """
    statement = block.find("div", class_="problem-statement")
    text_source = statement if statement else block
    return "This is an interactive problem" in text_source.get_text(" ", strip=True)
|
||||
|
||||
|
||||
def _fetch_problems_html(contest_id: str) -> str:
    """Return the HTML of a contest's combined ``/problems`` page.

    Args:
        contest_id: Contest identifier interpolated into the URL.
    """
    fetched = StealthyFetcher.fetch(
        f"{BASE_URL}/contest/{contest_id}/problems",
        headless=True,
        solve_cloudflare=True,
    )
    return fetched.html_content
|
||||
|
||||
|
||||
def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
blocks = soup.find_all("div", class_="problem-statement")
|
||||
out: list[dict[str, Any]] = []
|
||||
for b in blocks:
|
||||
holder = b.find_parent("div", class_="problemindexholder")
|
||||
letter = (holder.get("problemindex") if holder else "").strip().upper()
|
||||
name = _extract_title(b)[1] # keep your name extraction
|
||||
if not letter:
|
||||
continue
|
||||
tests = _extract_samples(b)
|
||||
timeout_ms, memory_mb = _extract_limits(b)
|
||||
interactive = _is_interactive(b)
|
||||
out.append(
|
||||
{
|
||||
"letter": letter,
|
||||
"name": name,
|
||||
"tests": tests,
|
||||
"timeout_ms": timeout_ms,
|
||||
"memory_mb": memory_mb,
|
||||
"interactive": interactive,
|
||||
}
|
||||
)
|
||||
|
||||
for link in problem_links:
|
||||
if not isinstance(link, Tag):
|
||||
continue
|
||||
href: str = str(link.get("href", ""))
|
||||
if f"/contest/{contest_id}/problem/" in href:
|
||||
problem_letter: str = href.split("/")[-1].lower()
|
||||
problem_name: str = link.get_text(strip=True)
|
||||
|
||||
if not (problem_letter and problem_name):
|
||||
continue
|
||||
|
||||
problems.append(ProblemSummary(id=problem_letter, name=problem_name))
|
||||
|
||||
seen: set[str] = set()
|
||||
unique_problems: list[ProblemSummary] = []
|
||||
for p in problems:
|
||||
if p.id not in seen:
|
||||
seen.add(p.id)
|
||||
unique_problems.append(p)
|
||||
|
||||
return unique_problems
|
||||
|
||||
except Exception as e:
|
||||
print(f"Failed to scrape contest problems: {e}", file=sys.stderr)
|
||||
return []
|
||||
return out
|
||||
|
||||
|
||||
def scrape_sample_tests(url: str) -> list[TestCase]:
    """Log the URL being scraped to stderr, then delegate to ``scrape``."""
    print(f"Scraping: {url}", file=sys.stderr)
    samples = scrape(url)
    return samples
|
||||
|
||||
|
||||
def scrape_contests() -> list[ContestSummary]:
    """List every contest reported by the Codeforces ``contest.list`` API.

    Returns:
        One summary per entry of the API's ``result`` array, or an empty list
        when the reported status is not ``OK``.

    Raises:
        requests.HTTPError: via ``raise_for_status`` on a non-success response.
    """
    reply = requests.get("https://codeforces.com/api/contest.list", timeout=10)
    reply.raise_for_status()

    payload = reply.json()
    if payload["status"] != "OK":
        return []

    return [
        ContestSummary(
            id=str(entry["id"]), name=entry["name"], display_name=entry["name"]
        )
        for entry in payload["result"]
    ]
|
||||
def _scrape_contest_problems_sync(contest_id: str) -> list[ProblemSummary]:
    """Fetch a contest's problems page and summarise each distinct problem.

    Args:
        contest_id: Contest whose combined problems page is fetched.

    Returns:
        One summary per distinct upper-cased letter, in page order, with the
        id stored lower-cased.
    """
    parsed_blocks = _parse_all_blocks(_fetch_problems_html(contest_id))

    # Dict insertion order keeps the first occurrence of each letter.
    unique: dict[str, ProblemSummary] = {}
    for entry in parsed_blocks:
        letter = entry["letter"].upper()
        if letter not in unique:
            unique[letter] = ProblemSummary(id=letter.lower(), name=entry["name"])
    return list(unique.values())
|
||||
|
||||
|
||||
class CodeforcesScraper(BaseScraper):
|
||||
|
|
@ -244,81 +155,94 @@ class CodeforcesScraper(BaseScraper):
|
|||
def platform_name(self) -> str:
|
||||
return "codeforces"
|
||||
|
||||
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
return self._safe_execute(
|
||||
"metadata", self._scrape_contest_metadata_impl, contest_id
|
||||
)
|
||||
|
||||
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
|
||||
return self._safe_execute(
|
||||
"tests", self._scrape_problem_tests_impl, contest_id, problem_id
|
||||
)
|
||||
|
||||
def scrape_contest_list(self) -> ContestListResult:
|
||||
return self._safe_execute("contests", self._scrape_contest_list_impl)
|
||||
|
||||
def _scrape_contest_metadata_impl(self, contest_id: str) -> MetadataResult:
|
||||
problems = scrape_contest_problems(contest_id)
|
||||
if not problems:
|
||||
return self._create_metadata_error(
|
||||
f"No problems found for contest {contest_id}", contest_id
|
||||
)
|
||||
return MetadataResult(
|
||||
success=True, error="", contest_id=contest_id, problems=problems
|
||||
)
|
||||
|
||||
def _scrape_problem_tests_impl(
|
||||
self, contest_id: str, problem_letter: str
|
||||
) -> TestsResult:
|
||||
problem_id = contest_id + problem_letter.lower()
|
||||
url = parse_problem_url(contest_id, problem_letter)
|
||||
tests = scrape_sample_tests(url)
|
||||
|
||||
page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
|
||||
html = page.html_content
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
||||
|
||||
problem_statement_div = soup.find("div", class_="problem-statement")
|
||||
interactive = bool(
|
||||
problem_statement_div
|
||||
and "This is an interactive problem" in problem_statement_div.get_text()
|
||||
)
|
||||
|
||||
if not tests:
|
||||
return self._create_tests_error(
|
||||
f"No tests found for {contest_id} {problem_letter}", problem_id, url
|
||||
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
async def impl(cid: str) -> MetadataResult:
|
||||
problems = await asyncio.to_thread(_scrape_contest_problems_sync, cid)
|
||||
if not problems:
|
||||
return self._create_metadata_error(
|
||||
f"No problems found for contest {cid}", cid
|
||||
)
|
||||
return MetadataResult(
|
||||
success=True, error="", contest_id=cid, problems=problems
|
||||
)
|
||||
|
||||
return TestsResult(
|
||||
success=True,
|
||||
error="",
|
||||
problem_id=problem_id,
|
||||
url=url,
|
||||
tests=tests,
|
||||
timeout_ms=timeout_ms,
|
||||
memory_mb=memory_mb,
|
||||
interactive=interactive,
|
||||
)
|
||||
return await self._safe_execute("metadata", impl, contest_id)
|
||||
|
||||
def _scrape_contest_list_impl(self) -> ContestListResult:
|
||||
contests = scrape_contests()
|
||||
if not contests:
|
||||
return self._create_contests_error("No contests found")
|
||||
return ContestListResult(success=True, error="", contests=contests)
|
||||
async def scrape_contest_list(self) -> ContestListResult:
    """List finished contests from the Codeforces ``contest.list`` API.

    The blocking ``requests`` call runs in a worker thread via
    ``asyncio.to_thread``, as the sibling scraping methods do, so the event
    loop is not stalled for up to ``TIMEOUT_SECONDS`` while the HTTP request
    is in flight.

    Returns:
        A successful ``ContestListResult`` holding every contest whose phase
        is ``FINISHED``; otherwise a contests-error result ("Invalid API
        response", "No contests found", or the text of the exception).
    """

    def fetch_payload() -> Any:
        # Runs off the event loop: requests is a synchronous client.
        r = requests.get(API_CONTEST_LIST_URL, timeout=TIMEOUT_SECONDS)
        r.raise_for_status()
        return r.json()

    async def impl() -> ContestListResult:
        try:
            data = await asyncio.to_thread(fetch_payload)
            if data.get("status") != "OK":
                return self._create_contests_error("Invalid API response")

            contests: list[ContestSummary] = []
            for c in data["result"]:
                if c.get("phase") != "FINISHED":
                    continue
                cid = str(c["id"])
                name = c["name"]
                contests.append(ContestSummary(id=cid, name=name, display_name=name))

            if not contests:
                return self._create_contests_error("No contests found")

            return ContestListResult(success=True, error="", contests=contests)
        except Exception as e:
            return self._create_contests_error(str(e))

    return await self._safe_execute("contests", impl)
|
||||
|
||||
async def stream_tests_for_category_async(self, category_id: str) -> None:
    """Print one JSON line per problem of a contest, in page order.

    The combined problems page is fetched and parsed in worker threads; each
    parsed block is then reported either with its tests and limits or with a
    "no tests found" error entry.

    Args:
        category_id: Contest identifier handed to the page fetcher.
    """
    page_html = await asyncio.to_thread(_fetch_problems_html, category_id)
    parsed_blocks = await asyncio.to_thread(_parse_all_blocks, page_html)

    for entry in parsed_blocks:
        problem_id = entry["letter"].lower()
        samples: list[TestCase] = entry["tests"]

        if samples:
            payload = {
                "problem_id": problem_id,
                "tests": [
                    {"input": case.input, "expected": case.expected}
                    for case in samples
                ],
                "timeout_ms": entry["timeout_ms"],
                "memory_mb": entry["memory_mb"],
                "interactive": bool(entry["interactive"]),
            }
        else:
            payload = {
                "problem_id": problem_id,
                "error": f"{self.platform_name}: no tests found",
            }

        # One JSON document per line, flushed immediately.
        print(json.dumps(payload), flush=True)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
async def main_async() -> int:
|
||||
if len(sys.argv) < 2:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> <problem_letter> OR codeforces.py contests",
|
||||
error="Usage: codeforces.py metadata <contest_id> OR codeforces.py tests <contest_id> OR codeforces.py contests",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
mode: str = sys.argv[1]
|
||||
scraper = CodeforcesScraper()
|
||||
|
||||
if mode == "metadata":
|
||||
if len(sys.argv) != 3:
|
||||
|
|
@ -326,17 +250,17 @@ def main() -> None:
|
|||
success=False, error="Usage: codeforces.py metadata <contest_id>"
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
|
||||
contest_id: str = sys.argv[2]
|
||||
result = scraper.scrape_contest_metadata(contest_id)
|
||||
return 1
|
||||
contest_id = sys.argv[2]
|
||||
result = await scraper.scrape_contest_metadata(contest_id)
|
||||
print(json.dumps(asdict(result)))
|
||||
return 0 if result.success else 1
|
||||
|
||||
elif mode == "tests":
|
||||
if len(sys.argv) != 4:
|
||||
if mode == "tests":
|
||||
if len(sys.argv) != 3:
|
||||
tests_result = TestsResult(
|
||||
success=False,
|
||||
error="Usage: codeforces.py tests <contest_id> <problem_letter>",
|
||||
error="Usage: codeforces.py tests <contest_id>",
|
||||
problem_id="",
|
||||
url="",
|
||||
tests=[],
|
||||
|
|
@ -344,31 +268,32 @@ def main() -> None:
|
|||
memory_mb=0,
|
||||
)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
contest_id = sys.argv[2]
|
||||
await scraper.stream_tests_for_category_async(contest_id)
|
||||
return 0
|
||||
|
||||
tests_contest_id: str = sys.argv[2]
|
||||
problem_letter: str = sys.argv[3]
|
||||
tests_result = scraper.scrape_problem_tests(tests_contest_id, problem_letter)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
|
||||
elif mode == "contests":
|
||||
if mode == "contests":
|
||||
if len(sys.argv) != 2:
|
||||
contest_result = ContestListResult(
|
||||
success=False, error="Usage: codeforces.py contests"
|
||||
)
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
sys.exit(1)
|
||||
|
||||
contest_result = scraper.scrape_contest_list()
|
||||
return 1
|
||||
contest_result = await scraper.scrape_contest_list()
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
return 0 if contest_result.success else 1
|
||||
|
||||
else:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error=f"Unknown mode: {mode}. Use 'metadata', 'tests', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Unknown mode. Use 'metadata <contest_id>', 'tests <contest_id>', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
return 1
|
||||
|
||||
|
||||
def main() -> None:
|
||||
sys.exit(asyncio.run(main_async()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
546
scrapers/cses.py
546
scrapers/cses.py
|
|
@ -1,13 +1,13 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
import backoff
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
import httpx
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@ -19,6 +19,15 @@ from .models import (
|
|||
TestsResult,
|
||||
)
|
||||
|
||||
BASE_URL = "https://cses.fi"
|
||||
INDEX_PATH = "/problemset/list"
|
||||
TASK_PATH = "/problemset/task/{id}"
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
TIMEOUT_S = 15.0
|
||||
CONNECTIONS = 8
|
||||
|
||||
|
||||
def normalize_category_name(category_name: str) -> str:
    """Turn a category heading into an identifier.

    The name is lower-cased, spaces become underscores and every ``&`` is
    spelled out as ``and``.
    """
    lowered = category_name.lower()
    underscored = lowered.replace(" ", "_")
    return underscored.replace("&", "and")
|
||||
|
|
@ -57,256 +66,114 @@ def snake_to_title(name: str) -> str:
|
|||
return " ".join(map(fix_word, enumerate(words)))
|
||||
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
|
||||
max_tries=4,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Request failed (attempt {details['tries']}), retrying in {details['wait']:.1f}s: {details['exception']}",
|
||||
file=sys.stderr,
|
||||
),
|
||||
async def fetch_text(client: httpx.AsyncClient, path: str) -> str:
    """GET ``BASE_URL + path`` with the shared headers and timeout.

    Returns the response body as text. A non-success status raises through
    ``raise_for_status``.
    """
    url = BASE_URL + path
    response = await client.get(url, headers=HEADERS, timeout=TIMEOUT_S)
    response.raise_for_status()
    return response.text
|
||||
|
||||
|
||||
CATEGORY_BLOCK_RE = re.compile(
|
||||
r'<h2>(?P<cat>[^<]+)</h2>\s*<ul class="task-list">(?P<body>.*?)</ul>',
|
||||
re.DOTALL,
|
||||
)
|
||||
@backoff.on_predicate(
|
||||
backoff.expo,
|
||||
lambda response: response.status_code == 429,
|
||||
max_tries=4,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Rate limited, retrying in {details['wait']:.1f}s", file=sys.stderr
|
||||
),
|
||||
TASK_LINK_RE = re.compile(
|
||||
r'<li class="task"><a href="/problemset/task/(?P<id>\d+)/?">(?P<title>[^<]+)</a>',
|
||||
re.DOTALL,
|
||||
)
|
||||
def make_request(url: str, headers: dict) -> requests.Response:
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
# --- Task page: header -------------------------------------------------------

# First <h1> that follows the title-block div.
TITLE_RE = re.compile(
    r'<div class="title-block">.*?<h1>(?P<title>[^<]+)</h1>',
    re.S,
)

# Time limit in seconds (group 1) and memory limit in MB (group 1).
TIME_RE = re.compile(r"<li><b>Time limit:</b>\s*([0-9.]+)\s*s</li>")
MEM_RE = re.compile(r"<li><b>Memory limit:</b>\s*(\d+)\s*MB</li>")

# First <h4> inside the sidebar navigation.
SIDEBAR_CAT_RE = re.compile(
    r'<div class="nav sidebar">.*?<h4>(?P<cat>[^<]+)</h4>',
    re.S,
)

# --- Task page: statement body -----------------------------------------------

# Content of the "md" div, up to the first closing </div>.
MD_BLOCK_RE = re.compile(r'<div class="md">(.*?)</div>', re.S | re.I)

# Everything after an "Example"/"Examples" heading, up to the next heading or the end.
EXAMPLE_SECTION_RE = re.compile(
    r"<h[1-6][^>]*>\s*example[s]?:?\s*</h[1-6]>\s*(?P<section>.*?)(?=<h[1-6][^>]*>|$)",
    re.S | re.I,
)

# "Input:" paragraph + <pre>, followed later by "Output:" paragraph + <pre>.
LABELED_IO_RE = re.compile(
    r"input\s*:\s*</p>\s*<pre>(?P<input>.*?)</pre>.*?output\s*:\s*</p>\s*<pre>(?P<output>.*?)</pre>",
    re.S | re.I,
)

# Any <pre> block; used as a fallback when the labels are missing.
PRE_RE = re.compile(r"<pre>(.*?)</pre>", re.S | re.I)
|
||||
|
||||
|
||||
def scrape_category_problems(category_id: str) -> list[ProblemSummary]:
|
||||
category_name = snake_to_title(category_id)
|
||||
try:
|
||||
problemset_url = "https://cses.fi/problemset/"
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = make_request(problemset_url, headers)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
current_category = None
|
||||
problems = []
|
||||
target_found = False
|
||||
for element in soup.find_all(["h1", "h2", "ul"]):
|
||||
if not isinstance(element, Tag):
|
||||
continue
|
||||
if element.name in ["h1", "h2"]:
|
||||
text = element.get_text(strip=True)
|
||||
if not text or text.startswith("CSES") or text == "CSES Problem Set":
|
||||
continue
|
||||
if target_found and current_category != text:
|
||||
break
|
||||
current_category = text
|
||||
if text.lower() == category_name.lower():
|
||||
target_found = True
|
||||
elif element.name == "ul" and current_category and target_found:
|
||||
problem_links = element.find_all(
|
||||
"a", href=lambda x: x and "/problemset/task/" in x
|
||||
)
|
||||
for link in problem_links:
|
||||
href = link.get("href", "")
|
||||
if not href:
|
||||
continue
|
||||
problem_id = href.split("/")[-1]
|
||||
problem_name = link.get_text(strip=True)
|
||||
if not problem_id.isdigit() or not problem_name:
|
||||
continue
|
||||
problems.append(ProblemSummary(id=problem_id, name=problem_name))
|
||||
return problems
|
||||
except Exception as e:
|
||||
print(f"Failed to scrape CSES category {category_id}: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
|
||||
def parse_problem_url(problem_input: str) -> str | None:
|
||||
if problem_input.startswith("https://cses.fi/problemset/task/"):
|
||||
return problem_input.rstrip("/")
|
||||
elif problem_input.isdigit():
|
||||
return f"https://cses.fi/problemset/task/{problem_input}"
|
||||
return None
|
||||
|
||||
|
||||
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
    """Read the time and memory limits from the ``task-constraints`` list.

    Returns:
        ``(timeout_ms, memory_mb)``: the time limit converted from seconds to
        integer milliseconds, and the memory limit in MB as a float.

    Raises:
        ValueError: if the constraints list is missing, or either limit
            cannot be found in it.
    """
    section = soup.find("ul", class_="task-constraints")
    if not (section and isinstance(section, Tag)):
        raise ValueError("Could not find task-constraints section")

    timeout_ms = None
    memory_mb = None
    for item in section.find_all("li"):
        label = item.get_text()

        # Each pattern contains its own literal label, so no separate
        # substring check is needed before searching.
        time_hit = re.search(r"Time limit:\s*(\d+(?:\.\d+)?)\s*s", label)
        if time_hit is not None:
            timeout_ms = int(float(time_hit.group(1)) * 1000)

        mem_hit = re.search(r"Memory limit:\s*(\d+)\s*MB", label)
        if mem_hit is not None:
            memory_mb = float(mem_hit.group(1))

    if timeout_ms is None:
        raise ValueError("Could not find valid timeout in task-constraints section")
    if memory_mb is None:
        raise ValueError(
            "Could not find valid memory limit in task-constraints section"
        )
    return timeout_ms, memory_mb
|
||||
|
||||
|
||||
def scrape_categories() -> list[ContestSummary]:
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = make_request("https://cses.fi/problemset/", headers)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
categories = []
|
||||
for h2 in soup.find_all("h2"):
|
||||
category_name = h2.get_text().strip()
|
||||
if category_name == "General":
|
||||
continue
|
||||
category_id = normalize_category_name(category_name)
|
||||
display_name = category_name
|
||||
categories.append(
|
||||
ContestSummary(
|
||||
id=category_id, name=category_name, display_name=display_name
|
||||
)
|
||||
def parse_categories(html: str) -> list[ContestSummary]:
|
||||
out: list[ContestSummary] = []
|
||||
for m in CATEGORY_BLOCK_RE.finditer(html):
|
||||
cat = m.group("cat").strip()
|
||||
if cat == "General":
|
||||
continue
|
||||
out.append(
|
||||
ContestSummary(
|
||||
id=normalize_category_name(cat),
|
||||
name=cat,
|
||||
display_name=cat,
|
||||
)
|
||||
return categories
|
||||
except Exception as e:
|
||||
print(f"Failed to scrape CSES categories: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
|
||||
def process_problem_element(
    element,
    current_category: str | None,
    all_categories: dict[str, list[ProblemSummary]],
) -> str | None:
    """Fold one parsed element into the category → problems mapping.

    An ``h1`` element starts a category (registered in ``all_categories`` if
    new) and its text becomes the returned current category. An ``a`` element
    whose href contains ``/problemset/task/`` is appended to the current
    category when its last path segment is numeric, its text is non-empty and
    a current category exists. Everything else is ignored.

    Returns:
        The category that is current after processing ``element``.
    """
    if element.name == "h1":
        heading = element.get_text().strip()
        all_categories.setdefault(heading, [])
        return heading

    # Only anchors carry a task link; anything else counts as "no link".
    link = element.get("href", "") if element.name == "a" else ""
    if "/problemset/task/" not in link:
        return current_category

    task_id = link.split("/")[-1]
    title = element.get_text(strip=True)
    if task_id.isdigit() and title and current_category:
        all_categories[current_category].append(
            ProblemSummary(id=task_id, name=title)
        )
    return current_category
|
||||
|
||||
|
||||
def scrape_all_problems() -> dict[str, list[ProblemSummary]]:
|
||||
try:
|
||||
problemset_url = "https://cses.fi/problemset/"
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = requests.get(problemset_url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
all_categories: dict[str, list[ProblemSummary]] = {}
|
||||
current_category = None
|
||||
for element in soup.find_all(["h1", "h2", "ul"]):
|
||||
if not isinstance(element, Tag):
|
||||
continue
|
||||
if element.name in ["h1", "h2"]:
|
||||
text = element.get_text(strip=True)
|
||||
if text and not text.startswith("CSES") and text != "CSES Problem Set":
|
||||
current_category = text
|
||||
if current_category not in all_categories:
|
||||
all_categories[current_category] = []
|
||||
print(f"Found category: {current_category}", file=sys.stderr)
|
||||
elif element.name == "ul" and current_category:
|
||||
problem_links = element.find_all(
|
||||
"a", href=lambda x: x and "/problemset/task/" in x
|
||||
)
|
||||
for link in problem_links:
|
||||
href = link.get("href", "")
|
||||
if href:
|
||||
problem_id = href.split("/")[-1]
|
||||
problem_name = link.get_text(strip=True)
|
||||
if problem_id.isdigit() and problem_name:
|
||||
problem = ProblemSummary(id=problem_id, name=problem_name)
|
||||
all_categories[current_category].append(problem)
|
||||
print(
|
||||
f"Found {len(all_categories)} categories with {sum(len(probs) for probs in all_categories.values())} problems",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return all_categories
|
||||
except Exception as e:
|
||||
print(f"Failed to scrape CSES problems: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
|
||||
def _collect_section_after(header: Tag) -> list[Tag]:
    """Collect the sibling tags that follow ``header`` up to the next heading.

    Walks forward with ``find_next_sibling`` and stops at the first ``h1``,
    ``h2`` or ``h3`` tag, or when the siblings run out. The stopping heading
    is not included.
    """
    collected: list[Tag] = []
    node = header.find_next_sibling()
    while node:
        is_tag = isinstance(node, Tag)
        if is_tag and node.name in ("h1", "h2", "h3"):
            break
        if is_tag:
            collected.append(node)
        node = node.find_next_sibling()
    return collected
|
||||
|
||||
|
||||
def extract_example_test_cases(soup: BeautifulSoup) -> list[tuple[str, str]]:
    """Extract ``(input, output)`` sample pairs from a parsed problem page.

    For every ``h1``/``h2``/``h3`` whose text starts with "example"
    (case-insensitive), the section after it is searched for labelled
    "input"/"output" blocks. If either is missing, the first two ``<pre>``
    tags of the section fill the gaps. Sections that do not yield both a
    non-empty input and output are skipped.
    """

    def is_example_heading(tag) -> bool:
        if not isinstance(tag, Tag) or tag.name not in ("h1", "h2", "h3"):
            return False
        return tag.get_text(strip=True).lower().startswith("example")

    def labeled_pre(nodes, label: str) -> str | None:
        # A label is a p/h4/h5/h6 whose text (minus a trailing colon) equals
        # `label`; the payload is the next <pre> sibling of that label.
        for candidate in nodes:
            if not isinstance(candidate, Tag):
                continue
            if candidate.name not in ("p", "h4", "h5", "h6"):
                continue
            if candidate.get_text(strip=True).lower().rstrip(":") != label:
                continue
            following = candidate.find_next_sibling("pre")
            if following:
                return following.get_text().strip()
        return None

    pairs: list[tuple[str, str]] = []
    for heading in soup.find_all(is_example_heading):
        nodes = _collect_section_after(heading)
        sample_in = labeled_pre(nodes, "input")
        sample_out = labeled_pre(nodes, "output")

        if not (sample_in and sample_out):
            blocks = [n for n in nodes if isinstance(n, Tag) and n.name == "pre"]
            if len(blocks) >= 2:
                sample_in = sample_in or blocks[0].get_text().strip()
                sample_out = sample_out or blocks[1].get_text().strip()

        if sample_in and sample_out:
            pairs.append((sample_in, sample_out))
    return pairs
|
||||
def parse_category_problems(category_id: str, html: str) -> list[ProblemSummary]:
    """List the problems of one category from the problem-set index page.

    Args:
        category_id: Category identifier, e.g. as produced by
            ``normalize_category_name``.
        html: Raw HTML of the index page.

    Returns:
        One ``ProblemSummary`` per task link in the first matching category
        block, or an empty list when no heading matches.

    A heading matches when it equals ``snake_to_title(category_id)`` ignoring
    case, or when its normalized form equals ``category_id``. The second rule
    lets ids derived from the headings themselves round-trip even if
    ``snake_to_title`` does not reproduce the original capitalisation or
    symbols (presumably possible for headings with numerals or ``&`` — TODO
    confirm against ``snake_to_title``).
    """
    # Imported locally under a distinct name: the `html` parameter shadows the
    # standard-library module of the same name inside this function.
    from html import unescape

    want = snake_to_title(category_id).lower()
    for m in CATEGORY_BLOCK_RE.finditer(html):
        cat = m.group("cat").strip()
        if cat.lower() != want and normalize_category_name(cat) != category_id:
            continue
        body = m.group("body")
        return [
            # Titles are taken from raw markup, so decode entities and trim
            # whitespace to get the text a browser would display.
            ProblemSummary(id=mm.group("id"), name=unescape(mm.group("title")).strip())
            for mm in TASK_LINK_RE.finditer(body)
        ]
    return []
|
||||
|
||||
|
||||
def scrape(url: str) -> list[TestCase]:
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = make_request(url, headers)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
pairs = extract_example_test_cases(soup)
|
||||
return [TestCase(input=inp, expected=out) for (inp, out) in pairs]
|
||||
except Exception as e:
|
||||
print(f"Error scraping CSES: {e}", file=sys.stderr)
|
||||
def parse_limits(html: str) -> tuple[int, int]:
    """Extract ``(timeout_ms, memory_mb)`` from a task page.

    The time limit is read in seconds and rounded to whole milliseconds; the
    memory limit is read in MB. A limit that is not found is reported as 0.
    """
    time_match = TIME_RE.search(html)
    mem_match = MEM_RE.search(html)

    timeout_ms = 0
    if time_match:
        timeout_ms = int(round(float(time_match.group(1)) * 1000))

    memory_mb = 0
    if mem_match:
        memory_mb = int(mem_match.group(1))

    return timeout_ms, memory_mb
|
||||
|
||||
|
||||
def parse_title(html: str) -> str:
    """Return the stripped task title matched by ``TITLE_RE``, or ``""`` if absent."""
    found = TITLE_RE.search(html)
    if not found:
        return ""
    return found.group("title").strip()
|
||||
|
||||
|
||||
def parse_category_from_sidebar(html: str) -> str | None:
    """Return the stripped category name matched by ``SIDEBAR_CAT_RE``, or ``None``."""
    found = SIDEBAR_CAT_RE.search(html)
    if not found:
        return None
    return found.group("cat").strip()
|
||||
|
||||
|
||||
def parse_tests(html: str) -> list[TestCase]:
    """Extract the sample test from a task page.

    The statement (``MD_BLOCK_RE``) is narrowed to its example section when
    one is present. A labelled Input/Output pair is preferred; otherwise the
    first two ``<pre>`` blocks are used as input and expected output.

    Returns:
        A list with a single ``TestCase``, or an empty list when the
        statement block or the sample data cannot be found.
    """
    # Imported locally under a distinct name: the `html` parameter shadows the
    # standard-library module of the same name inside this function.
    from html import unescape

    def clean(raw: str) -> str:
        # <pre> content is captured from raw markup, so entities such as
        # &lt; / &amp; must be decoded to obtain the actual sample text.
        return unescape(raw).strip()

    md = MD_BLOCK_RE.search(html)
    if not md:
        return []
    block = md.group(1)

    # Fall back to the whole statement when there is no example heading.
    msec = EXAMPLE_SECTION_RE.search(block)
    section = msec.group("section") if msec else block

    mlabel = LABELED_IO_RE.search(section)
    if mlabel:
        return [
            TestCase(
                input=clean(mlabel.group("input")),
                expected=clean(mlabel.group("output")),
            )
        ]

    pres = PRE_RE.findall(section)
    if len(pres) >= 2:
        return [TestCase(input=clean(pres[0]), expected=clean(pres[1]))]

    return []
|
||||
|
||||
|
||||
def task_path(problem_id: str | int) -> str:
    """Build the task page path for ``problem_id`` from the ``TASK_PATH`` template."""
    identifier = str(problem_id)
    return TASK_PATH.format(id=identifier)
|
||||
|
||||
|
||||
class CSESScraper(BaseScraper):
|
||||
|
|
@ -314,129 +181,99 @@ class CSESScraper(BaseScraper):
|
|||
def platform_name(self) -> str:
|
||||
return "cses"
|
||||
|
||||
def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
return self._safe_execute("metadata", self._scrape_metadata_impl, contest_id)
|
||||
|
||||
def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
|
||||
return self._safe_execute(
|
||||
"tests", self._scrape_tests_impl, contest_id, problem_id
|
||||
)
|
||||
|
||||
def scrape_contest_list(self) -> ContestListResult:
|
||||
return self._safe_execute("contests", self._scrape_contests_impl)
|
||||
|
||||
def _safe_execute(self, operation: str, func, *args):
|
||||
try:
|
||||
return func(*args)
|
||||
except Exception as e:
|
||||
error_msg = f"{self.platform_name}: {str(e)}"
|
||||
if operation == "metadata":
|
||||
return MetadataResult(success=False, error=error_msg)
|
||||
elif operation == "tests":
|
||||
return TestsResult(
|
||||
success=False,
|
||||
error=error_msg,
|
||||
problem_id="",
|
||||
url="",
|
||||
tests=[],
|
||||
timeout_ms=0,
|
||||
memory_mb=0,
|
||||
)
|
||||
elif operation == "contests":
|
||||
return ContestListResult(success=False, error=error_msg)
|
||||
|
||||
def _scrape_metadata_impl(self, category_id: str) -> MetadataResult:
|
||||
problems = scrape_category_problems(category_id)
|
||||
async def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
|
||||
async with httpx.AsyncClient() as client:
|
||||
html = await fetch_text(client, INDEX_PATH)
|
||||
problems = parse_category_problems(contest_id, html)
|
||||
if not problems:
|
||||
return MetadataResult(
|
||||
success=False,
|
||||
error=f"{self.platform_name}: No problems found for category: {category_id}",
|
||||
error=f"{self.platform_name}: No problems found for category: {contest_id}",
|
||||
)
|
||||
return MetadataResult(
|
||||
success=True, error="", contest_id=category_id, problems=problems
|
||||
success=True, error="", contest_id=contest_id, problems=problems
|
||||
)
|
||||
|
||||
def _scrape_tests_impl(self, category: str, problem_id: str) -> TestsResult:
|
||||
url = parse_problem_url(problem_id)
|
||||
if not url:
|
||||
return TestsResult(
|
||||
success=False,
|
||||
error=f"{self.platform_name}: Invalid problem input: {problem_id}. Use either problem ID (e.g., 1068) or full URL",
|
||||
problem_id=problem_id if problem_id.isdigit() else "",
|
||||
url="",
|
||||
tests=[],
|
||||
timeout_ms=0,
|
||||
memory_mb=0,
|
||||
)
|
||||
tests = scrape(url)
|
||||
m = re.search(r"/task/(\d+)", url)
|
||||
actual_problem_id = (
|
||||
problem_id if problem_id.isdigit() else (m.group(1) if m else "")
|
||||
)
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
||||
if not tests:
|
||||
return TestsResult(
|
||||
success=False,
|
||||
error=f"{self.platform_name}: No tests found for {problem_id}",
|
||||
problem_id=actual_problem_id,
|
||||
url=url,
|
||||
tests=[],
|
||||
timeout_ms=timeout_ms,
|
||||
memory_mb=memory_mb,
|
||||
)
|
||||
return TestsResult(
|
||||
success=True,
|
||||
error="",
|
||||
problem_id=actual_problem_id,
|
||||
url=url,
|
||||
tests=tests,
|
||||
timeout_ms=timeout_ms,
|
||||
memory_mb=memory_mb,
|
||||
)
|
||||
|
||||
def _scrape_contests_impl(self) -> ContestListResult:
|
||||
categories = scrape_categories()
|
||||
if not categories:
|
||||
async def scrape_contest_list(self) -> ContestListResult:
|
||||
async with httpx.AsyncClient() as client:
|
||||
html = await fetch_text(client, INDEX_PATH)
|
||||
cats = parse_categories(html)
|
||||
if not cats:
|
||||
return ContestListResult(
|
||||
success=False, error=f"{self.platform_name}: No contests found"
|
||||
)
|
||||
return ContestListResult(success=True, error="", contests=categories)
|
||||
return ContestListResult(success=True, error="", contests=cats)
|
||||
|
||||
async def stream_tests_for_category_async(self, category_id: str) -> None:
    """Scrape every problem of a category concurrently, printing JSON lines.

    The index page is fetched once to list the category's problems; nothing
    is printed when the list is empty. Each problem page is then fetched and
    parsed under a semaphore of ``CONNECTIONS`` slots. One JSON object per
    problem is printed and flushed as soon as that problem completes, so the
    output order follows completion order. Per-problem failures are reported
    as ``{"problem_id", "error"}`` records rather than raised.
    """
    pool_limits = httpx.Limits(max_connections=CONNECTIONS)
    async with httpx.AsyncClient(limits=pool_limits) as client:
        listing = await fetch_text(client, INDEX_PATH)
        entries = parse_category_problems(category_id, listing)
        if not entries:
            return

        gate = asyncio.Semaphore(CONNECTIONS)

        async def scrape_problem(pid: str) -> dict[str, Any]:
            async with gate:
                try:
                    page = await fetch_text(client, task_path(pid))
                    samples = parse_tests(page)
                    timeout_ms, memory_mb = parse_limits(page)
                    if samples:
                        record = {
                            "problem_id": pid,
                            "tests": [
                                {"input": s.input, "expected": s.expected}
                                for s in samples
                            ],
                            "timeout_ms": timeout_ms,
                            "memory_mb": memory_mb,
                            "interactive": False,
                        }
                    else:
                        record = {
                            "problem_id": pid,
                            "error": f"{self.platform_name}: no tests found",
                        }
                    return record
                except Exception as exc:
                    return {"problem_id": pid, "error": str(exc)}

        pending = [scrape_problem(entry.id) for entry in entries]
        for finished in asyncio.as_completed(pending):
            print(json.dumps(await finished), flush=True)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
async def main_async() -> int:
|
||||
if len(sys.argv) < 2:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Usage: cses.py metadata <category_id> OR cses.py tests <category> <problem_id> OR cses.py contests",
|
||||
error="Usage: cses.py metadata <category_id> OR cses.py tests <category> OR cses.py contests",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
|
||||
mode: str = sys.argv[1]
|
||||
scraper = CSESScraper()
|
||||
|
||||
if mode == "metadata":
|
||||
if len(sys.argv) != 3:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error="Usage: cses.py metadata <category_id>",
|
||||
success=False, error="Usage: cses.py metadata <category_id>"
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
category_id = sys.argv[2]
|
||||
result = scraper.scrape_contest_metadata(category_id)
|
||||
result = await scraper.scrape_contest_metadata(category_id)
|
||||
print(json.dumps(asdict(result)))
|
||||
if not result.success:
|
||||
sys.exit(1)
|
||||
elif mode == "tests":
|
||||
if len(sys.argv) != 4:
|
||||
return 0 if result.success else 1
|
||||
|
||||
if mode == "tests":
|
||||
if len(sys.argv) != 3:
|
||||
tests_result = TestsResult(
|
||||
success=False,
|
||||
error="Usage: cses.py tests <category> <problem_id>",
|
||||
error="Usage: cses.py tests <category>",
|
||||
problem_id="",
|
||||
url="",
|
||||
tests=[],
|
||||
|
|
@ -444,31 +281,32 @@ def main() -> None:
|
|||
memory_mb=0,
|
||||
)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
sys.exit(1)
|
||||
return 1
|
||||
category = sys.argv[2]
|
||||
problem_id = sys.argv[3]
|
||||
tests_result = scraper.scrape_problem_tests(category, problem_id)
|
||||
print(json.dumps(asdict(tests_result)))
|
||||
if not tests_result.success:
|
||||
sys.exit(1)
|
||||
elif mode == "contests":
|
||||
await scraper.stream_tests_for_category_async(category)
|
||||
return 0
|
||||
|
||||
if mode == "contests":
|
||||
if len(sys.argv) != 2:
|
||||
contest_result = ContestListResult(
|
||||
success=False, error="Usage: cses.py contests"
|
||||
)
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
sys.exit(1)
|
||||
contest_result = scraper.scrape_contest_list()
|
||||
return 1
|
||||
contest_result = await scraper.scrape_contest_list()
|
||||
print(json.dumps(asdict(contest_result)))
|
||||
if not contest_result.success:
|
||||
sys.exit(1)
|
||||
else:
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category> <problem_id>', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
sys.exit(1)
|
||||
return 0 if contest_result.success else 1
|
||||
|
||||
result = MetadataResult(
|
||||
success=False,
|
||||
error=f"Unknown mode: {mode}. Use 'metadata <category>', 'tests <category>', or 'contests'",
|
||||
)
|
||||
print(json.dumps(asdict(result)))
|
||||
return 1
|
||||
|
||||
|
||||
def main() -> None:
|
||||
sys.exit(asyncio.run(main_async()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,43 +0,0 @@
|
|||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_codeforces_html():
|
||||
return """
|
||||
<div class="time-limit">Time limit: 1 seconds</div>
|
||||
<div class="memory-limit">Memory limit: 256 megabytes</div>
|
||||
<div class="input">
|
||||
<pre>
|
||||
<div class="test-example-line-1">3</div>
|
||||
<div class="test-example-line-1">1 2 3</div>
|
||||
</pre>
|
||||
</div>
|
||||
<div class="output">
|
||||
<pre>
|
||||
<div class="test-example-line-1">6</div>
|
||||
</pre>
|
||||
</div>
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_atcoder_html():
|
||||
return """
|
||||
<h3>Sample Input 1</h3>
|
||||
<pre>3
|
||||
1 2 3</pre>
|
||||
<h3>Sample Output 1</h3>
|
||||
<pre>6</pre>
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_cses_html():
|
||||
return """
|
||||
<h1>Example</h1>
|
||||
<p>Input:</p>
|
||||
<pre>3
|
||||
1 2 3</pre>
|
||||
<p>Output:</p>
|
||||
<pre>6</pre>
|
||||
"""
|
||||
|
|
@ -1,199 +0,0 @@
|
|||
from unittest.mock import Mock
|
||||
|
||||
from scrapers.atcoder import scrape, scrape_contest_problems, scrape_contests
|
||||
from scrapers.models import ContestSummary, ProblemSummary
|
||||
|
||||
|
||||
def test_scrape_success(mocker, mock_atcoder_html):
|
||||
mock_response = Mock()
|
||||
mock_response.text = mock_atcoder_html
|
||||
|
||||
mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].input == "3\n1 2 3"
|
||||
assert result[0].expected == "6"
|
||||
|
||||
|
||||
def test_scrape_contest_problems(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<table class="table">
|
||||
<tr><th>Task</th><th>Name</th></tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td><a href="/contests/abc350/tasks/abc350_a">A - Water Tank</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td><a href="/contests/abc350/tasks/abc350_b">B - Dentist Aoki</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
"""
|
||||
|
||||
mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_contest_problems("abc350")
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0] == ProblemSummary(id="a", name="A - Water Tank")
|
||||
assert result[1] == ProblemSummary(id="b", name="B - Dentist Aoki")
|
||||
|
||||
|
||||
def test_scrape_network_error(mocker):
|
||||
mocker.patch(
|
||||
"scrapers.atcoder.requests.get", side_effect=Exception("Network error")
|
||||
)
|
||||
|
||||
result = scrape("https://atcoder.jp/contests/abc350/tasks/abc350_a")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_scrape_contests_success(mocker):
|
||||
def mock_get_side_effect(url, **kwargs):
|
||||
if url == "https://atcoder.jp/contests/archive":
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = """
|
||||
<html>
|
||||
<ul class="pagination">
|
||||
<li>1</li>
|
||||
</ul>
|
||||
</html>
|
||||
"""
|
||||
return mock_response
|
||||
elif "page=1" in url:
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = """
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>2025-01-15 21:00:00+0900</td>
|
||||
<td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
|
||||
<td>01:40</td>
|
||||
<td> - 1999</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2025-01-14 21:00:00+0900</td>
|
||||
<td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
|
||||
<td>02:00</td>
|
||||
<td>1000 - 2799</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
"""
|
||||
return mock_response
|
||||
else:
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = "<html></html>"
|
||||
return mock_response
|
||||
|
||||
mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
|
||||
|
||||
result = scrape_contests()
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0] == ContestSummary(
|
||||
id="abc350",
|
||||
name="AtCoder Beginner Contest 350",
|
||||
display_name="AtCoder Beginner Contest 350",
|
||||
)
|
||||
assert result[1] == ContestSummary(
|
||||
id="arc170",
|
||||
name="AtCoder Regular Contest 170",
|
||||
display_name="AtCoder Regular Contest 170",
|
||||
)
|
||||
|
||||
|
||||
def test_scrape_contests_no_table(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = "<html><body>No table found</body></html>"
|
||||
|
||||
mocker.patch("scrapers.atcoder.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_contests()
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_scrape_contests_network_error(mocker):
|
||||
mocker.patch(
|
||||
"scrapers.atcoder.requests.get", side_effect=Exception("Network error")
|
||||
)
|
||||
|
||||
result = scrape_contests()
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_scrape_contests_filters_ahc(mocker):
|
||||
def mock_get_side_effect(url, **kwargs):
|
||||
if url == "https://atcoder.jp/contests/archive":
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = """
|
||||
<html>
|
||||
<ul class="pagination">
|
||||
<li>1</li>
|
||||
</ul>
|
||||
</html>
|
||||
"""
|
||||
return mock_response
|
||||
elif "page=1" in url:
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = """
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>2025-01-15 21:00:00+0900</td>
|
||||
<td><a href="/contests/abc350">AtCoder Beginner Contest 350</a></td>
|
||||
<td>01:40</td>
|
||||
<td> - 1999</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2025-01-14 21:00:00+0900</td>
|
||||
<td><a href="/contests/ahc044">AtCoder Heuristic Contest 044</a></td>
|
||||
<td>05:00</td>
|
||||
<td>-</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2025-01-13 21:00:00+0900</td>
|
||||
<td><a href="/contests/arc170">AtCoder Regular Contest 170</a></td>
|
||||
<td>02:00</td>
|
||||
<td>1000 - 2799</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
"""
|
||||
return mock_response
|
||||
else:
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
mock_response.text = "<html></html>"
|
||||
return mock_response
|
||||
|
||||
mocker.patch("scrapers.atcoder.requests.get", side_effect=mock_get_side_effect)
|
||||
|
||||
result = scrape_contests()
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0] == ContestSummary(
|
||||
id="abc350",
|
||||
name="AtCoder Beginner Contest 350",
|
||||
display_name="AtCoder Beginner Contest 350",
|
||||
)
|
||||
assert result[1] == ContestSummary(
|
||||
id="arc170",
|
||||
name="AtCoder Regular Contest 170",
|
||||
display_name="AtCoder Regular Contest 170",
|
||||
)
|
||||
|
||||
# Ensure ahc044 is filtered out
|
||||
contest_ids = [contest.id for contest in result]
|
||||
assert "ahc044" not in contest_ids
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
from unittest.mock import Mock
|
||||
|
||||
from scrapers.codeforces import CodeforcesScraper
|
||||
from scrapers.models import ContestSummary, ProblemSummary
|
||||
|
||||
|
||||
def test_scrape_success(mocker, mock_codeforces_html):
|
||||
mock_page = Mock()
|
||||
mock_page.html_content = mock_codeforces_html
|
||||
mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
||||
assert result.success
|
||||
assert len(result.tests) == 1
|
||||
assert result.tests[0].input == "1\n3\n1 2 3"
|
||||
assert result.tests[0].expected == "6"
|
||||
|
||||
|
||||
def test_scrape_contest_problems(mocker):
|
||||
html = """
|
||||
<a href="/contest/1900/problem/A">A. Problem A</a>
|
||||
<a href="/contest/1900/problem/B">B. Problem B</a>
|
||||
"""
|
||||
mock_page = Mock()
|
||||
mock_page.html_content = html
|
||||
mocker.patch("scrapers.codeforces.StealthyFetcher.fetch", return_value=mock_page)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_metadata("1900")
|
||||
|
||||
assert result.success
|
||||
assert len(result.problems) == 2
|
||||
assert result.problems[0] == ProblemSummary(id="a", name="A. Problem A")
|
||||
assert result.problems[1] == ProblemSummary(id="b", name="B. Problem B")
|
||||
|
||||
|
||||
def test_scrape_network_error(mocker):
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.StealthyFetcher.fetch",
|
||||
side_effect=Exception("Network error"),
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
||||
assert not result.success
|
||||
assert "network error" in result.error.lower()
|
||||
|
||||
|
||||
def test_scrape_contests_success(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {
|
||||
"status": "OK",
|
||||
"result": [
|
||||
{"id": 1951, "name": "Educational Codeforces Round 168 (Rated for Div. 2)"},
|
||||
{"id": 1950, "name": "Codeforces Round 936 (Div. 2)"},
|
||||
{"id": 1949, "name": "Codeforces Global Round 26"},
|
||||
],
|
||||
}
|
||||
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert result.success
|
||||
assert len(result.contests) == 3
|
||||
assert result.contests[0] == ContestSummary(
|
||||
id="1951",
|
||||
name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||
display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||
)
|
||||
|
||||
|
||||
def test_scrape_contests_api_error(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {"status": "FAILED", "result": []}
|
||||
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert not result.success
|
||||
assert "no contests found" in result.error.lower()
|
||||
|
||||
|
||||
def test_scrape_contests_network_error(mocker):
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.requests.get", side_effect=Exception("Network error")
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_list()
|
||||
|
||||
assert not result.success
|
||||
assert "network error" in result.error.lower()
|
||||
|
|
@ -1,185 +0,0 @@
|
|||
from unittest.mock import Mock
|
||||
|
||||
from scrapers.cses import (
|
||||
normalize_category_name,
|
||||
scrape,
|
||||
scrape_all_problems,
|
||||
scrape_categories,
|
||||
scrape_category_problems,
|
||||
snake_to_title,
|
||||
)
|
||||
from scrapers.models import ContestSummary, ProblemSummary
|
||||
|
||||
|
||||
def test_scrape_success(mocker, mock_cses_html):
|
||||
mock_response = Mock()
|
||||
mock_response.text = mock_cses_html
|
||||
|
||||
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape("https://cses.fi/problemset/task/1068")
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].input == "3\n1 2 3"
|
||||
assert result[0].expected == "6"
|
||||
|
||||
|
||||
def test_scrape_all_problems(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<div class="content">
|
||||
<h1>Introductory Problems</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1068">Weird Algorithm</a></li>
|
||||
<li><a href="/problemset/task/1083">Missing Number</a></li>
|
||||
</ul>
|
||||
<h1>Sorting and Searching</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1084">Apartments</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
"""
|
||||
mock_response.raise_for_status = Mock()
|
||||
|
||||
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_all_problems()
|
||||
|
||||
assert "Introductory Problems" in result
|
||||
assert "Sorting and Searching" in result
|
||||
assert len(result["Introductory Problems"]) == 2
|
||||
assert result["Introductory Problems"][0] == ProblemSummary(
|
||||
id="1068",
|
||||
name="Weird Algorithm",
|
||||
)
|
||||
|
||||
|
||||
def test_scrape_network_error(mocker):
|
||||
mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
|
||||
|
||||
result = scrape("https://cses.fi/problemset/task/1068")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_normalize_category_name():
|
||||
assert normalize_category_name("Sorting and Searching") == "sorting_and_searching"
|
||||
assert normalize_category_name("Dynamic Programming") == "dynamic_programming"
|
||||
assert normalize_category_name("Graph Algorithms") == "graph_algorithms"
|
||||
|
||||
|
||||
def test_snake_to_title():
|
||||
assert snake_to_title("sorting_and_searching") == "Sorting and Searching"
|
||||
assert snake_to_title("dynamic_programming") == "Dynamic Programming"
|
||||
assert snake_to_title("graph_algorithms") == "Graph Algorithms"
|
||||
|
||||
|
||||
def test_scrape_category_problems_success(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<div class="content">
|
||||
<h1>General</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1000">Test Problem</a></li>
|
||||
</ul>
|
||||
<h1>Sorting and Searching</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1640">Sum of Two Values</a></li>
|
||||
<li><a href="/problemset/task/1643">Maximum Subarray Sum</a></li>
|
||||
</ul>
|
||||
<h1>Dynamic Programming</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1633">Dice Combinations</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
"""
|
||||
mock_response.raise_for_status = Mock()
|
||||
|
||||
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_category_problems("sorting_and_searching")
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0].id == "1640"
|
||||
assert result[0].name == "Sum of Two Values"
|
||||
assert result[1].id == "1643"
|
||||
assert result[1].name == "Maximum Subarray Sum"
|
||||
|
||||
|
||||
def test_scrape_category_problems_not_found(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<div class="content">
|
||||
<h1>Some Other Category</h1>
|
||||
<ul>
|
||||
<li><a href="/problemset/task/1000">Test Problem</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
"""
|
||||
mock_response.raise_for_status = Mock()
|
||||
|
||||
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_category_problems("nonexistent_category")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_scrape_category_problems_network_error(mocker):
|
||||
mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
|
||||
|
||||
result = scrape_category_problems("sorting_and_searching")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_scrape_categories_success(mocker):
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
<html>
|
||||
<body>
|
||||
<h2>General</h2>
|
||||
<ul class="task-list">
|
||||
<li class="link"><a href="/register">Register</a></li>
|
||||
</ul>
|
||||
|
||||
<h2>Introductory Problems</h2>
|
||||
<ul class="task-list">
|
||||
<li class="task"><a href="/problemset/task/1068">Weird Algorithm</a></li>
|
||||
<li class="task"><a href="/problemset/task/1083">Missing Number</a></li>
|
||||
</ul>
|
||||
|
||||
<h2>Sorting and Searching</h2>
|
||||
<ul class="task-list">
|
||||
<li class="task"><a href="/problemset/task/1621">Distinct Numbers</a></li>
|
||||
<li class="task"><a href="/problemset/task/1084">Apartments</a></li>
|
||||
<li class="task"><a href="/problemset/task/1090">Ferris Wheel</a></li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
mock_response.raise_for_status = Mock()
|
||||
|
||||
mocker.patch("scrapers.cses.requests.get", return_value=mock_response)
|
||||
|
||||
result = scrape_categories()
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0] == ContestSummary(
|
||||
id="introductory_problems",
|
||||
name="Introductory Problems",
|
||||
display_name="Introductory Problems",
|
||||
)
|
||||
assert result[1] == ContestSummary(
|
||||
id="sorting_and_searching",
|
||||
name="Sorting and Searching",
|
||||
display_name="Sorting and Searching",
|
||||
)
|
||||
|
||||
|
||||
def test_scrape_categories_network_error(mocker):
|
||||
mocker.patch("scrapers.cses.requests.get", side_effect=Exception("Network error"))
|
||||
|
||||
result = scrape_categories()
|
||||
|
||||
assert result == []
|
||||
2
tests/scrapers/test_filler.py
Normal file
2
tests/scrapers/test_filler.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
def test():
|
||||
assert 5 == 5
|
||||
415
uv.lock
generated
415
uv.lock
generated
|
|
@ -92,6 +92,20 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.11.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "idna" },
|
||||
{ name = "sniffio" },
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "attrs"
|
||||
version = "25.3.0"
|
||||
|
|
@ -101,15 +115,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "automat"
|
||||
version = "25.4.16"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e3/0f/d40bbe294bbf004d436a8bcbcfaadca8b5140d39ad0ad3d73d1a8ba15f14/automat-25.4.16.tar.gz", hash = "sha256:0017591a5477066e90d26b0e696ddc143baafd87b588cfac8100bc6be9634de0", size = 129977, upload-time = "2025-04-16T20:12:16.002Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backoff"
|
||||
version = "2.2.1"
|
||||
|
|
@ -119,6 +124,18 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "basedpyright"
|
||||
version = "1.31.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nodejs-wheel-binaries" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a9/f6/c5657b1e464d04757cde2db76922a88091fe16854bd3d12e470c23b0dcf1/basedpyright-1.31.6.tar.gz", hash = "sha256:07f3602ba1582218dfd1db25b8b69cd3493e1f4367f46a44fd57bb9034b52ea9", size = 22683901, upload-time = "2025-10-01T13:11:21.317Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/2b/34f338b4c04fe965fd209ed872d9fdd893dacc1a06feb6c9fec13ff535c1/basedpyright-1.31.6-py3-none-any.whl", hash = "sha256:620968ee69c14eee6682f29ffd6f813a30966afb1083ecfa4caf155c5d24f2d5", size = 11805295, upload-time = "2025-10-01T13:11:18.308Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.13.5"
|
||||
|
|
@ -332,77 +349,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constantly"
|
||||
version = "23.10.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4d/6f/cb2a94494ff74aa9528a36c5b1422756330a75a8367bf20bd63171fc324d/constantly-23.10.4.tar.gz", hash = "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd", size = 13300, upload-time = "2023-10-28T23:18:24.316Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/40/c199d095151addf69efdb4b9ca3a4f20f70e20508d6222bffb9b76f58573/constantly-23.10.4-py3-none-any.whl", hash = "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9", size = 13547, upload-time = "2023-10-28T23:18:23.038Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cryptography"
|
||||
version = "46.0.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a9/62/e3664e6ffd7743e1694b244dde70b43a394f6f7fbcacf7014a8ff5197c73/cryptography-46.0.1.tar.gz", hash = "sha256:ed570874e88f213437f5cf758f9ef26cbfc3f336d889b1e592ee11283bb8d1c7", size = 749198, upload-time = "2025-09-17T00:10:35.797Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/8c/44ee01267ec01e26e43ebfdae3f120ec2312aa72fa4c0507ebe41a26739f/cryptography-46.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:1cd6d50c1a8b79af1a6f703709d8973845f677c8e97b1268f5ff323d38ce8475", size = 7285044, upload-time = "2025-09-17T00:08:36.807Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/59/9ae689a25047e0601adfcb159ec4f83c0b4149fdb5c3030cc94cd218141d/cryptography-46.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0ff483716be32690c14636e54a1f6e2e1b7bf8e22ca50b989f88fa1b2d287080", size = 4308182, upload-time = "2025-09-17T00:08:39.388Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/ee/ca6cc9df7118f2fcd142c76b1da0f14340d77518c05b1ebfbbabca6b9e7d/cryptography-46.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9873bf7c1f2a6330bdfe8621e7ce64b725784f9f0c3a6a55c3047af5849f920e", size = 4572393, upload-time = "2025-09-17T00:08:41.663Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/a3/0f5296f63815d8e985922b05c31f77ce44787b3127a67c0b7f70f115c45f/cryptography-46.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0dfb7c88d4462a0cfdd0d87a3c245a7bc3feb59de101f6ff88194f740f72eda6", size = 4308400, upload-time = "2025-09-17T00:08:43.559Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/8c/74fcda3e4e01be1d32775d5b4dd841acaac3c1b8fa4d0774c7ac8d52463d/cryptography-46.0.1-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e22801b61613ebdebf7deb18b507919e107547a1d39a3b57f5f855032dd7cfb8", size = 4015786, upload-time = "2025-09-17T00:08:45.758Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/b8/85d23287baeef273b0834481a3dd55bbed3a53587e3b8d9f0898235b8f91/cryptography-46.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:757af4f6341ce7a1e47c326ca2a81f41d236070217e5fbbad61bbfe299d55d28", size = 4982606, upload-time = "2025-09-17T00:08:47.602Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/d3/de61ad5b52433b389afca0bc70f02a7a1f074651221f599ce368da0fe437/cryptography-46.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f7a24ea78de345cfa7f6a8d3bde8b242c7fac27f2bd78fa23474ca38dfaeeab9", size = 4604234, upload-time = "2025-09-17T00:08:49.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/1f/dbd4d6570d84748439237a7478d124ee0134bf166ad129267b7ed8ea6d22/cryptography-46.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e8776dac9e660c22241b6587fae51a67b4b0147daa4d176b172c3ff768ad736", size = 4307669, upload-time = "2025-09-17T00:08:52.321Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/fd/ca0a14ce7f0bfe92fa727aacaf2217eb25eb7e4ed513b14d8e03b26e63ed/cryptography-46.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9f40642a140c0c8649987027867242b801486865277cbabc8c6059ddef16dc8b", size = 4947579, upload-time = "2025-09-17T00:08:54.697Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/6b/09c30543bb93401f6f88fce556b3bdbb21e55ae14912c04b7bf355f5f96c/cryptography-46.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:449ef2b321bec7d97ef2c944173275ebdab78f3abdd005400cc409e27cd159ab", size = 4603669, upload-time = "2025-09-17T00:08:57.16Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/9a/38cb01cb09ce0adceda9fc627c9cf98eb890fc8d50cacbe79b011df20f8a/cryptography-46.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2dd339ba3345b908fa3141ddba4025568fa6fd398eabce3ef72a29ac2d73ad75", size = 4435828, upload-time = "2025-09-17T00:08:59.606Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/53/435b5c36a78d06ae0bef96d666209b0ecd8f8181bfe4dda46536705df59e/cryptography-46.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7411c910fb2a412053cf33cfad0153ee20d27e256c6c3f14d7d7d1d9fec59fd5", size = 4709553, upload-time = "2025-09-17T00:09:01.832Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/c4/0da6e55595d9b9cd3b6eb5dc22f3a07ded7f116a3ea72629cab595abb804/cryptography-46.0.1-cp311-abi3-win32.whl", hash = "sha256:cbb8e769d4cac884bb28e3ff620ef1001b75588a5c83c9c9f1fdc9afbe7f29b0", size = 3058327, upload-time = "2025-09-17T00:09:03.726Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/0f/cd29a35e0d6e78a0ee61793564c8cff0929c38391cb0de27627bdc7525aa/cryptography-46.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:92e8cfe8bd7dd86eac0a677499894862cd5cc2fd74de917daa881d00871ac8e7", size = 3523893, upload-time = "2025-09-17T00:09:06.272Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/dd/eea390f3e78432bc3d2f53952375f8b37cb4d37783e626faa6a51e751719/cryptography-46.0.1-cp311-abi3-win_arm64.whl", hash = "sha256:db5597a4c7353b2e5fb05a8e6cb74b56a4658a2b7bf3cb6b1821ae7e7fd6eaa0", size = 2932145, upload-time = "2025-09-17T00:09:08.568Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/fb/c73588561afcd5e24b089952bd210b14676c0c5bf1213376350ae111945c/cryptography-46.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:4c49eda9a23019e11d32a0eb51a27b3e7ddedde91e099c0ac6373e3aacc0d2ee", size = 7193928, upload-time = "2025-09-17T00:09:10.595Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/34/0ff0bb2d2c79f25a2a63109f3b76b9108a906dd2a2eb5c1d460b9938adbb/cryptography-46.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9babb7818fdd71394e576cf26c5452df77a355eac1a27ddfa24096665a27f8fd", size = 4293515, upload-time = "2025-09-17T00:09:12.861Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/b7/d4f848aee24ecd1be01db6c42c4a270069a4f02a105d9c57e143daf6cf0f/cryptography-46.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9f2c4cc63be3ef43c0221861177cee5d14b505cd4d4599a89e2cd273c4d3542a", size = 4545619, upload-time = "2025-09-17T00:09:15.397Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/a5/42fedefc754fd1901e2d95a69815ea4ec8a9eed31f4c4361fcab80288661/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:41c281a74df173876da1dc9a9b6953d387f06e3d3ed9284e3baae3ab3f40883a", size = 4299160, upload-time = "2025-09-17T00:09:17.155Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/a1/cd21174f56e769c831fbbd6399a1b7519b0ff6280acec1b826d7b072640c/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0a17377fa52563d730248ba1f68185461fff36e8bc75d8787a7dd2e20a802b7a", size = 3994491, upload-time = "2025-09-17T00:09:18.971Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8d/2f/a8cbfa1c029987ddc746fd966711d4fa71efc891d37fbe9f030fe5ab4eec/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:0d1922d9280e08cde90b518a10cd66831f632960a8d08cb3418922d83fce6f12", size = 4960157, upload-time = "2025-09-17T00:09:20.923Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/ae/63a84e6789e0d5a2502edf06b552bcb0fa9ff16147265d5c44a211942abe/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:af84e8e99f1a82cea149e253014ea9dc89f75b82c87bb6c7242203186f465129", size = 4577263, upload-time = "2025-09-17T00:09:23.356Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/8f/1b9fa8e92bd9cbcb3b7e1e593a5232f2c1e6f9bd72b919c1a6b37d315f92/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ef648d2c690703501714588b2ba640facd50fd16548133b11b2859e8655a69da", size = 4298703, upload-time = "2025-09-17T00:09:25.566Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/af/bb95db070e73fea3fae31d8a69ac1463d89d1c084220f549b00dd01094a8/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:e94eb5fa32a8a9f9bf991f424f002913e3dd7c699ef552db9b14ba6a76a6313b", size = 4926363, upload-time = "2025-09-17T00:09:27.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/3b/d8fb17ffeb3a83157a1cc0aa5c60691d062aceecba09c2e5e77ebfc1870c/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:534b96c0831855e29fc3b069b085fd185aa5353033631a585d5cd4dd5d40d657", size = 4576958, upload-time = "2025-09-17T00:09:29.924Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/46/86bc3a05c10c8aa88c8ae7e953a8b4e407c57823ed201dbcba55c4d655f4/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9b55038b5c6c47559aa33626d8ecd092f354e23de3c6975e4bb205df128a2a0", size = 4422507, upload-time = "2025-09-17T00:09:32.222Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/4e/387e5a21dfd2b4198e74968a541cfd6128f66f8ec94ed971776e15091ac3/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ec13b7105117dbc9afd023300fb9954d72ca855c274fe563e72428ece10191c0", size = 4683964, upload-time = "2025-09-17T00:09:34.118Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/25/a3/f9f5907b166adb8f26762071474b38bbfcf89858a5282f032899075a38a1/cryptography-46.0.1-cp314-cp314t-win32.whl", hash = "sha256:504e464944f2c003a0785b81668fe23c06f3b037e9cb9f68a7c672246319f277", size = 3029705, upload-time = "2025-09-17T00:09:36.381Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/66/4d3a4f1850db2e71c2b1628d14b70b5e4c1684a1bd462f7fffb93c041c38/cryptography-46.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c52fded6383f7e20eaf70a60aeddd796b3677c3ad2922c801be330db62778e05", size = 3502175, upload-time = "2025-09-17T00:09:38.261Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/c7/9f10ad91435ef7d0d99a0b93c4360bea3df18050ff5b9038c489c31ac2f5/cryptography-46.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:9495d78f52c804b5ec8878b5b8c7873aa8e63db9cd9ee387ff2db3fffe4df784", size = 2912354, upload-time = "2025-09-17T00:09:40.078Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/e5/fbd632385542a3311915976f88e0dfcf09e62a3fc0aff86fb6762162a24d/cryptography-46.0.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:d84c40bdb8674c29fa192373498b6cb1e84f882889d21a471b45d1f868d8d44b", size = 7255677, upload-time = "2025-09-17T00:09:42.407Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/3e/13ce6eab9ad6eba1b15a7bd476f005a4c1b3f299f4c2f32b22408b0edccf/cryptography-46.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ed64e5083fa806709e74fc5ea067dfef9090e5b7a2320a49be3c9df3583a2d8", size = 4301110, upload-time = "2025-09-17T00:09:45.614Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/67/65dc233c1ddd688073cf7b136b06ff4b84bf517ba5529607c9d79720fc67/cryptography-46.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:341fb7a26bc9d6093c1b124b9f13acc283d2d51da440b98b55ab3f79f2522ead", size = 4562369, upload-time = "2025-09-17T00:09:47.601Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/17/db/d64ae4c6f4e98c3dac5bf35dd4d103f4c7c345703e43560113e5e8e31b2b/cryptography-46.0.1-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6ef1488967e729948d424d09c94753d0167ce59afba8d0f6c07a22b629c557b2", size = 4302126, upload-time = "2025-09-17T00:09:49.335Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/19/5f1eea17d4805ebdc2e685b7b02800c4f63f3dd46cfa8d4c18373fea46c8/cryptography-46.0.1-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7823bc7cdf0b747ecfb096d004cc41573c2f5c7e3a29861603a2871b43d3ef32", size = 4009431, upload-time = "2025-09-17T00:09:51.239Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/b5/229ba6088fe7abccbfe4c5edb96c7a5ad547fac5fdd0d40aa6ea540b2985/cryptography-46.0.1-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:f736ab8036796f5a119ff8211deda416f8c15ce03776db704a7a4e17381cb2ef", size = 4980739, upload-time = "2025-09-17T00:09:54.181Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/9c/50aa38907b201e74bc43c572f9603fa82b58e831bd13c245613a23cff736/cryptography-46.0.1-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:e46710a240a41d594953012213ea8ca398cd2448fbc5d0f1be8160b5511104a0", size = 4592289, upload-time = "2025-09-17T00:09:56.731Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/33/229858f8a5bb22f82468bb285e9f4c44a31978d5f5830bb4ea1cf8a4e454/cryptography-46.0.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:84ef1f145de5aee82ea2447224dc23f065ff4cc5791bb3b506615957a6ba8128", size = 4301815, upload-time = "2025-09-17T00:09:58.548Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/cb/b76b2c87fbd6ed4a231884bea3ce073406ba8e2dae9defad910d33cbf408/cryptography-46.0.1-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9394c7d5a7565ac5f7d9ba38b2617448eba384d7b107b262d63890079fad77ca", size = 4943251, upload-time = "2025-09-17T00:10:00.475Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/0f/f66125ecf88e4cb5b8017ff43f3a87ede2d064cb54a1c5893f9da9d65093/cryptography-46.0.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ed957044e368ed295257ae3d212b95456bd9756df490e1ac4538857f67531fcc", size = 4591247, upload-time = "2025-09-17T00:10:02.874Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/22/9f3134ae436b63b463cfdf0ff506a0570da6873adb4bf8c19b8a5b4bac64/cryptography-46.0.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f7de12fa0eee6234de9a9ce0ffcfa6ce97361db7a50b09b65c63ac58e5f22fc7", size = 4428534, upload-time = "2025-09-17T00:10:04.994Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/39/e6042bcb2638650b0005c752c38ea830cbfbcbb1830e4d64d530000aa8dc/cryptography-46.0.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7fab1187b6c6b2f11a326f33b036f7168f5b996aedd0c059f9738915e4e8f53a", size = 4699541, upload-time = "2025-09-17T00:10:06.925Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/46/753d457492d15458c7b5a653fc9a84a1c9c7a83af6ebdc94c3fc373ca6e8/cryptography-46.0.1-cp38-abi3-win32.whl", hash = "sha256:45f790934ac1018adeba46a0f7289b2b8fe76ba774a88c7f1922213a56c98bc1", size = 3043779, upload-time = "2025-09-17T00:10:08.951Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/50/b6f3b540c2f6ee712feeb5fa780bb11fad76634e71334718568e7695cb55/cryptography-46.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:7176a5ab56fac98d706921f6416a05e5aff7df0e4b91516f450f8627cda22af3", size = 3517226, upload-time = "2025-09-17T00:10:10.769Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/e8/77d17d00981cdd27cc493e81e1749a0b8bbfb843780dbd841e30d7f50743/cryptography-46.0.1-cp38-abi3-win_arm64.whl", hash = "sha256:efc9e51c3e595267ff84adf56e9b357db89ab2279d7e375ffcaf8f678606f3d9", size = 2923149, upload-time = "2025-09-17T00:10:13.236Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/27/27/077e09fd92075dd1338ea0ffaf5cfee641535545925768350ad90d8c36ca/cryptography-46.0.1-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b9c79af2c3058430d911ff1a5b2b96bbfe8da47d5ed961639ce4681886614e70", size = 3722319, upload-time = "2025-09-17T00:10:20.273Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/32/6fc7250280920418651640d76cee34d91c1e0601d73acd44364570cf041f/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0ca4be2af48c24df689a150d9cd37404f689e2968e247b6b8ff09bff5bcd786f", size = 4249030, upload-time = "2025-09-17T00:10:22.396Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/33/8d5398b2da15a15110b2478480ab512609f95b45ead3a105c9a9c76f9980/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:13e67c4d3fb8b6bc4ef778a7ccdd8df4cd15b4bcc18f4239c8440891a11245cc", size = 4528009, upload-time = "2025-09-17T00:10:24.418Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/1c/4012edad2a8977ab386c36b6e21f5065974d37afa3eade83a9968cba4855/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:15b5fd9358803b0d1cc42505a18d8bca81dabb35b5cfbfea1505092e13a9d96d", size = 4248902, upload-time = "2025-09-17T00:10:26.255Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/a3/257cd5ae677302de8fa066fca9de37128f6729d1e63c04dd6a15555dd450/cryptography-46.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:e34da95e29daf8a71cb2841fd55df0511539a6cdf33e6f77c1e95e44006b9b46", size = 4527150, upload-time = "2025-09-17T00:10:28.28Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/cd/fe6b65e1117ec7631f6be8951d3db076bac3e1b096e3e12710ed071ffc3c/cryptography-46.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:34f04b7311174469ab3ac2647469743720f8b6c8b046f238e5cb27905695eb2a", size = 3448210, upload-time = "2025-09-17T00:10:30.145Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssselect"
|
||||
version = "1.3.0"
|
||||
|
|
@ -450,15 +396,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/7c/24/f7351052cf9db771fe4f32fca47fd66e6d9b53d8613b17faf7d130a9d553/cython-3.1.4-py3-none-any.whl", hash = "sha256:d194d95e4fa029a3f6c7d46bdd16d973808c7ea4797586911fdb67cb98b1a2c6", size = 1227541, upload-time = "2025-09-16T07:20:29.595Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "defusedxml"
|
||||
version = "0.7.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "distlib"
|
||||
version = "0.4.0"
|
||||
|
|
@ -611,15 +548,40 @@ wheels = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyperlink"
|
||||
version = "21.0.0"
|
||||
name = "h11"
|
||||
version = "0.16.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.9"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "h11" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.28.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "certifi" },
|
||||
{ name = "httpcore" },
|
||||
{ name = "idna" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3a/51/1947bd81d75af87e3bb9e34593a4cf118115a8feb451ce7a69044ef1412e/hyperlink-21.0.0.tar.gz", hash = "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", size = 140743, upload-time = "2021-01-08T05:51:20.972Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/aa/8caf6a0a3e62863cbb9dab27135660acba46903b703e224f14f447e57934/hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4", size = 74638, upload-time = "2021-01-08T05:51:22.906Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -640,18 +602,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "incremental"
|
||||
version = "24.7.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "setuptools" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/27/87/156b374ff6578062965afe30cc57627d35234369b3336cf244b240c8d8e6/incremental-24.7.2.tar.gz", hash = "sha256:fb4f1d47ee60efe87d4f6f0ebb5f70b9760db2b2574c59c8e8912be4ebd464c9", size = 28157, upload-time = "2024-07-29T20:03:55.441Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/38/221e5b2ae676a3938c2c1919131410c342b6efc2baffeda395dd66eeca8f/incremental-24.7.2-py3-none-any.whl", hash = "sha256:8cb2c3431530bec48ad70513931a760f446ad6c25e8333ca5d95e24b0ed7b8fe", size = 20516, upload-time = "2024-07-29T20:03:53.677Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.1.0"
|
||||
|
|
@ -661,38 +611,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itemadapter"
|
||||
version = "0.12.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e9/50/2fd91416acfbd316b58de909cfc2a5c2daaa4ced67fb76cb0dedcbd13197/itemadapter-0.12.2.tar.gz", hash = "sha256:8e05c07cea966a7a8c4f096150ee2c91d9b4104a76f9afd029b235e1b564a61f", size = 32089, upload-time = "2025-09-02T12:15:19.751Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/ce/b2d995ddf3d493849f5608c7eab92c24cc50933503c645de3e4843aa7800/itemadapter-0.12.2-py3-none-any.whl", hash = "sha256:17ff8acb169fb11dbed8af83e805c19c3b890bde4653761b4d3c1544142e04b6", size = 18480, upload-time = "2025-09-02T12:15:18.259Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itemloaders"
|
||||
version = "1.3.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "itemadapter" },
|
||||
{ name = "jmespath" },
|
||||
{ name = "parsel" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b6/3e/c549370e95c9dc7ec5e155c075e2700fa75abe5625608a4ce5009eabe0bf/itemloaders-1.3.2.tar.gz", hash = "sha256:4faf5b3abe83bf014476e3fd9ccf66867282971d9f1d4e96d9a61b60c3786770", size = 19707, upload-time = "2024-09-30T13:48:49.417Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/68/9592dcfd9c24467b545fac17b098a171e372bf0d775400fa1971712bca57/itemloaders-1.3.2-py3-none-any.whl", hash = "sha256:6a91465f721c7bad8b07e1fbb0560cf99f4845156ed9f7bf2ca424336c6a677c", size = 12194, upload-time = "2024-09-30T13:48:47.82Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jmespath"
|
||||
version = "1.0.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "language-tags"
|
||||
version = "1.2.0"
|
||||
|
|
@ -1030,6 +948,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ndjson"
|
||||
version = "0.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b4/d5/209b6ca94566f9c94c0ec41cee1681c0a3b92a306a84a9b0fcd662088dc3/ndjson-0.3.1.tar.gz", hash = "sha256:bf9746cb6bb1cb53d172cda7f154c07c786d665ff28341e4e689b796b229e5d6", size = 6448, upload-time = "2020-02-25T05:01:07.873Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/70/c9/04ba0056011ba96a58163ebfd666d8385300bd12da1afe661a5a147758d7/ndjson-0.3.1-py2.py3-none-any.whl", hash = "sha256:839c22275e6baa3040077b83c005ac24199b94973309a8a1809be962c753a410", size = 5305, upload-time = "2020-02-25T05:01:06.39Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodeenv"
|
||||
version = "1.9.1"
|
||||
|
|
@ -1039,6 +966,22 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodejs-wheel-binaries"
|
||||
version = "22.20.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0f/54/02f58c8119e2f1984e2572cc77a7b469dbaf4f8d171ad376e305749ef48e/nodejs_wheel_binaries-22.20.0.tar.gz", hash = "sha256:a62d47c9fd9c32191dff65bbe60261504f26992a0a19fe8b4d523256a84bd351", size = 8058, upload-time = "2025-09-26T09:48:00.906Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/24/6d/333e5458422f12318e3c3e6e7f194353aa68b0d633217c7e89833427ca01/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:455add5ac4f01c9c830ab6771dbfad0fdf373f9b040d3aabe8cca9b6c56654fb", size = 53246314, upload-time = "2025-09-26T09:47:32.536Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/30/dcd6879d286a35b3c4c8f9e5e0e1bcf4f9e25fe35310fc77ecf97f915a23/nodejs_wheel_binaries-22.20.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:5d8c12f97eea7028b34a84446eb5ca81829d0c428dfb4e647e09ac617f4e21fa", size = 53644391, upload-time = "2025-09-26T09:47:36.093Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/be/c7b2e7aa3bb281d380a1c531f84d0ccfe225832dfc3bed1ca171753b9630/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a2b0989194148f66e9295d8f11bc463bde02cbe276517f4d20a310fb84780ae", size = 60282516, upload-time = "2025-09-26T09:47:39.88Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/c5/8befacf4190e03babbae54cb0809fb1a76e1600ec3967ab8ee9f8fc85b65/nodejs_wheel_binaries-22.20.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5c500aa4dc046333ecb0a80f183e069e5c30ce637f1c1a37166b2c0b642dc21", size = 60347290, upload-time = "2025-09-26T09:47:43.712Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/bd/cfffd1e334277afa0714962c6ec432b5fe339340a6bca2e5fa8e678e7590/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3279eb1b99521f0d20a850bbfc0159a658e0e85b843b3cf31b090d7da9f10dfc", size = 62178798, upload-time = "2025-09-26T09:47:47.752Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/14/10b83a9c02faac985b3e9f5e65d63a34fc0f46b48d8a2c3e4caa3e1e7318/nodejs_wheel_binaries-22.20.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d29705797b33bade62d79d8f106c2453c8a26442a9b2a5576610c0f7e7c351ed", size = 62772957, upload-time = "2025-09-26T09:47:51.266Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/a9/c6a480259aa0d6b270aac2c6ba73a97444b9267adde983a5b7e34f17e45a/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_amd64.whl", hash = "sha256:4bd658962f24958503541963e5a6f2cc512a8cb301e48a69dc03c879f40a28ae", size = 40120431, upload-time = "2025-09-26T09:47:54.363Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/b1/6a4eb2c6e9efa028074b0001b61008c9d202b6b46caee9e5d1b18c088216/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_arm64.whl", hash = "sha256:1fccac931faa210d22b6962bcdbc99269d16221d831b9a118bbb80fe434a60b8", size = 38844133, upload-time = "2025-09-26T09:47:57.357Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "2.3.3"
|
||||
|
|
@ -1193,22 +1136,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parsel"
|
||||
version = "1.10.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cssselect" },
|
||||
{ name = "jmespath" },
|
||||
{ name = "lxml" },
|
||||
{ name = "packaging" },
|
||||
{ name = "w3lib" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f6/df/acd504c154c0b9028b0d8491a77fdd5f86e9c06ee04f986abf85e36d9a5f/parsel-1.10.0.tar.gz", hash = "sha256:14f17db9559f51b43357b9dfe43cec870a8efb5ea4857abb624ec6ff80d8a080", size = 51421, upload-time = "2025-01-17T15:38:31.941Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/12/18/35d1d947553d24909dca37e2ff11720eecb601360d1bac8d7a9a1bc7eb08/parsel-1.10.0-py2.py3-none-any.whl", hash = "sha256:6a0c28bd81f9df34ba665884c88efa0b18b8d2c44c81f64e27f2f0cb37d46169", size = 17266, upload-time = "2025-01-17T15:38:27.83Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "patchright"
|
||||
version = "1.55.2"
|
||||
|
|
@ -1363,36 +1290,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protego"
|
||||
version = "0.5.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/19/9b/9c3a649167c7e43a0818df515d515e66d95a261fdfdf2a6afd45be9db696/protego-0.5.0.tar.gz", hash = "sha256:225dee0acfcc71de8c6f7cef9c618e5a9d3e7baa7ae1470b8d076a064033c463", size = 3137494, upload-time = "2025-06-24T13:58:45.31Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/cb/4347985f89ca3e4beb5d0cb85f8b951c9e339564bd2a3f388d6fb78382cc/protego-0.5.0-py3-none-any.whl", hash = "sha256:4237227840a67fdeec289a9b89652455b5657806388c17e1a556e160435f8fc5", size = 10356, upload-time = "2025-06-24T13:58:44.08Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyasn1"
|
||||
version = "0.6.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyasn1-modules"
|
||||
version = "0.4.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyasn1" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycparser"
|
||||
version = "2.23"
|
||||
|
|
@ -1402,15 +1299,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydispatcher"
|
||||
version = "2.0.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/21/db/030d0700ae90d2f9d52c2f3c1f864881e19cef8cba3b0a08759c8494c19c/PyDispatcher-2.0.7.tar.gz", hash = "sha256:b777c6ad080dc1bad74a4c29d6a46914fa6701ac70f94b0d66fbcfde62f5be31", size = 38891, upload-time = "2023-02-17T20:11:13.106Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/66/0e/9ee7bc0b48ec45d93b302fa2d787830dca4dc454d31a237faa5815995988/PyDispatcher-2.0.7-py3-none-any.whl", hash = "sha256:96543bea04115ffde08f851e1d45cacbfd1ee866ac42127d9b476dc5aefa7de0", size = 12040, upload-time = "2023-02-17T20:11:11.991Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyee"
|
||||
version = "13.0.0"
|
||||
|
|
@ -1463,25 +1351,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/c1/7c/54afe9ffee547c41e1161691e72067a37ed27466ac71c089bfdcd07ca70d/pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71", size = 396742, upload-time = "2025-06-14T20:46:57.64Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyopenssl"
|
||||
version = "25.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypydispatcher"
|
||||
version = "2.1.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d5/7b/65f55513d3c769fd677f90032d8d8703e3dc17e88a41b6074d2177548bca/PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2", size = 23224, upload-time = "2017-07-03T14:20:51.806Z" }
|
||||
|
||||
[[package]]
|
||||
name = "pysocks"
|
||||
version = "1.7.1"
|
||||
|
|
@ -1554,15 +1423,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "queuelib"
|
||||
version = "1.8.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4c/78/9ace6888cf6d390c9aec3ba93020838b08934959b544a7f10b15db815d29/queuelib-1.8.0.tar.gz", hash = "sha256:582bc65514481100b0539bd671da6b355b878869cfc77d92c63b75fcc9cf8e27", size = 11675, upload-time = "2025-03-31T12:18:46.193Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/70/44/542f4e702fafc477260d3463ae1bcdd113faac9d42336601af50985af914/queuelib-1.8.0-py3-none-any.whl", hash = "sha256:599468c5589716e63d3bb753dae7bf32cc94838ade1e7b450a061faec4a2015d", size = 13615, upload-time = "2025-03-31T12:18:43.526Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.32.5"
|
||||
|
|
@ -1598,14 +1458,15 @@ dependencies = [
|
|||
{ name = "backoff" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "curl-cffi" },
|
||||
{ name = "playwright" },
|
||||
{ name = "httpx" },
|
||||
{ name = "ndjson" },
|
||||
{ name = "requests" },
|
||||
{ name = "scrapling", extra = ["fetchers"] },
|
||||
{ name = "scrapy" },
|
||||
]
|
||||
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "basedpyright" },
|
||||
{ name = "mypy" },
|
||||
{ name = "pre-commit" },
|
||||
{ name = "pytest" },
|
||||
|
|
@ -1619,14 +1480,15 @@ requires-dist = [
|
|||
{ name = "backoff", specifier = ">=2.2.1" },
|
||||
{ name = "beautifulsoup4", specifier = ">=4.13.5" },
|
||||
{ name = "curl-cffi", specifier = ">=0.13.0" },
|
||||
{ name = "playwright", specifier = ">=1.55.0" },
|
||||
{ name = "httpx", specifier = ">=0.28.1" },
|
||||
{ name = "ndjson", specifier = ">=0.3.1" },
|
||||
{ name = "requests", specifier = ">=2.32.5" },
|
||||
{ name = "scrapling", extras = ["fetchers"], specifier = ">=0.3.5" },
|
||||
{ name = "scrapy", specifier = ">=2.13.3" },
|
||||
]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
{ name = "basedpyright", specifier = ">=1.31.6" },
|
||||
{ name = "mypy", specifier = ">=1.18.2" },
|
||||
{ name = "pre-commit", specifier = ">=4.3.0" },
|
||||
{ name = "pytest", specifier = ">=8.0.0" },
|
||||
|
|
@ -1661,35 +1523,6 @@ fetchers = [
|
|||
{ name = "playwright" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scrapy"
|
||||
version = "2.13.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
{ name = "cssselect" },
|
||||
{ name = "defusedxml" },
|
||||
{ name = "itemadapter" },
|
||||
{ name = "itemloaders" },
|
||||
{ name = "lxml" },
|
||||
{ name = "packaging" },
|
||||
{ name = "parsel" },
|
||||
{ name = "protego" },
|
||||
{ name = "pydispatcher", marker = "platform_python_implementation == 'CPython'" },
|
||||
{ name = "pyopenssl" },
|
||||
{ name = "pypydispatcher", marker = "platform_python_implementation == 'PyPy'" },
|
||||
{ name = "queuelib" },
|
||||
{ name = "service-identity" },
|
||||
{ name = "tldextract" },
|
||||
{ name = "twisted" },
|
||||
{ name = "w3lib" },
|
||||
{ name = "zope-interface" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/be/6c/bab0c01c5c50842548f0b5e936dfd2520a1ce84c171472c2cfe4d0599841/scrapy-2.13.3.tar.gz", hash = "sha256:bf17588c10e46a9d70c49a05380b749e3c7fba58204a367a5747ce6da2bd204d", size = 1220051, upload-time = "2025-07-02T15:41:15.776Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/53/cb/474b56910b9fb823298008444790a6d5fb9c8dfb936101136932d586287a/scrapy-2.13.3-py3-none-any.whl", hash = "sha256:9c16a482e1474b501f7b7121a4071ddc5cec4c0c7c0320217ed678d4fb8a3e9e", size = 321805, upload-time = "2025-07-02T15:41:13.782Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "screeninfo"
|
||||
version = "0.8.1"
|
||||
|
|
@ -1704,27 +1537,12 @@ wheels = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "service-identity"
|
||||
version = "24.2.0"
|
||||
name = "sniffio"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "attrs" },
|
||||
{ name = "cryptography" },
|
||||
{ name = "pyasn1" },
|
||||
{ name = "pyasn1-modules" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245, upload-time = "2024-10-26T07:21:57.736Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364, upload-time = "2024-10-26T07:21:56.302Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "80.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1763,24 +1581,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "twisted"
|
||||
version = "25.5.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "attrs" },
|
||||
{ name = "automat" },
|
||||
{ name = "constantly" },
|
||||
{ name = "hyperlink" },
|
||||
{ name = "incremental" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "zope-interface" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/13/0f/82716ed849bf7ea4984c21385597c949944f0f9b428b5710f79d0afc084d/twisted-25.5.0.tar.gz", hash = "sha256:1deb272358cb6be1e3e8fc6f9c8b36f78eb0fa7c2233d2dbe11ec6fee04ea316", size = 3545725, upload-time = "2025-06-07T09:52:24.858Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/66/ab7efd8941f0bc7b2bd555b0f0471bff77df4c88e0cc31120c82737fec77/twisted-25.5.0-py3-none-any.whl", hash = "sha256:8559f654d01a54a8c3efe66d533d43f383531ebf8d81d9f9ab4769d91ca15df7", size = 3204767, upload-time = "2025-06-07T09:52:21.428Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-beautifulsoup4"
|
||||
version = "4.12.0.20250516"
|
||||
|
|
@ -1866,15 +1666,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "w3lib"
|
||||
version = "2.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/bf/7d/1172cfaa1e29beb9bf938e484c122b3bdc82e8e37b17a4f753ba6d6e009f/w3lib-2.3.1.tar.gz", hash = "sha256:5c8ac02a3027576174c2b61eb9a2170ba1b197cae767080771b6f1febda249a4", size = 49531, upload-time = "2025-01-27T14:22:10.453Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/58/dd/56f0d8af71e475ed194d702f8b4cf9cea812c95e82ad823d239023c6558c/w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b", size = 21751, upload-time = "2025-01-27T14:22:09.421Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yarl"
|
||||
version = "1.20.1"
|
||||
|
|
@ -1956,29 +1747,3 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zope-interface"
|
||||
version = "8.0.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/88/3a/7fcf02178b8fad0a51e67e32765cd039ae505d054d744d76b8c2bbcba5ba/zope_interface-8.0.1.tar.gz", hash = "sha256:eba5610d042c3704a48222f7f7c6ab5b243ed26f917e2bc69379456b115e02d1", size = 253746, upload-time = "2025-09-25T05:55:51.285Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/2f/c10c739bcb9b072090c97c2e08533777497190daa19d190d72b4cce9c7cb/zope_interface-8.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4bd01022d2e1bce4a4a4ed9549edb25393c92e607d7daa6deff843f1f68b479d", size = 207903, upload-time = "2025-09-25T05:58:21.671Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/e1/9845ac3697f108d9a1af6912170c59a23732090bbfb35955fe77e5544955/zope_interface-8.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:29be8db8b712d94f1c05e24ea230a879271d787205ba1c9a6100d1d81f06c69a", size = 208345, upload-time = "2025-09-25T05:58:24.217Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/49/6573bc8b841cfab18e80c8e8259f1abdbbf716140011370de30231be79ad/zope_interface-8.0.1-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:51ae1b856565b30455b7879fdf0a56a88763b401d3f814fa9f9542d7410dbd7e", size = 255027, upload-time = "2025-09-25T05:58:19.975Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/fd/908b0fd4b1ab6e412dfac9bd2b606f2893ef9ba3dd36d643f5e5b94c57b3/zope_interface-8.0.1-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d2e7596149cb1acd1d4d41b9f8fe2ffc0e9e29e2e91d026311814181d0d9efaf", size = 259800, upload-time = "2025-09-25T05:58:11.487Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/78/8419a2b4e88410520ed4b7f93bbd25a6d4ae66c4e2b131320f2b90f43077/zope_interface-8.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2737c11c34fb9128816759864752d007ec4f987b571c934c30723ed881a7a4f", size = 260978, upload-time = "2025-09-25T06:26:24.483Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/90/caf68152c292f1810e2bd3acd2177badf08a740aa8a348714617d6c9ad0b/zope_interface-8.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:cf66e4bf731aa7e0ced855bb3670e8cda772f6515a475c6a107bad5cb6604103", size = 212155, upload-time = "2025-09-25T05:59:40.318Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/a6/0f08713ddda834c428ebf97b2a7fd8dea50c0100065a8955924dbd94dae8/zope_interface-8.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:115f27c1cc95ce7a517d960ef381beedb0a7ce9489645e80b9ab3cbf8a78799c", size = 208609, upload-time = "2025-09-25T05:58:53.698Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/5e/d423045f54dc81e0991ec655041e7a0eccf6b2642535839dd364b35f4d7f/zope_interface-8.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af655c573b84e3cb6a4f6fd3fbe04e4dc91c63c6b6f99019b3713ef964e589bc", size = 208797, upload-time = "2025-09-25T05:58:56.258Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/43/39d4bb3f7a80ebd261446792493cfa4e198badd47107224f5b6fe1997ad9/zope_interface-8.0.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:23f82ef9b2d5370750cc1bf883c3b94c33d098ce08557922a3fbc7ff3b63dfe1", size = 259242, upload-time = "2025-09-25T05:58:21.602Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/29/49effcff64ef30731e35520a152a9dfcafec86cf114b4c2aff942e8264ba/zope_interface-8.0.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35a1565d5244997f2e629c5c68715b3d9d9036e8df23c4068b08d9316dcb2822", size = 264696, upload-time = "2025-09-25T05:58:13.351Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/39/b947673ec9a258eeaa20208dd2f6127d9fbb3e5071272a674ebe02063a78/zope_interface-8.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:029ea1db7e855a475bf88d9910baab4e94d007a054810e9007ac037a91c67c6f", size = 264229, upload-time = "2025-09-25T06:26:26.226Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/ee/eed6efd1fc3788d1bef7a814e0592d8173b7fe601c699b935009df035fc2/zope_interface-8.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0beb3e7f7dc153944076fcaf717a935f68d39efa9fce96ec97bafcc0c2ea6cab", size = 212270, upload-time = "2025-09-25T05:58:53.584Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/dc/3c12fca01c910c793d636ffe9c0984e0646abaf804e44552070228ed0ede/zope_interface-8.0.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:c7cc027fc5c61c5d69e5080c30b66382f454f43dc379c463a38e78a9c6bab71a", size = 208992, upload-time = "2025-09-25T05:58:40.712Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/71/6127b7282a3e380ca927ab2b40778a9c97935a4a57a2656dadc312db5f30/zope_interface-8.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fcf9097ff3003b7662299f1c25145e15260ec2a27f9a9e69461a585d79ca8552", size = 209051, upload-time = "2025-09-25T05:58:42.182Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/86/4387a9f951ee18b0e41fda77da77d59c33e59f04660578e2bad688703e64/zope_interface-8.0.1-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6d965347dd1fb9e9a53aa852d4ded46b41ca670d517fd54e733a6b6a4d0561c2", size = 259223, upload-time = "2025-09-25T05:58:23.191Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/61/08/ce60a114466abc067c68ed41e2550c655f551468ae17b4b17ea360090146/zope_interface-8.0.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9a3b8bb77a4b89427a87d1e9eb969ab05e38e6b4a338a9de10f6df23c33ec3c2", size = 264690, upload-time = "2025-09-25T05:58:15.052Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/36/9a/62a9ba3a919594605a07c34eee3068659bbd648e2fa0c4a86d876810b674/zope_interface-8.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:87e6b089002c43231fb9afec89268391bcc7a3b66e76e269ffde19a8112fb8d5", size = 264201, upload-time = "2025-09-25T06:26:27.797Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/06/8fe88bd7edef60566d21ef5caca1034e10f6b87441ea85de4bbf9ea74768/zope_interface-8.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:64a43f5280aa770cbafd0307cb3d1ff430e2a1001774e8ceb40787abe4bb6658", size = 212273, upload-time = "2025-09-25T06:00:25.398Z" },
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue