Merge pull request #114 from barrett-ruth/feat/scrapling

Scraping & Picker Fixes
This commit is contained in:
Barrett Ruth 2025-10-01 04:02:27 +02:00 committed by GitHub
commit 64d4d59d06
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 1114 additions and 432 deletions

View file

@ -60,5 +60,7 @@ jobs:
uses: astral-sh/setup-uv@v4 uses: astral-sh/setup-uv@v4
- name: Install dependencies with pytest - name: Install dependencies with pytest
run: uv sync --dev run: uv sync --dev
- name: Fetch camoufox data
run: uv run camoufox fetch
- name: Run Python tests - name: Run Python tests
run: uv run pytest tests/scrapers/ -v run: uv run pytest tests/scrapers/ -v

View file

@ -364,29 +364,18 @@ Example: Setting up and solving AtCoder contest ABC324
PICKER INTEGRATION *cp-picker* PICKER INTEGRATION *cp-picker*
When picker integration is enabled in configuration, cp.nvim provides interactive When picker integration is enabled in configuration, cp.nvim provides interactive
platform, contest, and problem selection using telescope.nvim or fzf-lua. platform and contest selection using telescope.nvim or fzf-lua.
:CP pick *:CP-pick* :CP pick *:CP-pick*
Launch configured picker for interactive problem selection. Launch configured picker for interactive problem selection.
Control Flow: Select Platform → Contest → Problem → Code! Control Flow: Select Platform → Contest → Code!
Requires picker = 'telescope' or picker = 'fzf-lua' in configuration. Requires picker = 'telescope' or picker = 'fzf-lua' in configuration.
Requires corresponding plugin (telescope.nvim or fzf-lua) to be installed. Requires corresponding plugin (telescope.nvim or fzf-lua) to be installed.
Picker Controls ~ PICKER KEYMAPS *cp-picker-keys*
*cp-picker-controls* <c-r> Force refresh contest list, bypassing cache.
The picker interface provides several keyboard shortcuts for enhanced control: Useful when contest lists are outdated or incomplete
<c-r> Force refresh contest list, bypassing cache
Useful when contest lists are outdated or incomplete
Shows loading indicator during refresh operation
Standard picker controls (telescope.nvim/fzf-lua):
<cr> Select current item and proceed to next step
<c-c> / <esc> Cancel picker and return to editor
<c-n> / <down> Navigate to next item
<c-p> / <up> Navigate to previous item
/ Start filtering/searching items
============================================================================== ==============================================================================
RUN PANEL *cp-run* RUN PANEL *cp-run*
@ -542,13 +531,13 @@ prevent them from being overridden: >lua
============================================================================== ==============================================================================
RUN PANEL KEYMAPS *cp-test-keys* RUN PANEL KEYMAPS *cp-test-keys*
<c-n> Navigate to next test case (configurable via <c-n> Navigate to next test case (configurable via
run_panel.next_test_key) run_panel.next_test_key)
<c-p> Navigate to previous test case (configurable via <c-p> Navigate to previous test case (configurable via
run_panel.prev_test_key) run_panel.prev_test_key)
<c-t> Cycle through diff modes: none → git → vim (configurable <c-t> Cycle through diff modes: none → git → vim (configurable
via run_panel.toggle_diff_key) via run_panel.toggle_diff_key)
<c-q> Exit run panel/interactive terminal and restore layout <c-q> Exit run panel/interactive terminal and restore layout
Diff Modes ~ Diff Modes ~

View file

@ -227,10 +227,6 @@ end
---@param file_path string ---@param file_path string
---@return FileState? ---@return FileState?
function M.get_file_state(file_path) function M.get_file_state(file_path)
vim.validate({
file_path = { file_path, 'string' },
})
if not cache_data.file_states then if not cache_data.file_states then
return nil return nil
end end
@ -244,14 +240,6 @@ end
---@param problem_id? string ---@param problem_id? string
---@param language? string ---@param language? string
function M.set_file_state(file_path, platform, contest_id, problem_id, language) function M.set_file_state(file_path, platform, contest_id, problem_id, language)
vim.validate({
file_path = { file_path, 'string' },
platform = { platform, 'string' },
contest_id = { contest_id, 'string' },
problem_id = { problem_id, { 'string', 'nil' }, true },
language = { language, { 'string', 'nil' }, true },
})
if not cache_data.file_states then if not cache_data.file_states then
cache_data.file_states = {} cache_data.file_states = {}
end end
@ -269,10 +257,6 @@ end
---@param platform string ---@param platform string
---@return table[]? ---@return table[]?
function M.get_contest_list(platform) function M.get_contest_list(platform)
vim.validate({
platform = { platform, 'string' },
})
if not cache_data.contest_lists or not cache_data.contest_lists[platform] then if not cache_data.contest_lists or not cache_data.contest_lists[platform] then
return nil return nil
end end
@ -283,11 +267,6 @@ end
---@param platform string ---@param platform string
---@param contests table[] ---@param contests table[]
function M.set_contest_list(platform, contests) function M.set_contest_list(platform, contests)
vim.validate({
platform = { platform, 'string' },
contests = { contests, 'table' },
})
if not cache_data.contest_lists then if not cache_data.contest_lists then
cache_data.contest_lists = {} cache_data.contest_lists = {}
end end
@ -302,10 +281,6 @@ end
---@param platform string ---@param platform string
function M.clear_contest_list(platform) function M.clear_contest_list(platform)
vim.validate({
platform = { platform, 'string' },
})
if cache_data.contest_lists and cache_data.contest_lists[platform] then if cache_data.contest_lists and cache_data.contest_lists[platform] then
cache_data.contest_lists[platform] = nil cache_data.contest_lists[platform] = nil
M.save() M.save()
@ -313,16 +288,15 @@ function M.clear_contest_list(platform)
end end
function M.clear_all() function M.clear_all()
cache_data = {} cache_data = {
file_states = {},
contest_lists = {},
}
M.save() M.save()
end end
---@param platform string ---@param platform string
function M.clear_platform(platform) function M.clear_platform(platform)
vim.validate({
platform = { platform, 'string' },
})
if cache_data[platform] then if cache_data[platform] then
cache_data[platform] = nil cache_data[platform] = nil
end end

View file

@ -14,6 +14,8 @@ function M.handle_pick_action()
return return
end end
local picker
if config.picker == 'telescope' then if config.picker == 'telescope' then
local ok = pcall(require, 'telescope') local ok = pcall(require, 'telescope')
if not ok then if not ok then
@ -23,12 +25,13 @@ function M.handle_pick_action()
) )
return return
end end
local ok_cp, telescope_cp = pcall(require, 'cp.pickers.telescope') local ok_cp, telescope_picker = pcall(require, 'cp.pickers.telescope')
if not ok_cp then if not ok_cp then
logger.log('Failed to load telescope integration', vim.log.levels.ERROR) logger.log('Failed to load telescope integration', vim.log.levels.ERROR)
return return
end end
telescope_cp.platform_picker()
picker = telescope_picker
elseif config.picker == 'fzf-lua' then elseif config.picker == 'fzf-lua' then
local ok, _ = pcall(require, 'fzf-lua') local ok, _ = pcall(require, 'fzf-lua')
if not ok then if not ok then
@ -38,13 +41,16 @@ function M.handle_pick_action()
) )
return return
end end
local ok_cp, fzf_cp = pcall(require, 'cp.pickers.fzf_lua') local ok_cp, fzf_picker = pcall(require, 'cp.pickers.fzf_lua')
if not ok_cp then if not ok_cp then
logger.log('Failed to load fzf-lua integration', vim.log.levels.ERROR) logger.log('Failed to load fzf-lua integration', vim.log.levels.ERROR)
return return
end end
fzf_cp.platform_picker()
picker = fzf_picker
end end
picker.pick()
end end
return M return M

View file

@ -286,11 +286,6 @@ end
---@param problem_id? string ---@param problem_id? string
---@return string ---@return string
local function default_filename(contest_id, problem_id) local function default_filename(contest_id, problem_id)
vim.validate({
contest_id = { contest_id, 'string' },
problem_id = { problem_id, { 'string', 'nil' }, true },
})
if problem_id then if problem_id then
return (contest_id .. problem_id):lower() return (contest_id .. problem_id):lower()
else else

View file

@ -1,8 +1,8 @@
local picker_utils = require('cp.pickers') local picker_utils = require('cp.pickers')
local contest_picker, problem_picker local M = {}
function contest_picker(platform) local function contest_picker(platform)
local constants = require('cp.constants') local constants = require('cp.constants')
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
local fzf = require('fzf-lua') local fzf = require('fzf-lua')
@ -41,7 +41,8 @@ function contest_picker(platform)
end end
if contest then if contest then
problem_picker(platform, contest.id) local cp = require('cp')
cp.handle_command({ fargs = { platform, contest.id } })
end end
end, end,
['ctrl-r'] = function() ['ctrl-r'] = function()
@ -53,55 +54,7 @@ function contest_picker(platform)
}) })
end end
function problem_picker(platform, contest_id) function M.pick()
local constants = require('cp.constants')
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
local fzf = require('fzf-lua')
local problems = picker_utils.get_problems_for_contest(platform, contest_id)
if #problems == 0 then
vim.notify(
("Contest %s %s hasn't started yet or has no available problems"):format(
platform_display_name,
contest_id
),
vim.log.levels.WARN
)
contest_picker(platform)
return
end
local entries = vim.tbl_map(function(problem)
return problem.display_name
end, problems)
return fzf.fzf_exec(entries, {
prompt = ('Select Problem (%s %s)> '):format(platform_display_name, contest_id),
actions = {
['default'] = function(selected)
if not selected or #selected == 0 then
return
end
local selected_name = selected[1]
local problem = nil
for _, p in ipairs(problems) do
if p.display_name == selected_name then
problem = p
break
end
end
if problem then
local cp = require('cp')
cp.handle_command({ fargs = { platform, contest_id, problem.id } })
end
end,
},
})
end
local function platform_picker()
local fzf = require('fzf-lua') local fzf = require('fzf-lua')
local platforms = picker_utils.get_platforms() local platforms = picker_utils.get_platforms()
local entries = vim.tbl_map(function(platform) local entries = vim.tbl_map(function(platform)
@ -133,6 +86,4 @@ local function platform_picker()
}) })
end end
return { return M
platform_picker = platform_picker,
}

View file

@ -20,7 +20,7 @@ local utils = require('cp.utils')
---@field display_name string Formatted display name for picker ---@field display_name string Formatted display name for picker
---@return cp.PlatformItem[] ---@return cp.PlatformItem[]
local function get_platforms() function M.get_platforms()
local constants = require('cp.constants') local constants = require('cp.constants')
local result = {} local result = {}
@ -39,7 +39,7 @@ end
---Get list of contests for a specific platform ---Get list of contests for a specific platform
---@param platform string Platform identifier (e.g. "codeforces", "atcoder") ---@param platform string Platform identifier (e.g. "codeforces", "atcoder")
---@return cp.ContestItem[] ---@return cp.ContestItem[]
local function get_contests_for_platform(platform) function M.get_contests_for_platform(platform)
logger.log('loading contests...', vim.log.levels.INFO, true) logger.log('loading contests...', vim.log.levels.INFO, true)
cache.load() cache.load()
@ -108,7 +108,6 @@ local function get_contests_for_platform(platform)
}) })
end end
cache.set_contest_list(platform, contests)
logger.log(('loaded %d contests'):format(#contests)) logger.log(('loaded %d contests'):format(#contests))
return contests return contests
end end
@ -116,7 +115,9 @@ end
---@param platform string Platform identifier ---@param platform string Platform identifier
---@param contest_id string Contest identifier ---@param contest_id string Contest identifier
---@return cp.ProblemItem[] ---@return cp.ProblemItem[]
local function get_problems_for_contest(platform, contest_id) function M.get_problems_for_contest(platform, contest_id)
logger.log('loading contest problems...', vim.log.levels.INFO, true)
local problems = {} local problems = {}
cache.load() cache.load()
@ -132,8 +133,6 @@ local function get_problems_for_contest(platform, contest_id)
return problems return problems
end end
logger.log('loading contest problems...', vim.log.levels.INFO, true)
if not utils.setup_python_env() then if not utils.setup_python_env() then
return problems return problems
end end
@ -197,16 +196,11 @@ end
---@param platform string Platform identifier ---@param platform string Platform identifier
---@param contest_id string Contest identifier ---@param contest_id string Contest identifier
---@param problem_id string Problem identifier ---@param problem_id string Problem identifier
local function setup_problem(platform, contest_id, problem_id) function M.setup_problem(platform, contest_id, problem_id)
vim.schedule(function() vim.schedule(function()
local cp = require('cp') local cp = require('cp')
cp.handle_command({ fargs = { platform, contest_id, problem_id } }) cp.handle_command({ fargs = { platform, contest_id, problem_id } })
end) end)
end end
M.get_platforms = get_platforms
M.get_contests_for_platform = get_contests_for_platform
M.get_problems_for_contest = get_problems_for_contest
M.setup_problem = setup_problem
return M return M

View file

@ -6,9 +6,9 @@ local actions = require('telescope.actions')
local picker_utils = require('cp.pickers') local picker_utils = require('cp.pickers')
local contest_picker, problem_picker local M = {}
function contest_picker(opts, platform) local function contest_picker(opts, platform)
local constants = require('cp.constants') local constants = require('cp.constants')
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
local contests = picker_utils.get_contests_for_platform(platform) local contests = picker_utils.get_contests_for_platform(platform)
@ -24,7 +24,7 @@ function contest_picker(opts, platform)
pickers pickers
.new(opts, { .new(opts, {
prompt_title = ('Select Contest (%s)'):format(platform_display_name), prompt_title = ('Select Contest (%s)'):format(platform_display_name),
results_title = '<C-r> refresh', results_title = '<c-r> refresh',
finder = finders.new_table({ finder = finders.new_table({
results = contests, results = contests,
entry_maker = function(entry) entry_maker = function(entry)
@ -42,11 +42,12 @@ function contest_picker(opts, platform)
actions.close(prompt_bufnr) actions.close(prompt_bufnr)
if selection then if selection then
problem_picker(opts, platform, selection.value.id) local cp = require('cp')
cp.handle_command({ fargs = { platform, selection.value.id } })
end end
end) end)
map('i', '<C-r>', function() map('i', '<c-r>', function()
local cache = require('cp.cache') local cache = require('cp.cache')
cache.clear_contest_list(platform) cache.clear_contest_list(platform)
actions.close(prompt_bufnr) actions.close(prompt_bufnr)
@ -59,54 +60,7 @@ function contest_picker(opts, platform)
:find() :find()
end end
function problem_picker(opts, platform, contest_id) function M.pick(opts)
local constants = require('cp.constants')
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
local problems = picker_utils.get_problems_for_contest(platform, contest_id)
if #problems == 0 then
vim.notify(
("Contest %s %s hasn't started yet or has no available problems"):format(
platform_display_name,
contest_id
),
vim.log.levels.WARN
)
contest_picker(opts, platform)
return
end
pickers
.new(opts, {
prompt_title = ('Select Problem (%s %s)'):format(platform_display_name, contest_id),
finder = finders.new_table({
results = problems,
entry_maker = function(entry)
return {
value = entry,
display = entry.display_name,
ordinal = entry.display_name,
}
end,
}),
sorter = conf.generic_sorter(opts),
attach_mappings = function(prompt_bufnr)
actions.select_default:replace(function()
local selection = action_state.get_selected_entry()
actions.close(prompt_bufnr)
if selection then
local cp = require('cp')
cp.handle_command({ fargs = { platform, contest_id, selection.value.id } })
end
end)
return true
end,
})
:find()
end
local function platform_picker(opts)
opts = opts or {} opts = opts or {}
local platforms = picker_utils.get_platforms() local platforms = picker_utils.get_platforms()
@ -140,6 +94,4 @@ local function platform_picker(opts)
:find() :find()
end end
return { return M
platform_picker = platform_picker,
}

View file

@ -15,11 +15,6 @@ local filetype_to_language = constants.filetype_to_language
---@param contest_config table ---@param contest_config table
---@return string ---@return string
local function get_language_from_file(source_file, contest_config) local function get_language_from_file(source_file, contest_config)
vim.validate({
source_file = { source_file, 'string' },
contest_config = { contest_config, 'table' },
})
local extension = vim.fn.fnamemodify(source_file, ':e') local extension = vim.fn.fnamemodify(source_file, ':e')
local language = filetype_to_language[extension] or contest_config.default_language local language = filetype_to_language[extension] or contest_config.default_language
return language return language
@ -29,11 +24,6 @@ end
---@param substitutions table<string, string> ---@param substitutions table<string, string>
---@return string[] ---@return string[]
local function substitute_template(cmd_template, substitutions) local function substitute_template(cmd_template, substitutions)
vim.validate({
cmd_template = { cmd_template, 'table' },
substitutions = { substitutions, 'table' },
})
local result = {} local result = {}
for _, arg in ipairs(cmd_template) do for _, arg in ipairs(cmd_template) do
local substituted = arg local substituted = arg
@ -50,12 +40,6 @@ end
---@param substitutions table<string, string> ---@param substitutions table<string, string>
---@return string[] ---@return string[]
local function build_command(cmd_template, executable, substitutions) local function build_command(cmd_template, executable, substitutions)
vim.validate({
cmd_template = { cmd_template, 'table' },
executable = { executable, { 'string', 'nil' }, true },
substitutions = { substitutions, 'table' },
})
local cmd = substitute_template(cmd_template, substitutions) local cmd = substitute_template(cmd_template, substitutions)
if executable then if executable then
table.insert(cmd, 1, executable) table.insert(cmd, 1, executable)
@ -67,11 +51,6 @@ end
---@param substitutions table<string, string> ---@param substitutions table<string, string>
---@return {code: integer, stdout: string, stderr: string} ---@return {code: integer, stdout: string, stderr: string}
function M.compile_generic(language_config, substitutions) function M.compile_generic(language_config, substitutions)
vim.validate({
language_config = { language_config, 'table' },
substitutions = { substitutions, 'table' },
})
if not language_config.compile then if not language_config.compile then
logger.log('no compilation step required') logger.log('no compilation step required')
return { code = 0, stderr = '' } return { code = 0, stderr = '' }
@ -107,12 +86,6 @@ end
---@param timeout_ms number ---@param timeout_ms number
---@return ExecuteResult ---@return ExecuteResult
local function execute_command(cmd, input_data, timeout_ms) local function execute_command(cmd, input_data, timeout_ms)
vim.validate({
cmd = { cmd, 'table' },
input_data = { input_data, 'string' },
timeout_ms = { timeout_ms, 'number' },
})
local redirected_cmd = vim.deepcopy(cmd) local redirected_cmd = vim.deepcopy(cmd)
if #redirected_cmd > 0 then if #redirected_cmd > 0 then
redirected_cmd[#redirected_cmd] = redirected_cmd[#redirected_cmd] .. ' 2>&1' redirected_cmd[#redirected_cmd] = redirected_cmd[#redirected_cmd] .. ' 2>&1'
@ -158,12 +131,6 @@ end
---@param is_debug boolean ---@param is_debug boolean
---@return string ---@return string
local function format_output(exec_result, expected_file, is_debug) local function format_output(exec_result, expected_file, is_debug)
vim.validate({
exec_result = { exec_result, 'table' },
expected_file = { expected_file, 'string' },
is_debug = { is_debug, 'boolean' },
})
local output_lines = { exec_result.stdout } local output_lines = { exec_result.stdout }
local metadata_lines = {} local metadata_lines = {}
@ -207,10 +174,6 @@ end
---@param is_debug? boolean ---@param is_debug? boolean
---@return {success: boolean, output: string?} ---@return {success: boolean, output: string?}
function M.compile_problem(contest_config, is_debug) function M.compile_problem(contest_config, is_debug)
vim.validate({
contest_config = { contest_config, 'table' },
})
local state = require('cp.state') local state = require('cp.state')
local source_file = state.get_source_file() local source_file = state.get_source_file()
if not source_file then if not source_file then
@ -249,12 +212,9 @@ function M.compile_problem(contest_config, is_debug)
return { success = true, output = nil } return { success = true, output = nil }
end end
---@param contest_config ContestConfig
---@param is_debug boolean
function M.run_problem(contest_config, is_debug) function M.run_problem(contest_config, is_debug)
vim.validate({
contest_config = { contest_config, 'table' },
is_debug = { is_debug, 'boolean' },
})
local state = require('cp.state') local state = require('cp.state')
local source_file = state.get_source_file() local source_file = state.get_source_file()
local output_file = state.get_output_file() local output_file = state.get_output_file()

View file

@ -7,8 +7,10 @@ requires-python = ">=3.11"
dependencies = [ dependencies = [
"backoff>=2.2.1", "backoff>=2.2.1",
"beautifulsoup4>=4.13.5", "beautifulsoup4>=4.13.5",
"cloudscraper>=1.2.71", "curl-cffi>=0.13.0",
"playwright>=1.55.0",
"requests>=2.32.5", "requests>=2.32.5",
"scrapling[fetchers]>=0.3.5",
"scrapy>=2.13.3", "scrapy>=2.13.3",
] ]

View file

@ -21,6 +21,44 @@ from .models import (
) )
def _make_request(url: str, timeout: int = 10) -> requests.Response:
    """Fetch ``url`` with a browser-like User-Agent, retrying transient failures.

    The GET itself is wrapped in two ``backoff`` retry layers, both using
    exponential waits with random jitter and ``max_tries=5``:

    * inner layer: retries while the response status code is 429;
    * outer layer: retries when the call raises a ``requests`` exception.

    Each retry prints a one-line notice to stderr.

    Args:
        url: Address to fetch.
        timeout: Value forwarded to ``requests.get(timeout=...)``.

    Returns:
        The response object, after ``raise_for_status()`` has passed.

    Raises:
        Whatever ``requests.get`` raises once retries are exhausted, or the
        error raised by ``raise_for_status()`` on the final response.
    """
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    headers = {"User-Agent": user_agent}

    def _report_request_error(details) -> None:
        # Invoked by the outer retry layer before it waits and tries again.
        print(
            f"Request error on {url} (attempt {details['tries']}), "
            f"retrying in {details['wait']:.1f}s: {details['exception']}",
            file=sys.stderr,
        )

    def _report_rate_limit(details) -> None:
        # Invoked by the inner retry layer when a 429 response was returned.
        print(
            f"Rate limited on {url}, retrying in {details['wait']:.1f}s",
            file=sys.stderr,
        )

    def _is_rate_limited(resp) -> bool:
        return resp.status_code == 429

    def _req():
        return requests.get(url, headers=headers, timeout=timeout)

    retry_while_rate_limited = backoff.on_predicate(
        backoff.expo,
        _is_rate_limited,
        max_tries=5,
        jitter=backoff.random_jitter,
        on_backoff=_report_rate_limit,
    )
    retry_on_request_error = backoff.on_exception(
        backoff.expo,
        (requests.exceptions.RequestException, requests.exceptions.HTTPError),
        max_tries=5,
        jitter=backoff.random_jitter,
        on_backoff=_report_request_error,
    )
    # Same nesting as stacked decorators: the 429 predicate wraps the raw
    # call, and the exception handler wraps that.
    fetch = retry_on_request_error(retry_while_rate_limited(_req))

    response = fetch()
    # Runs outside the retried callable, so an error raised here propagates
    # to the caller without triggering another attempt.
    response.raise_for_status()
    return response
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]: def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
timeout_ms = None timeout_ms = None
memory_mb = None memory_mb = None
@ -82,12 +120,7 @@ def extract_problem_from_row(row, contest_id: str) -> ProblemSummary | None:
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
try: try:
contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks" contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks"
headers = { response = _make_request(contest_url)
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = requests.get(contest_url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
task_table = soup.find("table", class_="table") task_table = soup.find("table", class_="table")
@ -138,12 +171,7 @@ def extract_test_case_from_headers(sample_headers, i: int) -> tuple[str, str] |
def scrape(url: str) -> list[TestCase]: def scrape(url: str) -> list[TestCase]:
try: try:
headers = { response = _make_request(url)
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
sample_headers = soup.find_all( sample_headers = soup.find_all(
@ -171,14 +199,7 @@ def scrape(url: str) -> list[TestCase]:
def scrape_contests() -> list[ContestSummary]: def scrape_contests() -> list[ContestSummary]:
def get_max_pages() -> int: def get_max_pages() -> int:
try: try:
headers = { response = _make_request("https://atcoder.jp/contests/archive")
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response = requests.get(
"https://atcoder.jp/contests/archive", headers=headers, timeout=10
)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
pagination = soup.find("ul", class_="pagination") pagination = soup.find("ul", class_="pagination")
if not pagination or not isinstance(pagination, Tag): if not pagination or not isinstance(pagination, Tag):
@ -196,37 +217,8 @@ def scrape_contests() -> list[ContestSummary]:
return 15 return 15
def scrape_page(page: int) -> list[ContestSummary]: def scrape_page(page: int) -> list[ContestSummary]:
@backoff.on_exception(
backoff.expo,
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
max_tries=4,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Request failed on page {page} (attempt {details['tries']}), retrying in {details['wait']:.1f}s: {details['exception']}",
file=sys.stderr,
),
)
@backoff.on_predicate(
backoff.expo,
lambda response: response.status_code == 429,
max_tries=4,
jitter=backoff.random_jitter,
on_backoff=lambda details: print(
f"Rate limited on page {page}, retrying in {details['wait']:.1f}s",
file=sys.stderr,
),
)
def make_request() -> requests.Response:
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
url = f"https://atcoder.jp/contests/archive?page={page}"
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
return response
try: try:
response = make_request() response = _make_request(f"https://atcoder.jp/contests/archive?page={page}")
except Exception: except Exception:
return [] return []
@ -354,15 +346,7 @@ class AtCoderScraper(BaseScraper):
url = parse_problem_url(contest_id, problem_letter) url = parse_problem_url(contest_id, problem_letter)
tests = scrape(url) tests = scrape(url)
response = requests.get( response = _make_request(url)
url,
headers={
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
},
timeout=10,
)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup) timeout_ms, memory_mb = extract_problem_limits(soup)

View file

@ -5,8 +5,9 @@ import re
import sys import sys
from dataclasses import asdict from dataclasses import asdict
import cloudscraper import requests
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
from scrapling.fetchers import StealthySession
from .base import BaseScraper from .base import BaseScraper
from .models import ( from .models import (
@ -21,11 +22,11 @@ from .models import (
def scrape(url: str) -> list[TestCase]: def scrape(url: str) -> list[TestCase]:
try: try:
scraper = cloudscraper.create_scraper() with StealthySession(headless=True, solve_cloudflare=True) as session:
response = scraper.get(url, timeout=10) page = session.fetch(url, google_search=False)
response.raise_for_status() html = page.html_content
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(html, "html.parser")
input_sections = soup.find_all("div", class_="input") input_sections = soup.find_all("div", class_="input")
output_sections = soup.find_all("div", class_="output") output_sections = soup.find_all("div", class_="output")
@ -139,7 +140,7 @@ def scrape(url: str) -> list[TestCase]:
return [TestCase(input=combined_input, expected=combined_output)] return [TestCase(input=combined_input, expected=combined_output)]
except Exception as e: except Exception as e:
print(f"CloudScraper failed: {e}", file=sys.stderr) print(f"Scrapling failed: {e}", file=sys.stderr)
return [] return []
@ -180,11 +181,11 @@ def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
try: try:
contest_url: str = f"https://codeforces.com/contest/{contest_id}" contest_url: str = f"https://codeforces.com/contest/{contest_id}"
scraper = cloudscraper.create_scraper() with StealthySession(headless=True, solve_cloudflare=True) as session:
response = scraper.get(contest_url, timeout=10) page = session.fetch(contest_url, google_search=False)
response.raise_for_status() html = page.html_content
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(html, "html.parser")
problems: list[ProblemSummary] = [] problems: list[ProblemSummary] = []
problem_links = soup.find_all( problem_links = soup.find_all(
@ -224,8 +225,7 @@ def scrape_sample_tests(url: str) -> list[TestCase]:
def scrape_contests() -> list[ContestSummary]: def scrape_contests() -> list[ContestSummary]:
scraper = cloudscraper.create_scraper() response = requests.get("https://codeforces.com/api/contest.list", timeout=10)
response = scraper.get("https://codeforces.com/api/contest.list", timeout=10)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
@ -236,7 +236,6 @@ def scrape_contests() -> list[ContestSummary]:
for contest in data["result"]: for contest in data["result"]:
contest_id = str(contest["id"]) contest_id = str(contest["id"])
name = contest["name"] name = contest["name"]
contests.append(ContestSummary(id=contest_id, name=name, display_name=name)) contests.append(ContestSummary(id=contest_id, name=name, display_name=name))
return contests return contests
@ -277,10 +276,10 @@ class CodeforcesScraper(BaseScraper):
url = parse_problem_url(contest_id, problem_letter) url = parse_problem_url(contest_id, problem_letter)
tests = scrape_sample_tests(url) tests = scrape_sample_tests(url)
scraper = cloudscraper.create_scraper() with StealthySession(headless=True, solve_cloudflare=True) as session:
response = scraper.get(url, timeout=self.config.timeout_seconds) page = session.fetch(url, google_search=False)
response.raise_for_status() html = page.html_content
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(html, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup) timeout_ms, memory_mb = extract_problem_limits(soup)
problem_statement_div = soup.find("div", class_="problem-statement") problem_statement_div = soup.find("div", class_="problem-statement")

View file

@ -30,7 +30,6 @@ class ScrapingResult:
class MetadataResult(ScrapingResult): class MetadataResult(ScrapingResult):
contest_id: str = "" contest_id: str = ""
problems: list[ProblemSummary] = field(default_factory=list) problems: list[ProblemSummary] = field(default_factory=list)
categories: dict[str, list[ProblemSummary]] = field(default_factory=dict)
@dataclass @dataclass

View file

@ -235,28 +235,6 @@ describe('cp.execute', function()
end) end)
end) end)
describe('parameter validation', function()
it('validates language_config parameter', function()
assert.has_error(function()
execute.compile_generic(nil, {})
end)
assert.has_error(function()
execute.compile_generic('not_table', {})
end)
end)
it('validates substitutions parameter', function()
assert.has_error(function()
execute.compile_generic({}, nil)
end)
assert.has_error(function()
execute.compile_generic({}, 'not_table')
end)
end)
end)
describe('directory creation', function() describe('directory creation', function()
it('creates build and io directories', function() it('creates build and io directories', function()
local language_config = { local language_config = {

View file

@ -17,15 +17,24 @@ describe('cp.fzf_lua', function()
describe('module loading', function() describe('module loading', function()
it('loads fzf-lua integration without error', function() it('loads fzf-lua integration without error', function()
assert.has_no.errors(function() assert.has_no_errors(function()
require('cp.pickers.fzf_lua') require('cp.pickers.fzf_lua')
end) end)
end) end)
it('returns module with platform_picker function', function() it('returns module with picker function', function()
local fzf_lua_cp = require('cp.pickers.fzf_lua') local fzf_lua_cp = require('cp.pickers.fzf_lua')
assert.is_table(fzf_lua_cp) assert.is_table(fzf_lua_cp)
assert.is_function(fzf_lua_cp.platform_picker) assert.is_function(fzf_lua_cp.pick)
end)
end)
describe('basic running', function()
it('can run and open the picker with :CP pick', function()
local cp = require('cp')
assert.has_no_errors(function()
cp.handle_command({ fargs = { 'pick' } })
end)
end) end)
end) end)
end) end)

View file

@ -129,7 +129,6 @@ describe('cp.picker', function()
return '/tmp' return '/tmp'
end end
-- Mock vim.system to return success with problems
vim.system = function() vim.system = function()
return { return {
wait = function() wait = function()
@ -188,27 +187,4 @@ describe('cp.picker', function()
assert.equals(0, #problems) assert.equals(0, #problems)
end) end)
end) end)
describe('setup_problem', function()
it('calls cp.handle_command with correct arguments', function()
local cp = require('cp')
local called_with = nil
cp.handle_command = function(opts)
called_with = opts
end
picker.setup_problem('codeforces', '1951', 'a')
vim.wait(100, function()
return called_with ~= nil
end)
assert.is_table(called_with)
assert.is_table(called_with.fargs)
assert.equals('codeforces', called_with.fargs[1])
assert.equals('1951', called_with.fargs[2])
assert.equals('a', called_with.fargs[3])
end)
end)
end) end)

View file

@ -64,15 +64,24 @@ describe('cp.telescope', function()
describe('module loading', function() describe('module loading', function()
it('registers telescope extension without error', function() it('registers telescope extension without error', function()
assert.has_no.errors(function() assert.has_no_errors(function()
require('cp.pickers.telescope') require('cp.pickers.telescope')
end) end)
end) end)
it('returns module with platform_picker function', function() it('returns module with picker function', function()
local telescope_cp = require('cp.pickers.telescope') local telescope_cp = require('cp.pickers.telescope')
assert.is_table(telescope_cp) assert.is_table(telescope_cp)
assert.is_function(telescope_cp.platform_picker) assert.is_function(telescope_cp.pick)
end)
end)
describe('basic running', function()
it('can run and open the picker with :CP pick', function()
local cp = require('cp')
assert.has_no_errors(function()
cp.handle_command({ fargs = { 'pick' } })
end)
end) end)
end) end)
end) end)

View file

@ -4,15 +4,18 @@ from scrapers.codeforces import CodeforcesScraper
from scrapers.models import ContestSummary, ProblemSummary from scrapers.models import ContestSummary, ProblemSummary
def test_scrape_success(mocker, mock_codeforces_html): def make_mock_session(html: str):
mock_scraper = Mock() """Return a mock StealthySession that yields the given HTML."""
mock_response = Mock() mock_session = Mock()
mock_response.text = mock_codeforces_html mock_session.fetch.return_value.html_content = html
mock_scraper.get.return_value = mock_response mock_session.__enter__ = lambda s: s
mock_session.__exit__ = lambda s, exc_type, exc_val, exc_tb: None
return mock_session
mocker.patch(
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper def test_scrape_success(mocker, mock_codeforces_html):
) mock_session = make_mock_session(mock_codeforces_html)
mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
scraper = CodeforcesScraper() scraper = CodeforcesScraper()
result = scraper.scrape_problem_tests("1900", "A") result = scraper.scrape_problem_tests("1900", "A")
@ -24,17 +27,12 @@ def test_scrape_success(mocker, mock_codeforces_html):
def test_scrape_contest_problems(mocker): def test_scrape_contest_problems(mocker):
mock_scraper = Mock() html = """
mock_response = Mock()
mock_response.text = """
<a href="/contest/1900/problem/A">A. Problem A</a> <a href="/contest/1900/problem/A">A. Problem A</a>
<a href="/contest/1900/problem/B">B. Problem B</a> <a href="/contest/1900/problem/B">B. Problem B</a>
""" """
mock_scraper.get.return_value = mock_response mock_session = make_mock_session(html)
mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
mocker.patch(
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
)
scraper = CodeforcesScraper() scraper = CodeforcesScraper()
result = scraper.scrape_contest_metadata("1900") result = scraper.scrape_contest_metadata("1900")
@ -46,12 +44,11 @@ def test_scrape_contest_problems(mocker):
def test_scrape_network_error(mocker): def test_scrape_network_error(mocker):
mock_scraper = Mock() mock_session = Mock()
mock_scraper.get.side_effect = Exception("Network error") mock_session.fetch.side_effect = Exception("Network error")
mock_session.__enter__ = lambda s: s
mocker.patch( mock_session.__exit__ = lambda s, exc_type, exc_val, exc_tb: None
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
)
scraper = CodeforcesScraper() scraper = CodeforcesScraper()
result = scraper.scrape_problem_tests("1900", "A") result = scraper.scrape_problem_tests("1900", "A")
@ -61,7 +58,6 @@ def test_scrape_network_error(mocker):
def test_scrape_contests_success(mocker): def test_scrape_contests_success(mocker):
mock_scraper = Mock()
mock_response = Mock() mock_response = Mock()
mock_response.json.return_value = { mock_response.json.return_value = {
"status": "OK", "status": "OK",
@ -71,11 +67,7 @@ def test_scrape_contests_success(mocker):
{"id": 1949, "name": "Codeforces Global Round 26"}, {"id": 1949, "name": "Codeforces Global Round 26"},
], ],
} }
mock_scraper.get.return_value = mock_response mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
mocker.patch(
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
)
scraper = CodeforcesScraper() scraper = CodeforcesScraper()
result = scraper.scrape_contest_list() result = scraper.scrape_contest_list()
@ -87,27 +79,12 @@ def test_scrape_contests_success(mocker):
name="Educational Codeforces Round 168 (Rated for Div. 2)", name="Educational Codeforces Round 168 (Rated for Div. 2)",
display_name="Educational Codeforces Round 168 (Rated for Div. 2)", display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
) )
assert result.contests[1] == ContestSummary(
id="1950",
name="Codeforces Round 936 (Div. 2)",
display_name="Codeforces Round 936 (Div. 2)",
)
assert result.contests[2] == ContestSummary(
id="1949",
name="Codeforces Global Round 26",
display_name="Codeforces Global Round 26",
)
def test_scrape_contests_api_error(mocker): def test_scrape_contests_api_error(mocker):
mock_scraper = Mock()
mock_response = Mock() mock_response = Mock()
mock_response.json.return_value = {"status": "FAILED", "result": []} mock_response.json.return_value = {"status": "FAILED", "result": []}
mock_scraper.get.return_value = mock_response mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
mocker.patch(
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
)
scraper = CodeforcesScraper() scraper = CodeforcesScraper()
result = scraper.scrape_contest_list() result = scraper.scrape_contest_list()
@ -117,11 +94,8 @@ def test_scrape_contests_api_error(mocker):
def test_scrape_contests_network_error(mocker): def test_scrape_contests_network_error(mocker):
mock_scraper = Mock()
mock_scraper.get.side_effect = Exception("Network error")
mocker.patch( mocker.patch(
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper "scrapers.codeforces.requests.get", side_effect=Exception("Network error")
) )
scraper = CodeforcesScraper() scraper = CodeforcesScraper()

989
uv.lock generated

File diff suppressed because it is too large Load diff