Merge pull request #114 from barrett-ruth/feat/scrapling
Scraping & Picker Fixes
This commit is contained in:
commit
64d4d59d06
19 changed files with 1114 additions and 432 deletions
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
|
@ -60,5 +60,7 @@ jobs:
|
|||
uses: astral-sh/setup-uv@v4
|
||||
- name: Install dependencies with pytest
|
||||
run: uv sync --dev
|
||||
- name: Fetch camoufox data
|
||||
run: uv run camoufox fetch
|
||||
- name: Run Python tests
|
||||
run: uv run pytest tests/scrapers/ -v
|
||||
|
|
|
|||
29
doc/cp.txt
29
doc/cp.txt
|
|
@ -364,29 +364,18 @@ Example: Setting up and solving AtCoder contest ABC324
|
|||
PICKER INTEGRATION *cp-picker*
|
||||
|
||||
When picker integration is enabled in configuration, cp.nvim provides interactive
|
||||
platform, contest, and problem selection using telescope.nvim or fzf-lua.
|
||||
platform and contest selection using telescope.nvim or fzf-lua.
|
||||
|
||||
:CP pick *:CP-pick*
|
||||
Launch configured picker for interactive platform and contest selection.
|
||||
Control Flow: Select Platform → Contest → Problem → Code!
|
||||
Control Flow: Select Platform → Contest → Code!
|
||||
|
||||
Requires picker = 'telescope' or picker = 'fzf-lua' in configuration.
|
||||
Requires corresponding plugin (telescope.nvim or fzf-lua) to be installed.
|
||||
|
||||
Picker Controls ~
|
||||
*cp-picker-controls*
|
||||
The picker interface provides several keyboard shortcuts for enhanced control:
|
||||
|
||||
<c-r> Force refresh contest list, bypassing cache
|
||||
Useful when contest lists are outdated or incomplete
|
||||
Shows loading indicator during refresh operation
|
||||
|
||||
Standard picker controls (telescope.nvim/fzf-lua):
|
||||
<cr> Select current item and proceed to next step
|
||||
<c-c> / <esc> Cancel picker and return to editor
|
||||
<c-n> / <down> Navigate to next item
|
||||
<c-p> / <up> Navigate to previous item
|
||||
/ Start filtering/searching items
|
||||
PICKER KEYMAPS *cp-picker-keys*
|
||||
<c-r> Force refresh contest list, bypassing cache.
|
||||
Useful when contest lists are outdated or incomplete
|
||||
|
||||
==============================================================================
|
||||
RUN PANEL *cp-run*
|
||||
|
|
@ -542,13 +531,13 @@ prevent them from being overridden: >lua
|
|||
|
||||
==============================================================================
|
||||
RUN PANEL KEYMAPS *cp-test-keys*
|
||||
<c-n> Navigate to next test case (configurable via
|
||||
<c-n> Navigate to next test case (configurable via
|
||||
run_panel.next_test_key)
|
||||
<c-p> Navigate to previous test case (configurable via
|
||||
<c-p> Navigate to previous test case (configurable via
|
||||
run_panel.prev_test_key)
|
||||
<c-t> Cycle through diff modes: none → git → vim (configurable
|
||||
<c-t> Cycle through diff modes: none → git → vim (configurable
|
||||
via run_panel.toggle_diff_key)
|
||||
<c-q> Exit run panel/interactive terminal and restore layout
|
||||
<c-q> Exit run panel/interactive terminal and restore layout
|
||||
|
||||
Diff Modes ~
|
||||
|
||||
|
|
|
|||
|
|
@ -227,10 +227,6 @@ end
|
|||
---@param file_path string
|
||||
---@return FileState?
|
||||
function M.get_file_state(file_path)
|
||||
vim.validate({
|
||||
file_path = { file_path, 'string' },
|
||||
})
|
||||
|
||||
if not cache_data.file_states then
|
||||
return nil
|
||||
end
|
||||
|
|
@ -244,14 +240,6 @@ end
|
|||
---@param problem_id? string
|
||||
---@param language? string
|
||||
function M.set_file_state(file_path, platform, contest_id, problem_id, language)
|
||||
vim.validate({
|
||||
file_path = { file_path, 'string' },
|
||||
platform = { platform, 'string' },
|
||||
contest_id = { contest_id, 'string' },
|
||||
problem_id = { problem_id, { 'string', 'nil' }, true },
|
||||
language = { language, { 'string', 'nil' }, true },
|
||||
})
|
||||
|
||||
if not cache_data.file_states then
|
||||
cache_data.file_states = {}
|
||||
end
|
||||
|
|
@ -269,10 +257,6 @@ end
|
|||
---@param platform string
|
||||
---@return table[]?
|
||||
function M.get_contest_list(platform)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
})
|
||||
|
||||
if not cache_data.contest_lists or not cache_data.contest_lists[platform] then
|
||||
return nil
|
||||
end
|
||||
|
|
@ -283,11 +267,6 @@ end
|
|||
---@param platform string
|
||||
---@param contests table[]
|
||||
function M.set_contest_list(platform, contests)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
contests = { contests, 'table' },
|
||||
})
|
||||
|
||||
if not cache_data.contest_lists then
|
||||
cache_data.contest_lists = {}
|
||||
end
|
||||
|
|
@ -302,10 +281,6 @@ end
|
|||
|
||||
---@param platform string
|
||||
function M.clear_contest_list(platform)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
})
|
||||
|
||||
if cache_data.contest_lists and cache_data.contest_lists[platform] then
|
||||
cache_data.contest_lists[platform] = nil
|
||||
M.save()
|
||||
|
|
@ -313,16 +288,15 @@ function M.clear_contest_list(platform)
|
|||
end
|
||||
|
||||
---Reset the in-memory cache and save the result.
---
---`cache_data` is replaced by a fresh table that already contains empty
---`file_states` and `contest_lists` tables, so both keys exist immediately
---after a reset. `M.save()` is then called with the new state in place.
function M.clear_all()
  cache_data = {
    file_states = {},
    contest_lists = {},
  }
  M.save()
end
|
||||
|
||||
---@param platform string
|
||||
function M.clear_platform(platform)
|
||||
vim.validate({
|
||||
platform = { platform, 'string' },
|
||||
})
|
||||
|
||||
if cache_data[platform] then
|
||||
cache_data[platform] = nil
|
||||
end
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ function M.handle_pick_action()
|
|||
return
|
||||
end
|
||||
|
||||
local picker
|
||||
|
||||
if config.picker == 'telescope' then
|
||||
local ok = pcall(require, 'telescope')
|
||||
if not ok then
|
||||
|
|
@ -23,12 +25,13 @@ function M.handle_pick_action()
|
|||
)
|
||||
return
|
||||
end
|
||||
local ok_cp, telescope_cp = pcall(require, 'cp.pickers.telescope')
|
||||
local ok_cp, telescope_picker = pcall(require, 'cp.pickers.telescope')
|
||||
if not ok_cp then
|
||||
logger.log('Failed to load telescope integration', vim.log.levels.ERROR)
|
||||
return
|
||||
end
|
||||
telescope_cp.platform_picker()
|
||||
|
||||
picker = telescope_picker
|
||||
elseif config.picker == 'fzf-lua' then
|
||||
local ok, _ = pcall(require, 'fzf-lua')
|
||||
if not ok then
|
||||
|
|
@ -38,13 +41,16 @@ function M.handle_pick_action()
|
|||
)
|
||||
return
|
||||
end
|
||||
local ok_cp, fzf_cp = pcall(require, 'cp.pickers.fzf_lua')
|
||||
local ok_cp, fzf_picker = pcall(require, 'cp.pickers.fzf_lua')
|
||||
if not ok_cp then
|
||||
logger.log('Failed to load fzf-lua integration', vim.log.levels.ERROR)
|
||||
return
|
||||
end
|
||||
fzf_cp.platform_picker()
|
||||
|
||||
picker = fzf_picker
|
||||
end
|
||||
|
||||
picker.pick()
|
||||
end
|
||||
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -286,11 +286,6 @@ end
|
|||
---@param problem_id? string
|
||||
---@return string
|
||||
local function default_filename(contest_id, problem_id)
|
||||
vim.validate({
|
||||
contest_id = { contest_id, 'string' },
|
||||
problem_id = { problem_id, { 'string', 'nil' }, true },
|
||||
})
|
||||
|
||||
if problem_id then
|
||||
return (contest_id .. problem_id):lower()
|
||||
else
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
local picker_utils = require('cp.pickers')
|
||||
|
||||
local contest_picker, problem_picker
|
||||
local M = {}
|
||||
|
||||
function contest_picker(platform)
|
||||
local function contest_picker(platform)
|
||||
local constants = require('cp.constants')
|
||||
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
|
||||
local fzf = require('fzf-lua')
|
||||
|
|
@ -41,7 +41,8 @@ function contest_picker(platform)
|
|||
end
|
||||
|
||||
if contest then
|
||||
problem_picker(platform, contest.id)
|
||||
local cp = require('cp')
|
||||
cp.handle_command({ fargs = { platform, contest.id } })
|
||||
end
|
||||
end,
|
||||
['ctrl-r'] = function()
|
||||
|
|
@ -53,55 +54,7 @@ function contest_picker(platform)
|
|||
})
|
||||
end
|
||||
|
||||
function problem_picker(platform, contest_id)
|
||||
local constants = require('cp.constants')
|
||||
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
|
||||
local fzf = require('fzf-lua')
|
||||
local problems = picker_utils.get_problems_for_contest(platform, contest_id)
|
||||
|
||||
if #problems == 0 then
|
||||
vim.notify(
|
||||
("Contest %s %s hasn't started yet or has no available problems"):format(
|
||||
platform_display_name,
|
||||
contest_id
|
||||
),
|
||||
vim.log.levels.WARN
|
||||
)
|
||||
contest_picker(platform)
|
||||
return
|
||||
end
|
||||
|
||||
local entries = vim.tbl_map(function(problem)
|
||||
return problem.display_name
|
||||
end, problems)
|
||||
|
||||
return fzf.fzf_exec(entries, {
|
||||
prompt = ('Select Problem (%s %s)> '):format(platform_display_name, contest_id),
|
||||
actions = {
|
||||
['default'] = function(selected)
|
||||
if not selected or #selected == 0 then
|
||||
return
|
||||
end
|
||||
|
||||
local selected_name = selected[1]
|
||||
local problem = nil
|
||||
for _, p in ipairs(problems) do
|
||||
if p.display_name == selected_name then
|
||||
problem = p
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
if problem then
|
||||
local cp = require('cp')
|
||||
cp.handle_command({ fargs = { platform, contest_id, problem.id } })
|
||||
end
|
||||
end,
|
||||
},
|
||||
})
|
||||
end
|
||||
|
||||
local function platform_picker()
|
||||
function M.pick()
|
||||
local fzf = require('fzf-lua')
|
||||
local platforms = picker_utils.get_platforms()
|
||||
local entries = vim.tbl_map(function(platform)
|
||||
|
|
@ -133,6 +86,4 @@ local function platform_picker()
|
|||
})
|
||||
end
|
||||
|
||||
return {
|
||||
platform_picker = platform_picker,
|
||||
}
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ local utils = require('cp.utils')
|
|||
---@field display_name string Formatted display name for picker
|
||||
|
||||
---@return cp.PlatformItem[]
|
||||
local function get_platforms()
|
||||
function M.get_platforms()
|
||||
local constants = require('cp.constants')
|
||||
local result = {}
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ end
|
|||
---Get list of contests for a specific platform
|
||||
---@param platform string Platform identifier (e.g. "codeforces", "atcoder")
|
||||
---@return cp.ContestItem[]
|
||||
local function get_contests_for_platform(platform)
|
||||
function M.get_contests_for_platform(platform)
|
||||
logger.log('loading contests...', vim.log.levels.INFO, true)
|
||||
|
||||
cache.load()
|
||||
|
|
@ -108,7 +108,6 @@ local function get_contests_for_platform(platform)
|
|||
})
|
||||
end
|
||||
|
||||
cache.set_contest_list(platform, contests)
|
||||
logger.log(('loaded %d contests'):format(#contests))
|
||||
return contests
|
||||
end
|
||||
|
|
@ -116,7 +115,9 @@ end
|
|||
---@param platform string Platform identifier
|
||||
---@param contest_id string Contest identifier
|
||||
---@return cp.ProblemItem[]
|
||||
local function get_problems_for_contest(platform, contest_id)
|
||||
function M.get_problems_for_contest(platform, contest_id)
|
||||
logger.log('loading contest problems...', vim.log.levels.INFO, true)
|
||||
|
||||
local problems = {}
|
||||
|
||||
cache.load()
|
||||
|
|
@ -132,8 +133,6 @@ local function get_problems_for_contest(platform, contest_id)
|
|||
return problems
|
||||
end
|
||||
|
||||
logger.log('loading contest problems...', vim.log.levels.INFO, true)
|
||||
|
||||
if not utils.setup_python_env() then
|
||||
return problems
|
||||
end
|
||||
|
|
@ -197,16 +196,11 @@ end
|
|||
---@param platform string Platform identifier
|
||||
---@param contest_id string Contest identifier
|
||||
---@param problem_id string Problem identifier
|
||||
local function setup_problem(platform, contest_id, problem_id)
|
||||
function M.setup_problem(platform, contest_id, problem_id)
|
||||
vim.schedule(function()
|
||||
local cp = require('cp')
|
||||
cp.handle_command({ fargs = { platform, contest_id, problem_id } })
|
||||
end)
|
||||
end
|
||||
|
||||
M.get_platforms = get_platforms
|
||||
M.get_contests_for_platform = get_contests_for_platform
|
||||
M.get_problems_for_contest = get_problems_for_contest
|
||||
M.setup_problem = setup_problem
|
||||
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@ local actions = require('telescope.actions')
|
|||
|
||||
local picker_utils = require('cp.pickers')
|
||||
|
||||
local contest_picker, problem_picker
|
||||
local M = {}
|
||||
|
||||
function contest_picker(opts, platform)
|
||||
local function contest_picker(opts, platform)
|
||||
local constants = require('cp.constants')
|
||||
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
|
||||
local contests = picker_utils.get_contests_for_platform(platform)
|
||||
|
|
@ -24,7 +24,7 @@ function contest_picker(opts, platform)
|
|||
pickers
|
||||
.new(opts, {
|
||||
prompt_title = ('Select Contest (%s)'):format(platform_display_name),
|
||||
results_title = '<C-r> refresh',
|
||||
results_title = '<c-r> refresh',
|
||||
finder = finders.new_table({
|
||||
results = contests,
|
||||
entry_maker = function(entry)
|
||||
|
|
@ -42,11 +42,12 @@ function contest_picker(opts, platform)
|
|||
actions.close(prompt_bufnr)
|
||||
|
||||
if selection then
|
||||
problem_picker(opts, platform, selection.value.id)
|
||||
local cp = require('cp')
|
||||
cp.handle_command({ fargs = { platform, selection.value.id } })
|
||||
end
|
||||
end)
|
||||
|
||||
map('i', '<C-r>', function()
|
||||
map('i', '<c-r>', function()
|
||||
local cache = require('cp.cache')
|
||||
cache.clear_contest_list(platform)
|
||||
actions.close(prompt_bufnr)
|
||||
|
|
@ -59,54 +60,7 @@ function contest_picker(opts, platform)
|
|||
:find()
|
||||
end
|
||||
|
||||
function problem_picker(opts, platform, contest_id)
|
||||
local constants = require('cp.constants')
|
||||
local platform_display_name = constants.PLATFORM_DISPLAY_NAMES[platform] or platform
|
||||
local problems = picker_utils.get_problems_for_contest(platform, contest_id)
|
||||
|
||||
if #problems == 0 then
|
||||
vim.notify(
|
||||
("Contest %s %s hasn't started yet or has no available problems"):format(
|
||||
platform_display_name,
|
||||
contest_id
|
||||
),
|
||||
vim.log.levels.WARN
|
||||
)
|
||||
contest_picker(opts, platform)
|
||||
return
|
||||
end
|
||||
|
||||
pickers
|
||||
.new(opts, {
|
||||
prompt_title = ('Select Problem (%s %s)'):format(platform_display_name, contest_id),
|
||||
finder = finders.new_table({
|
||||
results = problems,
|
||||
entry_maker = function(entry)
|
||||
return {
|
||||
value = entry,
|
||||
display = entry.display_name,
|
||||
ordinal = entry.display_name,
|
||||
}
|
||||
end,
|
||||
}),
|
||||
sorter = conf.generic_sorter(opts),
|
||||
attach_mappings = function(prompt_bufnr)
|
||||
actions.select_default:replace(function()
|
||||
local selection = action_state.get_selected_entry()
|
||||
actions.close(prompt_bufnr)
|
||||
|
||||
if selection then
|
||||
local cp = require('cp')
|
||||
cp.handle_command({ fargs = { platform, contest_id, selection.value.id } })
|
||||
end
|
||||
end)
|
||||
return true
|
||||
end,
|
||||
})
|
||||
:find()
|
||||
end
|
||||
|
||||
local function platform_picker(opts)
|
||||
function M.pick(opts)
|
||||
opts = opts or {}
|
||||
|
||||
local platforms = picker_utils.get_platforms()
|
||||
|
|
@ -140,6 +94,4 @@ local function platform_picker(opts)
|
|||
:find()
|
||||
end
|
||||
|
||||
return {
|
||||
platform_picker = platform_picker,
|
||||
}
|
||||
return M
|
||||
|
|
|
|||
|
|
@ -15,11 +15,6 @@ local filetype_to_language = constants.filetype_to_language
|
|||
---@param contest_config table
|
||||
---@return string
|
||||
local function get_language_from_file(source_file, contest_config)
|
||||
vim.validate({
|
||||
source_file = { source_file, 'string' },
|
||||
contest_config = { contest_config, 'table' },
|
||||
})
|
||||
|
||||
local extension = vim.fn.fnamemodify(source_file, ':e')
|
||||
local language = filetype_to_language[extension] or contest_config.default_language
|
||||
return language
|
||||
|
|
@ -29,11 +24,6 @@ end
|
|||
---@param substitutions table<string, string>
|
||||
---@return string[]
|
||||
local function substitute_template(cmd_template, substitutions)
|
||||
vim.validate({
|
||||
cmd_template = { cmd_template, 'table' },
|
||||
substitutions = { substitutions, 'table' },
|
||||
})
|
||||
|
||||
local result = {}
|
||||
for _, arg in ipairs(cmd_template) do
|
||||
local substituted = arg
|
||||
|
|
@ -50,12 +40,6 @@ end
|
|||
---@param substitutions table<string, string>
|
||||
---@return string[]
|
||||
local function build_command(cmd_template, executable, substitutions)
|
||||
vim.validate({
|
||||
cmd_template = { cmd_template, 'table' },
|
||||
executable = { executable, { 'string', 'nil' }, true },
|
||||
substitutions = { substitutions, 'table' },
|
||||
})
|
||||
|
||||
local cmd = substitute_template(cmd_template, substitutions)
|
||||
if executable then
|
||||
table.insert(cmd, 1, executable)
|
||||
|
|
@ -67,11 +51,6 @@ end
|
|||
---@param substitutions table<string, string>
|
||||
---@return {code: integer, stdout: string, stderr: string}
|
||||
function M.compile_generic(language_config, substitutions)
|
||||
vim.validate({
|
||||
language_config = { language_config, 'table' },
|
||||
substitutions = { substitutions, 'table' },
|
||||
})
|
||||
|
||||
if not language_config.compile then
|
||||
logger.log('no compilation step required')
|
||||
return { code = 0, stderr = '' }
|
||||
|
|
@ -107,12 +86,6 @@ end
|
|||
---@param timeout_ms number
|
||||
---@return ExecuteResult
|
||||
local function execute_command(cmd, input_data, timeout_ms)
|
||||
vim.validate({
|
||||
cmd = { cmd, 'table' },
|
||||
input_data = { input_data, 'string' },
|
||||
timeout_ms = { timeout_ms, 'number' },
|
||||
})
|
||||
|
||||
local redirected_cmd = vim.deepcopy(cmd)
|
||||
if #redirected_cmd > 0 then
|
||||
redirected_cmd[#redirected_cmd] = redirected_cmd[#redirected_cmd] .. ' 2>&1'
|
||||
|
|
@ -158,12 +131,6 @@ end
|
|||
---@param is_debug boolean
|
||||
---@return string
|
||||
local function format_output(exec_result, expected_file, is_debug)
|
||||
vim.validate({
|
||||
exec_result = { exec_result, 'table' },
|
||||
expected_file = { expected_file, 'string' },
|
||||
is_debug = { is_debug, 'boolean' },
|
||||
})
|
||||
|
||||
local output_lines = { exec_result.stdout }
|
||||
local metadata_lines = {}
|
||||
|
||||
|
|
@ -207,10 +174,6 @@ end
|
|||
---@param is_debug? boolean
|
||||
---@return {success: boolean, output: string?}
|
||||
function M.compile_problem(contest_config, is_debug)
|
||||
vim.validate({
|
||||
contest_config = { contest_config, 'table' },
|
||||
})
|
||||
|
||||
local state = require('cp.state')
|
||||
local source_file = state.get_source_file()
|
||||
if not source_file then
|
||||
|
|
@ -249,12 +212,9 @@ function M.compile_problem(contest_config, is_debug)
|
|||
return { success = true, output = nil }
|
||||
end
|
||||
|
||||
---@param contest_config ContestConfig
|
||||
---@param is_debug boolean
|
||||
function M.run_problem(contest_config, is_debug)
|
||||
vim.validate({
|
||||
contest_config = { contest_config, 'table' },
|
||||
is_debug = { is_debug, 'boolean' },
|
||||
})
|
||||
|
||||
local state = require('cp.state')
|
||||
local source_file = state.get_source_file()
|
||||
local output_file = state.get_output_file()
|
||||
|
|
|
|||
|
|
@ -7,8 +7,10 @@ requires-python = ">=3.11"
|
|||
dependencies = [
|
||||
"backoff>=2.2.1",
|
||||
"beautifulsoup4>=4.13.5",
|
||||
"cloudscraper>=1.2.71",
|
||||
"curl-cffi>=0.13.0",
|
||||
"playwright>=1.55.0",
|
||||
"requests>=2.32.5",
|
||||
"scrapling[fetchers]>=0.3.5",
|
||||
"scrapy>=2.13.3",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,44 @@ from .models import (
|
|||
)
|
||||
|
||||
|
||||
def _make_request(url: str, timeout: int = 10) -> requests.Response:
    """Fetch ``url`` with a browser-like User-Agent, retrying transient failures.

    Two retry layers wrap the underlying ``requests.get`` call. Each uses
    exponential backoff with random jitter and gives up after five tries:

    * an exception raised by the GET itself (connection error, timeout, ...);
    * a response whose status code is 429 (rate limited).

    ``raise_for_status()`` is called only after the retry wrappers have
    returned, so an error status is raised to the caller instead of being
    retried. A 429 that is still returned after the last try is raised here
    as well.

    Args:
        url: URL to fetch.
        timeout: Per-attempt timeout in seconds, forwarded to ``requests.get``.

    Returns:
        The response; its status code is not a 4xx/5xx error.

    Raises:
        requests.exceptions.RequestException: the GET kept failing after the
            final try, or the final response carried an error status
            (``requests.exceptions.HTTPError``).
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }

    def _log_request_error(details) -> None:
        # Diagnostics go to stderr so they never mix with the scraper's stdout.
        print(
            f"Request error on {url} (attempt {details['tries']}), "
            f"retrying in {details['wait']:.1f}s: {details['exception']}",
            file=sys.stderr,
        )

    def _log_rate_limited(details) -> None:
        print(
            f"Rate limited on {url}, retrying in {details['wait']:.1f}s",
            file=sys.stderr,
        )

    # HTTPError is a subclass of RequestException, and _req() never calls
    # raise_for_status(), so catching the base class alone covers every
    # exception this closure can raise.
    @backoff.on_exception(
        backoff.expo,
        requests.exceptions.RequestException,
        max_tries=5,
        jitter=backoff.random_jitter,
        on_backoff=_log_request_error,
    )
    @backoff.on_predicate(
        backoff.expo,
        lambda resp: resp.status_code == 429,
        max_tries=5,
        jitter=backoff.random_jitter,
        on_backoff=_log_rate_limited,
    )
    def _req():
        return requests.get(url, headers=headers, timeout=timeout)

    resp = _req()
    resp.raise_for_status()
    return resp
|
||||
|
||||
|
||||
def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
||||
timeout_ms = None
|
||||
memory_mb = None
|
||||
|
|
@ -82,12 +120,7 @@ def extract_problem_from_row(row, contest_id: str) -> ProblemSummary | None:
|
|||
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
||||
try:
|
||||
contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks"
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
response = requests.get(contest_url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
response = _make_request(contest_url)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
task_table = soup.find("table", class_="table")
|
||||
|
|
@ -138,12 +171,7 @@ def extract_test_case_from_headers(sample_headers, i: int) -> tuple[str, str] |
|
|||
|
||||
def scrape(url: str) -> list[TestCase]:
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
response = _make_request(url)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
sample_headers = soup.find_all(
|
||||
|
|
@ -171,14 +199,7 @@ def scrape(url: str) -> list[TestCase]:
|
|||
def scrape_contests() -> list[ContestSummary]:
|
||||
def get_max_pages() -> int:
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
response = requests.get(
|
||||
"https://atcoder.jp/contests/archive", headers=headers, timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
response = _make_request("https://atcoder.jp/contests/archive")
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
pagination = soup.find("ul", class_="pagination")
|
||||
if not pagination or not isinstance(pagination, Tag):
|
||||
|
|
@ -196,37 +217,8 @@ def scrape_contests() -> list[ContestSummary]:
|
|||
return 15
|
||||
|
||||
def scrape_page(page: int) -> list[ContestSummary]:
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
(requests.exceptions.RequestException, requests.exceptions.HTTPError),
|
||||
max_tries=4,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Request failed on page {page} (attempt {details['tries']}), retrying in {details['wait']:.1f}s: {details['exception']}",
|
||||
file=sys.stderr,
|
||||
),
|
||||
)
|
||||
@backoff.on_predicate(
|
||||
backoff.expo,
|
||||
lambda response: response.status_code == 429,
|
||||
max_tries=4,
|
||||
jitter=backoff.random_jitter,
|
||||
on_backoff=lambda details: print(
|
||||
f"Rate limited on page {page}, retrying in {details['wait']:.1f}s",
|
||||
file=sys.stderr,
|
||||
),
|
||||
)
|
||||
def make_request() -> requests.Response:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
}
|
||||
url = f"https://atcoder.jp/contests/archive?page={page}"
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
try:
|
||||
response = make_request()
|
||||
response = _make_request(f"https://atcoder.jp/contests/archive?page={page}")
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
|
@ -354,15 +346,7 @@ class AtCoderScraper(BaseScraper):
|
|||
url = parse_problem_url(contest_id, problem_letter)
|
||||
tests = scrape(url)
|
||||
|
||||
response = requests.get(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
response = _make_request(url)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ import re
|
|||
import sys
|
||||
from dataclasses import asdict
|
||||
|
||||
import cloudscraper
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from scrapling.fetchers import StealthySession
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@ -21,11 +22,11 @@ from .models import (
|
|||
|
||||
def scrape(url: str) -> list[TestCase]:
|
||||
try:
|
||||
scraper = cloudscraper.create_scraper()
|
||||
response = scraper.get(url, timeout=10)
|
||||
response.raise_for_status()
|
||||
with StealthySession(headless=True, solve_cloudflare=True) as session:
|
||||
page = session.fetch(url, google_search=False)
|
||||
html = page.html_content
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
input_sections = soup.find_all("div", class_="input")
|
||||
output_sections = soup.find_all("div", class_="output")
|
||||
|
||||
|
|
@ -139,7 +140,7 @@ def scrape(url: str) -> list[TestCase]:
|
|||
return [TestCase(input=combined_input, expected=combined_output)]
|
||||
|
||||
except Exception as e:
|
||||
print(f"CloudScraper failed: {e}", file=sys.stderr)
|
||||
print(f"Scrapling failed: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
|
||||
|
|
@ -180,11 +181,11 @@ def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
|||
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
||||
try:
|
||||
contest_url: str = f"https://codeforces.com/contest/{contest_id}"
|
||||
scraper = cloudscraper.create_scraper()
|
||||
response = scraper.get(contest_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
with StealthySession(headless=True, solve_cloudflare=True) as session:
|
||||
page = session.fetch(contest_url, google_search=False)
|
||||
html = page.html_content
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
problems: list[ProblemSummary] = []
|
||||
|
||||
problem_links = soup.find_all(
|
||||
|
|
@ -224,8 +225,7 @@ def scrape_sample_tests(url: str) -> list[TestCase]:
|
|||
|
||||
|
||||
def scrape_contests() -> list[ContestSummary]:
|
||||
scraper = cloudscraper.create_scraper()
|
||||
response = scraper.get("https://codeforces.com/api/contest.list", timeout=10)
|
||||
response = requests.get("https://codeforces.com/api/contest.list", timeout=10)
|
||||
response.raise_for_status()
|
||||
|
||||
data = response.json()
|
||||
|
|
@ -236,7 +236,6 @@ def scrape_contests() -> list[ContestSummary]:
|
|||
for contest in data["result"]:
|
||||
contest_id = str(contest["id"])
|
||||
name = contest["name"]
|
||||
|
||||
contests.append(ContestSummary(id=contest_id, name=name, display_name=name))
|
||||
|
||||
return contests
|
||||
|
|
@ -277,10 +276,10 @@ class CodeforcesScraper(BaseScraper):
|
|||
url = parse_problem_url(contest_id, problem_letter)
|
||||
tests = scrape_sample_tests(url)
|
||||
|
||||
scraper = cloudscraper.create_scraper()
|
||||
response = scraper.get(url, timeout=self.config.timeout_seconds)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
with StealthySession(headless=True, solve_cloudflare=True) as session:
|
||||
page = session.fetch(url, google_search=False)
|
||||
html = page.html_content
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
||||
|
||||
problem_statement_div = soup.find("div", class_="problem-statement")
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ class ScrapingResult:
|
|||
class MetadataResult(ScrapingResult):
|
||||
contest_id: str = ""
|
||||
problems: list[ProblemSummary] = field(default_factory=list)
|
||||
categories: dict[str, list[ProblemSummary]] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -235,28 +235,6 @@ describe('cp.execute', function()
|
|||
end)
|
||||
end)
|
||||
|
||||
describe('parameter validation', function()
|
||||
it('validates language_config parameter', function()
|
||||
assert.has_error(function()
|
||||
execute.compile_generic(nil, {})
|
||||
end)
|
||||
|
||||
assert.has_error(function()
|
||||
execute.compile_generic('not_table', {})
|
||||
end)
|
||||
end)
|
||||
|
||||
it('validates substitutions parameter', function()
|
||||
assert.has_error(function()
|
||||
execute.compile_generic({}, nil)
|
||||
end)
|
||||
|
||||
assert.has_error(function()
|
||||
execute.compile_generic({}, 'not_table')
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('directory creation', function()
|
||||
it('creates build and io directories', function()
|
||||
local language_config = {
|
||||
|
|
|
|||
|
|
@ -17,15 +17,24 @@ describe('cp.fzf_lua', function()
|
|||
|
||||
describe('module loading', function()
|
||||
it('loads fzf-lua integration without error', function()
|
||||
assert.has_no.errors(function()
|
||||
assert.has_no_errors(function()
|
||||
require('cp.pickers.fzf_lua')
|
||||
end)
|
||||
end)
|
||||
|
||||
it('returns module with platform_picker function', function()
|
||||
it('returns module with picker function', function()
|
||||
local fzf_lua_cp = require('cp.pickers.fzf_lua')
|
||||
assert.is_table(fzf_lua_cp)
|
||||
assert.is_function(fzf_lua_cp.platform_picker)
|
||||
assert.is_function(fzf_lua_cp.pick)
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('basic running', function()
|
||||
it('can run and open the picker with :CP pick', function()
|
||||
local cp = require('cp')
|
||||
assert.has_no_errors(function()
|
||||
cp.handle_command({ fargs = { 'pick' } })
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
|
|
|
|||
|
|
@ -129,7 +129,6 @@ describe('cp.picker', function()
|
|||
return '/tmp'
|
||||
end
|
||||
|
||||
-- Mock vim.system to return success with problems
|
||||
vim.system = function()
|
||||
return {
|
||||
wait = function()
|
||||
|
|
@ -188,27 +187,4 @@ describe('cp.picker', function()
|
|||
assert.equals(0, #problems)
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('setup_problem', function()
|
||||
it('calls cp.handle_command with correct arguments', function()
|
||||
local cp = require('cp')
|
||||
local called_with = nil
|
||||
|
||||
cp.handle_command = function(opts)
|
||||
called_with = opts
|
||||
end
|
||||
|
||||
picker.setup_problem('codeforces', '1951', 'a')
|
||||
|
||||
vim.wait(100, function()
|
||||
return called_with ~= nil
|
||||
end)
|
||||
|
||||
assert.is_table(called_with)
|
||||
assert.is_table(called_with.fargs)
|
||||
assert.equals('codeforces', called_with.fargs[1])
|
||||
assert.equals('1951', called_with.fargs[2])
|
||||
assert.equals('a', called_with.fargs[3])
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
|
|
|
|||
|
|
@ -64,15 +64,24 @@ describe('cp.telescope', function()
|
|||
|
||||
describe('module loading', function()
|
||||
it('registers telescope extension without error', function()
|
||||
assert.has_no.errors(function()
|
||||
assert.has_no_errors(function()
|
||||
require('cp.pickers.telescope')
|
||||
end)
|
||||
end)
|
||||
|
||||
it('returns module with platform_picker function', function()
|
||||
it('returns module with picker function', function()
|
||||
local telescope_cp = require('cp.pickers.telescope')
|
||||
assert.is_table(telescope_cp)
|
||||
assert.is_function(telescope_cp.platform_picker)
|
||||
assert.is_function(telescope_cp.pick)
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('basic running', function()
|
||||
it('can run and open the picker with :CP pick', function()
|
||||
local cp = require('cp')
|
||||
assert.has_no_errors(function()
|
||||
cp.handle_command({ fargs = { 'pick' } })
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
|
|
|
|||
|
|
@ -4,15 +4,18 @@ from scrapers.codeforces import CodeforcesScraper
|
|||
from scrapers.models import ContestSummary, ProblemSummary
|
||||
|
||||
|
||||
def test_scrape_success(mocker, mock_codeforces_html):
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.text = mock_codeforces_html
|
||||
mock_scraper.get.return_value = mock_response
|
||||
def make_mock_session(html: str):
|
||||
"""Return a mock StealthySession that yields the given HTML."""
|
||||
mock_session = Mock()
|
||||
mock_session.fetch.return_value.html_content = html
|
||||
mock_session.__enter__ = lambda s: s
|
||||
mock_session.__exit__ = lambda s, exc_type, exc_val, exc_tb: None
|
||||
return mock_session
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
|
||||
def test_scrape_success(mocker, mock_codeforces_html):
|
||||
mock_session = make_mock_session(mock_codeforces_html)
|
||||
mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
|
@ -24,17 +27,12 @@ def test_scrape_success(mocker, mock_codeforces_html):
|
|||
|
||||
|
||||
def test_scrape_contest_problems(mocker):
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.text = """
|
||||
html = """
|
||||
<a href="/contest/1900/problem/A">A. Problem A</a>
|
||||
<a href="/contest/1900/problem/B">B. Problem B</a>
|
||||
"""
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
mock_session = make_mock_session(html)
|
||||
mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_metadata("1900")
|
||||
|
|
@ -46,12 +44,11 @@ def test_scrape_contest_problems(mocker):
|
|||
|
||||
|
||||
def test_scrape_network_error(mocker):
|
||||
mock_scraper = Mock()
|
||||
mock_scraper.get.side_effect = Exception("Network error")
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
mock_session = Mock()
|
||||
mock_session.fetch.side_effect = Exception("Network error")
|
||||
mock_session.__enter__ = lambda s: s
|
||||
mock_session.__exit__ = lambda s, exc_type, exc_val, exc_tb: None
|
||||
mocker.patch("scrapers.codeforces.StealthySession", return_value=mock_session)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_problem_tests("1900", "A")
|
||||
|
|
@ -61,7 +58,6 @@ def test_scrape_network_error(mocker):
|
|||
|
||||
|
||||
def test_scrape_contests_success(mocker):
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {
|
||||
"status": "OK",
|
||||
|
|
@ -71,11 +67,7 @@ def test_scrape_contests_success(mocker):
|
|||
{"id": 1949, "name": "Codeforces Global Round 26"},
|
||||
],
|
||||
}
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_list()
|
||||
|
|
@ -87,27 +79,12 @@ def test_scrape_contests_success(mocker):
|
|||
name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||
display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||
)
|
||||
assert result.contests[1] == ContestSummary(
|
||||
id="1950",
|
||||
name="Codeforces Round 936 (Div. 2)",
|
||||
display_name="Codeforces Round 936 (Div. 2)",
|
||||
)
|
||||
assert result.contests[2] == ContestSummary(
|
||||
id="1949",
|
||||
name="Codeforces Global Round 26",
|
||||
display_name="Codeforces Global Round 26",
|
||||
)
|
||||
|
||||
|
||||
def test_scrape_contests_api_error(mocker):
|
||||
mock_scraper = Mock()
|
||||
mock_response = Mock()
|
||||
mock_response.json.return_value = {"status": "FAILED", "result": []}
|
||||
mock_scraper.get.return_value = mock_response
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
)
|
||||
mocker.patch("scrapers.codeforces.requests.get", return_value=mock_response)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
result = scraper.scrape_contest_list()
|
||||
|
|
@ -117,11 +94,8 @@ def test_scrape_contests_api_error(mocker):
|
|||
|
||||
|
||||
def test_scrape_contests_network_error(mocker):
|
||||
mock_scraper = Mock()
|
||||
mock_scraper.get.side_effect = Exception("Network error")
|
||||
|
||||
mocker.patch(
|
||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
||||
"scrapers.codeforces.requests.get", side_effect=Exception("Network error")
|
||||
)
|
||||
|
||||
scraper = CodeforcesScraper()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue