diff --git a/lua/cp/cache.lua b/lua/cp/cache.lua new file mode 100644 index 0000000..0294b50 --- /dev/null +++ b/lua/cp/cache.lua @@ -0,0 +1,90 @@ +local M = {} + +local cache_file = vim.fn.stdpath("data") .. "/cp-contest-cache.json" +local cache_data = {} + +local function get_expiry_date(contest_type) + if contest_type == "cses" then + return os.time() + (30 * 24 * 60 * 60) + end + return nil +end + +local function is_cache_valid(contest_data, contest_type) + if contest_type ~= "cses" then + return true + end + + local expires_at = contest_data.expires_at + if not expires_at then + return false + end + + return os.time() < expires_at +end + +function M.load() + if vim.fn.filereadable(cache_file) == 0 then + cache_data = {} + return + end + + local content = vim.fn.readfile(cache_file) + if #content == 0 then + cache_data = {} + return + end + + local ok, decoded = pcall(vim.json.decode, table.concat(content, "\n")) + if ok then + cache_data = decoded + else + cache_data = {} + end +end + +function M.save() + vim.fn.mkdir(vim.fn.fnamemodify(cache_file, ":h"), "p") + local encoded = vim.json.encode(cache_data) + vim.fn.writefile(vim.split(encoded, "\n"), cache_file) +end + +function M.get_contest_data(contest_type, contest_id) + if not cache_data[contest_type] then + return nil + end + + local contest_data = cache_data[contest_type][contest_id] + if not contest_data then + return nil + end + + if not is_cache_valid(contest_data, contest_type) then + return nil + end + + return contest_data +end + +function M.set_contest_data(contest_type, contest_id, problems) + if not cache_data[contest_type] then + cache_data[contest_type] = {} + end + + cache_data[contest_type][contest_id] = { + problems = problems, + scraped_at = os.date("%Y-%m-%d"), + expires_at = get_expiry_date(contest_type), + } + + M.save() +end + +function M.clear_contest_data(contest_type, contest_id) + if cache_data[contest_type] and cache_data[contest_type][contest_id] then + cache_data[contest_type][contest_id] = nil + M.save() + end +end + +return M \ No newline at end of file diff --git a/lua/cp/config.lua b/lua/cp/config.lua index 548bdc9..9f39c60 100644 --- a/lua/cp/config.lua +++ b/lua/cp/config.lua @@ -44,8 +44,8 @@ local function extend_contest_config(base_config, contest_config) local result = vim.tbl_deep_extend("force", base_config, contest_config) local std_flag = ("-std=c++%d"):format(result.cpp_version) - table.insert(result.compile_flags, 1, std_flag) - table.insert(result.debug_flags, 1, std_flag) + result.compile_flags = vim.list_extend({ std_flag }, result.compile_flags) + result.debug_flags = vim.list_extend({ std_flag }, result.debug_flags) return result end diff --git a/lua/cp/execute.lua b/lua/cp/execute.lua index d1acc8e..774346b 100644 --- a/lua/cp/execute.lua +++ b/lua/cp/execute.lua @@ -2,12 +2,20 @@ local M = {} local signal_codes = { [128] = "SIGILL", - [130] = "SIGABRT", - [131] = "SIGBUS", + [130] = "SIGINT", + [131] = "SIGQUIT", + [132] = "SIGILL", + [133] = "SIGTRAP", + [134] = "SIGABRT", + [135] = "SIGBUS", [136] = "SIGFPE", - [135] = "SIGSEGV", - [137] = "SIGPIPE", - [139] = "SIGTERM", + [137] = "SIGKILL", + [138] = "SIGUSR1", + [139] = "SIGSEGV", + [140] = "SIGUSR2", + [141] = "SIGPIPE", + [142] = "SIGALRM", + [143] = "SIGTERM", } local function ensure_directories() @@ -31,34 +39,41 @@ local function execute_binary(binary_path, input_data, timeout_ms) local end_time = vim.loop.hrtime() local execution_time = (end_time - start_time) / 1000000 + local actual_code = result.code or 0 + return { stdout = result.stdout or "", stderr = result.stderr or "", - code = result.code, + code = actual_code, time_ms = execution_time, timed_out = result.code == 124, } end local function format_output(exec_result, expected_file, is_debug) - local lines = { exec_result.stdout } + local output_lines = { exec_result.stdout } + local metadata_lines = {} if exec_result.timed_out then - table.insert(lines, "\n[code]: 124 (TIMEOUT)") + table.insert(metadata_lines, "[code]: 124 (TIMEOUT)") elseif exec_result.code >= 128 then local signal_name = signal_codes[exec_result.code] or "SIGNAL" - table.insert(lines, ("\n[code]: %d (%s)"):format(exec_result.code, signal_name)) + table.insert(metadata_lines, ("[code]: %d (%s)"):format(exec_result.code, signal_name)) else - table.insert(lines, ("\n[code]: %d"):format(exec_result.code)) + table.insert(metadata_lines, ("[code]: %d"):format(exec_result.code)) end - table.insert(lines, ("\n[time]: %.2f ms"):format(exec_result.time_ms)) - table.insert(lines, ("\n[debug]: %s"):format(is_debug and "true" or "false")) + table.insert(metadata_lines, ("[time]: %.2f ms"):format(exec_result.time_ms)) + table.insert(metadata_lines, ("[debug]: %s"):format(is_debug and "true" or "false")) if vim.fn.filereadable(expected_file) == 1 and exec_result.code == 0 then local expected_content = vim.fn.readfile(expected_file) local actual_lines = vim.split(exec_result.stdout, "\n") + while #actual_lines > 0 and actual_lines[#actual_lines] == "" do + table.remove(actual_lines) + end + local matches = #actual_lines == #expected_content if matches then for i, line in ipairs(actual_lines) do @@ -69,10 +84,10 @@ local function format_output(exec_result, expected_file, is_debug) end end - table.insert(lines, ("\n[matches]: %s"):format(matches and "true" or "false")) + table.insert(metadata_lines, ("[matches]: %s"):format(matches and "true" or "false")) end - return table.concat(lines, "") + return table.concat(output_lines, "") .. "\n" .. table.concat(metadata_lines, "\n") end ---@param ctx ProblemContext diff --git a/lua/cp/init.lua b/lua/cp/init.lua index 5dc988b..73d9fd5 100644 --- a/lua/cp/init.lua +++ b/lua/cp/init.lua @@ -5,6 +5,7 @@ local scrape = require("cp.scrape") local window = require("cp.window") local logger = require("cp.log") local problem = require("cp.problem") +local cache = require("cp.cache") local M = {} local config = {} @@ -14,27 +15,33 @@ if not vim.fn.has("nvim-0.10.0") then return M end -local competition_types = { "atcoder", "codeforces", "cses" } +local platforms = { "atcoder", "codeforces", "cses" } +local actions = { "run", "debug", "diff", "next", "prev" } -local function setup_contest(contest_type) - if not vim.tbl_contains(competition_types, contest_type) then - logger.log( - ("unknown contest type. Available: [%s]"):format(table.concat(competition_types, ", ")), - vim.log.levels.ERROR - ) +local function set_platform(platform) + if not vim.tbl_contains(platforms, platform) then + logger.log(("unknown platform. Available: [%s]"):format(table.concat(platforms, ", ")), vim.log.levels.ERROR) + return false + end + + vim.g.cp = vim.g.cp or {} + vim.g.cp.platform = platform + vim.fn.mkdir("build", "p") + vim.fn.mkdir("io", "p") + return true +end + +---@param contest_id string +---@param problem_id? string +local function setup_problem(contest_id, problem_id) + if not vim.g.cp or not vim.g.cp.platform then + logger.log("no platform set. run :CP first", vim.log.levels.ERROR) return end - vim.g.cp_contest = contest_type - vim.fn.mkdir("build", "p") - vim.fn.mkdir("io", "p") - logger.log(("set up %s contest environment"):format(contest_type)) -end - -local function setup_problem(contest_id, problem_id) - if not vim.g.cp_contest then - logger.log("no contest mode set. run :CP first", vim.log.levels.ERROR) - return + local metadata_result = scrape.scrape_contest_metadata(vim.g.cp.platform, contest_id) + if not metadata_result.success then + logger.log("failed to load contest metadata: " .. (metadata_result.error or "unknown error"), vim.log.levels.WARN) end if vim.g.cp_diff_mode then @@ -52,18 +59,19 @@ local function setup_problem(contest_id, problem_id) vim.cmd("silent only") - vim.g.cp_contest_id = contest_id - vim.g.cp_problem_id = problem_id + vim.g.cp.contest_id = contest_id + vim.g.cp.problem_id = problem_id - local ctx = problem.create_context(vim.g.cp_contest, contest_id, problem_id, config) + local ctx = problem.create_context(vim.g.cp.platform, contest_id, problem_id, config) local scrape_result = scrape.scrape_problem(ctx) if not scrape_result.success then - logger.log("scraping failed: " .. scrape_result.error, vim.log.levels.WARN) + logger.log("scraping failed: " .. (scrape_result.error or "unknown error"), vim.log.levels.WARN) logger.log("you can manually add test cases to io/ directory", vim.log.levels.INFO) else - logger.log(("scraped %d test case(s) for %s"):format(scrape_result.test_count, scrape_result.problem_id)) + local test_count = scrape_result.test_count or 0 + logger.log(("scraped %d test case(s) for %s"):format(test_count, scrape_result.problem_id)) end vim.cmd.e(ctx.source_file) @@ -71,8 +79,8 @@ local function setup_problem(contest_id, problem_id) if vim.api.nvim_buf_get_lines(0, 0, -1, true)[1] == "" then local has_luasnip, luasnip = pcall(require, "luasnip") if has_luasnip then - vim.api.nvim_buf_set_lines(0, 0, -1, false, { vim.g.cp_contest }) - vim.api.nvim_win_set_cursor(0, { 1, #vim.g.cp_contest }) + vim.api.nvim_buf_set_lines(0, 0, -1, false, { vim.g.cp.platform }) + vim.api.nvim_win_set_cursor(0, { 1, #vim.g.cp.platform }) vim.cmd.startinsert({ bang = true }) vim.schedule(function() @@ -82,7 +90,7 @@ local function setup_problem(contest_id, problem_id) vim.cmd.stopinsert() end) else - vim.api.nvim_input(("i%s"):format(vim.g.cp_contest)) + vim.api.nvim_input(("i%s"):format(vim.g.cp.platform)) end end @@ -123,12 +131,12 @@ local function run_problem() config.hooks.before_run(problem_id) end - if not vim.g.cp_contest then - logger.log("no contest mode set", vim.log.levels.ERROR) + if not vim.g.cp_platform then + logger.log("no platform set", vim.log.levels.ERROR) return end - local contest_config = config.contests[vim.g.cp_contest] + local contest_config = config.contests[vim.g.cp_platform] vim.schedule(function() local ctx = problem.create_context(vim.g.cp_contest, vim.g.cp_contest_id, vim.g.cp_problem_id, config) @@ -147,12 +155,12 @@ local function debug_problem() config.hooks.before_debug(problem_id) end - if not vim.g.cp_contest then - logger.log("no contest mode set", vim.log.levels.ERROR) + if not vim.g.cp_platform then + logger.log("no platform set", vim.log.levels.ERROR) return end - local contest_config = config.contests[vim.g.cp_contest] + local contest_config = config.contests[vim.g.cp_platform] vim.schedule(function() local ctx = problem.create_context(vim.g.cp_contest, vim.g.cp_contest_id, vim.g.cp_problem_id, config) @@ -193,6 +201,64 @@ local function diff_problem() end end +---@param delta number 1 for next, -1 for prev +local function navigate_problem(delta) + if not vim.g.cp_platform or not vim.g.cp_contest_id then + logger.log("no contest set. run :CP first", vim.log.levels.ERROR) + return + end + + cache.load() + local contest_data = cache.get_contest_data(vim.g.cp_platform, vim.g.cp_contest_id) + if not contest_data or not contest_data.problems then + logger.log("no contest metadata found. set up a problem first to cache contest data", vim.log.levels.ERROR) + return + end + + local problems = contest_data.problems + local current_problem_id + + if vim.g.cp_platform == "cses" then + current_problem_id = vim.g.cp_contest_id + else + current_problem_id = vim.g.cp_problem_id + end + + if not current_problem_id then + logger.log("no current problem set", vim.log.levels.ERROR) + return + end + + local current_index = nil + for i, problem in ipairs(problems) do + if problem.id == current_problem_id then + current_index = i + break + end + end + + if not current_index then + logger.log("current problem not found in contest", vim.log.levels.ERROR) + return + end + + local new_index = current_index + delta + + if new_index < 1 or new_index > #problems then + local direction = delta > 0 and "next" or "previous" + logger.log(("no %s problem available"):format(direction), vim.log.levels.INFO) + return + end + + local new_problem = problems[new_index] + + if vim.g.cp_platform == "cses" then + setup_problem(new_problem.id) + else + setup_problem(vim.g.cp_contest_id, new_problem.id) + end +end + local initialized = false function M.is_initialized() @@ -210,43 +276,92 @@ function M.setup(user_config) initialized = true end -function M.handle_command(opts) - local args = opts.fargs +local function parse_command(args) if #args == 0 then - logger.log("Usage: :CP ", vim.log.levels.ERROR) + return { type = "error", message = "Usage: :CP [problem] | :CP | :CP " } + end + + local first = args[1] + + if vim.tbl_contains(actions, first) then + return { type = "action", action = first } + end + + if vim.tbl_contains(platforms, first) then + if #args == 1 then + return { type = "platform_only", platform = first } + elseif #args == 2 then + return { type = "contest_setup", platform = first, contest = args[2] } + elseif #args == 3 then + return { type = "full_setup", platform = first, contest = args[2], problem = args[3] } + else + return { type = "error", message = "Too many arguments" } + end + end + + if vim.g.cp and vim.g.cp.platform and vim.g.cp.contest_id then + return { type = "problem_switch", problem = first } + end + + return { type = "error", message = "Unknown command or no contest context" } +end + +function M.handle_command(opts) + local cmd = parse_command(opts.fargs) + + if cmd.type == "error" then + logger.log(cmd.message, vim.log.levels.ERROR) return end - local cmd = args[1] + if cmd.type == "action" then + if cmd.action == "run" then + run_problem() + elseif cmd.action == "debug" then + debug_problem() + elseif cmd.action == "diff" then + diff_problem() + elseif cmd.action == "next" then + navigate_problem(1) + elseif cmd.action == "prev" then + navigate_problem(-1) + end + return + end - if vim.tbl_contains(competition_types, cmd) then - if args[2] then - setup_contest(cmd) - if (cmd == "atcoder" or cmd == "codeforces") and args[3] then - setup_problem(args[2], args[3]) + if cmd.type == "platform_only" then + set_platform(cmd.platform) + return + end + + if cmd.type == "contest_setup" then + if set_platform(cmd.platform) then + vim.g.cp.contest_id = cmd.contest + local metadata_result = scrape.scrape_contest_metadata(cmd.platform, cmd.contest) + if not metadata_result.success then + logger.log("failed to load contest metadata: " .. (metadata_result.error or "unknown error"), vim.log.levels.WARN) else - setup_problem(args[2]) + logger.log(("loaded %d problems for %s %s"):format(#metadata_result.problems, cmd.platform, cmd.contest)) end - else - setup_contest(cmd) end - elseif cmd == "run" then - run_problem() - elseif cmd == "debug" then - debug_problem() - elseif cmd == "diff" then - diff_problem() - elseif vim.g.cp_contest and not vim.tbl_contains(competition_types, cmd) then - if (vim.g.cp_contest == "atcoder" or vim.g.cp_contest == "codeforces") and args[2] then - setup_problem(cmd, args[2]) - else - setup_problem(cmd) + return + end + + if cmd.type == "full_setup" then + if set_platform(cmd.platform) then + vim.g.cp.contest_id = cmd.contest + setup_problem(cmd.contest, cmd.problem) end - else - logger.log( - ("unknown contest type '%s'. Available: [%s]"):format(cmd, table.concat(competition_types, ", ")), - vim.log.levels.ERROR - ) + return + end + + if cmd.type == "problem_switch" then + if vim.g.cp.platform == "cses" then + setup_problem(cmd.problem) + else + setup_problem(vim.g.cp.contest_id, cmd.problem) + end + return end end diff --git a/lua/cp/problem.lua b/lua/cp/problem.lua index 2ae83a7..4aa73d2 100644 --- a/lua/cp/problem.lua +++ b/lua/cp/problem.lua @@ -3,7 +3,7 @@ ---@field contest_id string Contest ID (e.g. "abc123", "1933") ---@field problem_id? string Problem ID for AtCoder/Codeforces (e.g. "a", "b") ---@field source_file string Source filename (e.g. "abc123a.cpp") ----@field binary_file string Binary output path (e.g. "build/abc123a") +---@field binary_file string Binary output path (e.g. "build/abc123a.run") ---@field input_file string Input test file path (e.g. "io/abc123a.in") ---@field output_file string Output file path (e.g. "io/abc123a.out") ---@field expected_file string Expected output path (e.g. "io/abc123a.expected") @@ -26,7 +26,7 @@ function M.create_context(contest, contest_id, problem_id, config) contest_id = contest_id, problem_id = problem_id, source_file = source_file, - binary_file = ("build/%s"):format(base_name), + binary_file = ("build/%s.run"):format(base_name), input_file = ("io/%s.in"):format(base_name), output_file = ("io/%s.out"):format(base_name), expected_file = ("io/%s.expected"):format(base_name), diff --git a/lua/cp/scrape.lua b/lua/cp/scrape.lua index b03a5c1..62188d9 100644 --- a/lua/cp/scrape.lua +++ b/lua/cp/scrape.lua @@ -1,5 +1,6 @@ local M = {} local logger = require("cp.log") +local cache = require("cp.cache") local function get_plugin_path() local plugin_path = debug.getinfo(1, "S").source:sub(2) @@ -35,7 +36,72 @@ local function setup_python_env() return true end +---@param contest_type string +---@param contest_id string +---@return {success: boolean, problems?: table[], error?: string} +function M.scrape_contest_metadata(contest_type, contest_id) + cache.load() + + local cached_data = cache.get_contest_data(contest_type, contest_id) + if cached_data then + return { + success = true, + problems = cached_data.problems, + } + end + + if not setup_python_env() then + return { + success = false, + error = "Python environment setup failed", + } + end + + local plugin_path = get_plugin_path() + local scraper_path = plugin_path .. "/scrapers/" .. contest_type .. ".py" + local args = { "uv", "run", scraper_path, "metadata", contest_id } + + local result = vim.system(args, { + cwd = plugin_path, + text = true, + timeout = 30000, + }):wait() + + if result.code ~= 0 then + return { + success = false, + error = "Failed to run metadata scraper: " .. (result.stderr or "Unknown error"), + } + end + + local ok, data = pcall(vim.json.decode, result.stdout) + if not ok then + return { + success = false, + error = "Failed to parse metadata scraper output: " .. tostring(data), + } + end + + if not data.success then + return data + end + + local problems_list + if contest_type == "cses" then + problems_list = data.categories and data.categories["CSES Problem Set"] or {} + else + problems_list = data.problems or {} + end + + cache.set_contest_data(contest_type, contest_id, problems_list) + return { + success = true, + problems = problems_list, + } +end + ---@param ctx ProblemContext +---@return {success: boolean, problem_id: string, test_count?: number, url?: string, error?: string} function M.scrape_problem(ctx) ensure_io_directory() @@ -50,6 +116,7 @@ function M.scrape_problem(ctx) if not setup_python_env() then return { success = false, + problem_id = ctx.problem_name, error = "Python environment setup failed", } end @@ -59,9 +126,9 @@ function M.scrape_problem(ctx) local args if ctx.contest == "cses" then - args = { "uv", "run", scraper_path, ctx.contest_id } + args = { "uv", "run", scraper_path, "tests", ctx.contest_id } else - args = { "uv", "run", scraper_path, ctx.contest_id, ctx.problem_id } + args = { "uv", "run", scraper_path, "tests", ctx.contest_id, ctx.problem_id } end local result = vim.system(args, { @@ -73,7 +140,8 @@ function M.scrape_problem(ctx) if result.code ~= 0 then return { success = false, - error = "Failed to run scraper: " .. (result.stderr or "Unknown error"), + problem_id = ctx.problem_name, + error = "Failed to run tests scraper: " .. (result.stderr or "Unknown error"), } end @@ -81,7 +149,8 @@ function M.scrape_problem(ctx) if not ok then return { success = false, - error = "Failed to parse scraper output: " .. tostring(data), + problem_id = ctx.problem_name, + error = "Failed to parse tests scraper output: " .. tostring(data), } end @@ -89,7 +158,7 @@ function M.scrape_problem(ctx) return data end - if #data.test_cases > 0 then + if data.test_cases and #data.test_cases > 0 then local all_inputs = {} local all_outputs = {} @@ -113,7 +182,7 @@ function M.scrape_problem(ctx) return { success = true, problem_id = ctx.problem_name, - test_count = #data.test_cases, + test_count = data.test_cases and #data.test_cases or 0, url = data.url, } end diff --git a/plugin/cp.lua b/plugin/cp.lua index 9fc7e28..e0dfd18 100644 --- a/plugin/cp.lua +++ b/plugin/cp.lua @@ -3,7 +3,8 @@ if vim.g.loaded_cp then end vim.g.loaded_cp = 1 -local competition_types = { "atcoder", "codeforces", "cses" } +local platforms = { "atcoder", "codeforces", "cses" } +local actions = { "run", "debug", "diff", "next", "prev" } vim.api.nvim_create_user_command("CP", function(opts) local cp = require("cp") @@ -13,10 +14,46 @@ vim.api.nvim_create_user_command("CP", function(opts) cp.handle_command(opts) end, { nargs = "*", - complete = function(ArgLead, _, _) - local commands = vim.list_extend(vim.deepcopy(competition_types), { "run", "debug", "diff" }) - return vim.tbl_filter(function(cmd) - return cmd:find(ArgLead, 1, true) == 1 - end, commands) + complete = function(ArgLead, CmdLine, CursorPos) + local args = vim.split(vim.trim(CmdLine), "%s+") + local num_args = #args + if CmdLine:sub(-1) == " " then + num_args = num_args + 1 + end + + if num_args == 2 then + local candidates = {} + vim.list_extend(candidates, platforms) + vim.list_extend(candidates, actions) + if vim.g.cp_platform and vim.g.cp_contest_id then + local cache = require("cp.cache") + cache.load() + local contest_data = cache.get_contest_data(vim.g.cp_platform, vim.g.cp_contest_id) + if contest_data and contest_data.problems then + for _, problem in ipairs(contest_data.problems) do + table.insert(candidates, problem.id) + end + end + end + return vim.tbl_filter(function(cmd) + return cmd:find(ArgLead, 1, true) == 1 + end, candidates) + elseif num_args == 4 then + if vim.tbl_contains(platforms, args[2]) then + local cache = require("cp.cache") + cache.load() + local contest_data = cache.get_contest_data(args[2], args[3]) + if contest_data and contest_data.problems then + local candidates = {} + for _, problem in ipairs(contest_data.problems) do + table.insert(candidates, problem.id) + end + return vim.tbl_filter(function(cmd) + return cmd:find(ArgLead, 1, true) == 1 + end, candidates) + end + end + end + return {} end, }) diff --git a/scrapers/atcoder.py b/scrapers/atcoder.py index 1f84f2e..031208e 100644 --- a/scrapers/atcoder.py +++ b/scrapers/atcoder.py @@ -12,6 +12,52 @@ def parse_problem_url(contest_id: str, problem_letter: str) -> str: return f"https://atcoder.jp/contests/{contest_id}/tasks/{task_id}" +def scrape_contest_problems(contest_id: str): + try: + contest_url = f"https://atcoder.jp/contests/{contest_id}/tasks" + headers = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + } + + response = requests.get(contest_url, headers=headers, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + problems = [] + + task_table = soup.find("table", class_="table") + if not task_table: + return [] + + rows = task_table.find_all("tr")[1:] # Skip header row + + for row in rows: + cells = row.find_all("td") + if len(cells) >= 2: + task_link = cells[1].find("a") + if task_link: + task_name = task_link.get_text(strip=True) + task_href = task_link.get("href", "") + + # Extract problem letter from task name or URL + task_id = task_href.split("/")[-1] if task_href else "" + if task_id.startswith(contest_id + "_"): + problem_letter = task_id[len(contest_id) + 1:] + + if problem_letter and task_name: + problems.append({ + "id": problem_letter.lower(), + "name": task_name + }) + + problems.sort(key=lambda x: x["id"]) + return problems + + except Exception as e: + print(f"Failed to scrape AtCoder contest problems: {e}", file=sys.stderr) + return [] + + def scrape(url: str) -> list[tuple[str, str]]: try: headers = { @@ -57,54 +103,98 @@ def scrape(url: str) -> list[tuple[str, str]]: def main(): - if len(sys.argv) != 3: + if len(sys.argv) < 2: result = { "success": False, - "error": "Usage: atcoder.py ", - "problem_id": None, + "error": "Usage: atcoder.py metadata OR atcoder.py tests ", } print(json.dumps(result)) sys.exit(1) - contest_id = sys.argv[1] - problem_letter = sys.argv[2] - problem_id = contest_id + problem_letter.lower() + mode = sys.argv[1] - url = parse_problem_url(contest_id, problem_letter) - print(f"Scraping: {url}", file=sys.stderr) + if mode == "metadata": + if len(sys.argv) != 3: + result = { + "success": False, + "error": "Usage: atcoder.py metadata ", + } + print(json.dumps(result)) + sys.exit(1) - tests = scrape(url) + contest_id = sys.argv[2] + problems = scrape_contest_problems(contest_id) + + if not problems: + result = { + "success": False, + "error": f"No problems found for contest {contest_id}", + } + print(json.dumps(result)) + sys.exit(1) - if not tests: result = { - "success": False, - "error": f"No tests found for {contest_id} {problem_letter}", + "success": True, + "contest_id": contest_id, + "problems": problems, + } + print(json.dumps(result)) + + elif mode == "tests": + if len(sys.argv) != 4: + result = { + "success": False, + "error": "Usage: atcoder.py tests ", + } + print(json.dumps(result)) + sys.exit(1) + + contest_id = sys.argv[2] + problem_letter = sys.argv[3] + problem_id = contest_id + problem_letter.lower() + + url = parse_problem_url(contest_id, problem_letter) + print(f"Scraping: {url}", file=sys.stderr) + + tests = scrape(url) + + if not tests: + result = { + "success": False, + "error": f"No tests found for {contest_id} {problem_letter}", + "problem_id": problem_id, + "url": url, + } + print(json.dumps(result)) + sys.exit(1) + + test_cases = [] + for input_data, output_data in tests: + test_cases.append({"input": input_data, "output": output_data}) + + if test_cases: + combined_input = ( + str(len(test_cases)) + "\n" + "\n".join(tc["input"] for tc in test_cases) + ) + combined_output = "\n".join(tc["output"] for tc in test_cases) + test_cases = [{"input": combined_input, "output": combined_output}] + + result = { + "success": True, "problem_id": problem_id, "url": url, + "test_cases": test_cases, + } + print(json.dumps(result)) + + else: + result = { + "success": False, + "error": f"Unknown mode: {mode}. Use 'metadata' or 'tests'", } print(json.dumps(result)) sys.exit(1) - test_cases = [] - for input_data, output_data in tests: - test_cases.append({"input": input_data, "output": output_data}) - - if test_cases: - combined_input = ( - str(len(test_cases)) + "\n" + "\n".join(tc["input"] for tc in test_cases) - ) - combined_output = "\n".join(tc["output"] for tc in test_cases) - test_cases = [{"input": combined_input, "output": combined_output}] - - result = { - "success": True, - "problem_id": problem_id, - "url": url, - "test_cases": test_cases, - } - - print(json.dumps(result)) - if __name__ == "__main__": main() diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index 35589a0..170dacc 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -60,51 +60,135 @@ def parse_problem_url(contest_id: str, problem_letter: str) -> str: ) +def scrape_contest_problems(contest_id: str): + try: + contest_url = f"https://codeforces.com/contest/{contest_id}" + scraper = cloudscraper.create_scraper() + response = scraper.get(contest_url, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + problems = [] + + problem_links = soup.find_all("a", href=lambda x: x and f"/contest/{contest_id}/problem/" in x) + + for link in problem_links: + href = link.get("href", "") + if f"/contest/{contest_id}/problem/" in href: + problem_letter = href.split("/")[-1].lower() + problem_name = link.get_text(strip=True) + + if problem_letter and problem_name and len(problem_letter) == 1: + problems.append({ + "id": problem_letter, + "name": problem_name + }) + + problems.sort(key=lambda x: x["id"]) + + seen = set() + unique_problems = [] + for p in problems: + if p["id"] not in seen: + seen.add(p["id"]) + unique_problems.append(p) + + return unique_problems + + except Exception as e: + print(f"Failed to scrape contest problems: {e}", file=sys.stderr) + return [] + + def scrape_sample_tests(url: str): print(f"Scraping: {url}", file=sys.stderr) return scrape(url) def main(): - if len(sys.argv) != 3: + if len(sys.argv) < 2: result = { "success": False, - "error": "Usage: codeforces.py ", - "problem_id": None, + "error": "Usage: codeforces.py metadata OR codeforces.py tests ", } print(json.dumps(result)) sys.exit(1) - contest_id = sys.argv[1] - problem_letter = sys.argv[2] - problem_id = contest_id + problem_letter.lower() + mode = sys.argv[1] - url = parse_problem_url(contest_id, problem_letter) - tests = scrape_sample_tests(url) + if mode == "metadata": + if len(sys.argv) != 3: + result = { + "success": False, + "error": "Usage: codeforces.py metadata ", + } + print(json.dumps(result)) + sys.exit(1) + + contest_id = sys.argv[2] + problems = scrape_contest_problems(contest_id) + + if not problems: + result = { + "success": False, + "error": f"No problems found for contest {contest_id}", + } + print(json.dumps(result)) + sys.exit(1) - if not tests: result = { - "success": False, - "error": f"No tests found for {contest_id} {problem_letter}", + "success": True, + "contest_id": contest_id, + "problems": problems, + } + print(json.dumps(result)) + + elif mode == "tests": + if len(sys.argv) != 4: + result = { + "success": False, + "error": "Usage: codeforces.py tests ", + } + print(json.dumps(result)) + sys.exit(1) + + contest_id = sys.argv[2] + problem_letter = sys.argv[3] + problem_id = contest_id + problem_letter.lower() + + url = parse_problem_url(contest_id, problem_letter) + tests = scrape_sample_tests(url) + + if not tests: + result = { + "success": False, + "error": f"No tests found for {contest_id} {problem_letter}", + "problem_id": problem_id, + "url": url, + } + print(json.dumps(result)) + sys.exit(1) + + test_cases = [] + for input_data, output_data in tests: + test_cases.append({"input": input_data, "output": output_data}) + + result = { + "success": True, "problem_id": problem_id, "url": url, + "test_cases": test_cases, + } + print(json.dumps(result)) + + else: + result = { + "success": False, + "error": f"Unknown mode: {mode}. Use 'metadata' or 'tests'", } print(json.dumps(result)) sys.exit(1) - test_cases = [] - for input_data, output_data in tests: - test_cases.append({"input": input_data, "output": output_data}) - - result = { - "success": True, - "problem_id": problem_id, - "url": url, - "test_cases": test_cases, - } - - print(json.dumps(result)) - if __name__ == "__main__": main() diff --git a/scrapers/cses.py b/scrapers/cses.py index 8da2ba6..4cd583f 100755 --- a/scrapers/cses.py +++ b/scrapers/cses.py @@ -15,6 +15,53 @@ def parse_problem_url(problem_input: str) -> str | None: return None +def scrape_all_problems(): + try: + problemset_url = "https://cses.fi/problemset/" + headers = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + } + + response = requests.get(problemset_url, headers=headers, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + all_categories = {} + + # Find all problem links first + problem_links = soup.find_all("a", href=lambda x: x and "/problemset/task/" in x) + print(f"Found {len(problem_links)} problem links", file=sys.stderr) + + # Group by categories - look for h1 elements that precede problem lists + current_category = None + for element in soup.find_all(["h1", "a"]): + if element.name == "h1": + current_category = element.get_text().strip() + if current_category not in all_categories: + all_categories[current_category] = [] + elif element.name == "a" and "/problemset/task/" in element.get("href", ""): + href = element.get("href", "") + problem_id = href.split("/")[-1] + problem_name = element.get_text(strip=True) + + if problem_id.isdigit() and problem_name and current_category: + all_categories[current_category].append({ + "id": problem_id, + "name": problem_name + }) + + # Sort problems in each category + for category in all_categories: + all_categories[category].sort(key=lambda x: int(x["id"])) + + print(f"Found {len(all_categories)} categories", file=sys.stderr) + return all_categories + + except Exception as e: + print(f"Failed to scrape CSES problems: {e}", file=sys.stderr) + return {} + + def scrape(url: str) -> list[tuple[str, str]]: try: headers = { @@ -57,56 +104,98 @@ def scrape(url: str) -> list[tuple[str, str]]: def main(): - if len(sys.argv) != 2: + if len(sys.argv) < 2: result = { "success": False, - "error": "Usage: cses.py ", - "problem_id": None, + "error": "Usage: cses.py metadata OR cses.py tests ", } print(json.dumps(result)) sys.exit(1) - problem_input = sys.argv[1] - url = parse_problem_url(problem_input) + mode = sys.argv[1] + + if mode == "metadata": + if len(sys.argv) != 2: + result = { + "success": False, + "error": "Usage: cses.py metadata", + } + print(json.dumps(result)) + sys.exit(1) + + all_categories = scrape_all_problems() + + if not all_categories: + result = { + "success": False, + "error": "Failed to scrape CSES problem categories", + } + print(json.dumps(result)) + sys.exit(1) - if not url: result = { - "success": False, - "error": f"Invalid problem input: {problem_input}. Use either problem ID (e.g., 1068) or full URL", - "problem_id": problem_input if problem_input.isdigit() else None, + "success": True, + "categories": all_categories, } print(json.dumps(result)) - sys.exit(1) - tests = scrape(url) + elif mode == "tests": + if len(sys.argv) != 3: + result = { + "success": False, + "error": "Usage: cses.py tests ", + } + print(json.dumps(result)) + sys.exit(1) - problem_id = ( - problem_input if problem_input.isdigit() else problem_input.split("/")[-1] - ) + problem_input = sys.argv[2] + url = parse_problem_url(problem_input) + + if not url: + result = { + "success": False, + "error": f"Invalid problem input: {problem_input}. Use either problem ID (e.g., 1068) or full URL", + "problem_id": problem_input if problem_input.isdigit() else None, + } + print(json.dumps(result)) + sys.exit(1) + + tests = scrape(url) + + problem_id = ( + problem_input if problem_input.isdigit() else problem_input.split("/")[-1] + ) + + if not tests: + result = { + "success": False, + "error": f"No tests found for {problem_input}", + "problem_id": problem_id, + "url": url, + } + print(json.dumps(result)) + sys.exit(1) + + test_cases = [] + for input_data, output_data in tests: + test_cases.append({"input": input_data, "output": output_data}) - if not tests: result = { - "success": False, - "error": f"No tests found for {problem_input}", + "success": True, "problem_id": problem_id, "url": url, + "test_cases": test_cases, + } + print(json.dumps(result)) + + else: + result = { + "success": False, + "error": f"Unknown mode: {mode}. Use 'metadata' or 'tests'", } print(json.dumps(result)) sys.exit(1) - test_cases = [] - for input_data, output_data in tests: - test_cases.append({"input": input_data, "output": output_data}) - - result = { - "success": True, - "problem_id": problem_id, - "url": url, - "test_cases": test_cases, - } - - print(json.dumps(result)) - if __name__ == "__main__": main()