fix(scrapers): trim scraped samples and compare output with normalized whitespace

This commit is contained in:
Barrett Ruth 2025-10-04 15:00:37 -04:00
parent 18dbcd43d2
commit b9a2c7a4ff
7 changed files with 84 additions and 28 deletions

View file

@ -98,7 +98,6 @@ local function parse_and_strip_time_v(output)
end
local peak_mb = peak_kb / 1024.0
head = head:gsub('\n+$', '')
return head, peak_mb
end

View file

@ -58,6 +58,22 @@ local function load_constraints_from_cache(platform, contest_id, problem_id)
return nil
end
--- Normalize raw problem output to a "canonical" form for comparison.
--- Most contests ignore leading/trailing whitespace and empty lines, so
--- carriage returns are removed, every line is trimmed, and lines that end up
--- empty are dropped. The relative order of the remaining lines is preserved.
---@param lines string? raw text; nil is treated as the empty string
---@return string normalized the surviving lines joined with '\n'
local function normalize_lines(lines)
  -- The extra parentheses truncate gsub's (string, count) result to the string.
  local text = ((lines or ''):gsub('\r', ''))
  local normalized = {}
  -- gmatch visits the non-empty lines strictly in order. The previous
  -- implementation copied the split list through vim.tbl_values, which walks
  -- the table with pairs() and therefore gives no ordering guarantee.
  for line in text:gmatch('[^\n]+') do
    local trimmed_line = line:match('^%s*(.-)%s*$')
    if trimmed_line ~= '' then
      normalized[#normalized + 1] = trimmed_line
    end
  end
  return table.concat(normalized, '\n')
end
---@param test_cases TestCase[]
---@return RanTestCase[]
local function create_sentinal_panel_data(test_cases)
@ -106,8 +122,7 @@ local function run_single_test_case(contest_config, cp_config, test_case)
local r = exec.run(cmd, stdin_content, timeout_ms, memory_mb)
local ansi = require('cp.ui.ansi')
local out = (r.stdout or ''):gsub('\n$', '')
local out = r.stdout or ''
local highlights = {}
if out ~= '' then
if cp_config.run_panel.ansi then
@ -130,8 +145,8 @@ local function run_single_test_case(contest_config, cp_config, test_case)
out = table.concat(trimmed, '\n')
end
local expected = (test_case.expected or ''):gsub('\n$', '')
local ok = out == expected
local expected = test_case.expected or ''
local ok = normalize_lines(out) == normalize_lines(expected)
local signal = r.signal
if not signal and r.code and r.code >= 128 then

View file

@ -25,7 +25,7 @@ end
---@return AnsiParseResult
function M.parse_ansi_text(text)
local clean_text = text:gsub('\027%[[%d;]*[a-zA-Z]', '')
local lines = vim.split(clean_text, '\n', { plain = true, trimempty = false })
local lines = vim.split(clean_text, '\n', { plain = true })
local highlights = {}
local line_num = 0

View file

@ -13,7 +13,7 @@ local M = {}
local vim_backend = {
name = 'vim',
render = function(_, actual)
local actual_lines = vim.split(actual, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual, '\n', { plain = true })
return {
content = actual_lines,
@ -27,7 +27,7 @@ local none_backend = {
name = 'none',
render = function(expected, actual)
local expected_lines = vim.split(expected, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual, '\n', { plain = true })
return {
content = { expected = expected_lines, actual = actual_lines },
@ -64,7 +64,7 @@ local git_backend = {
if result.code == 0 then
return {
content = vim.split(actual, '\n', { plain = true, trimempty = true }),
content = vim.split(actual, '\n', { plain = true }),
highlights = {},
}
else

View file

@ -22,7 +22,7 @@ local function create_none_diff_layout(parent_win, expected_content, actual_cont
vim.api.nvim_set_option_value('winbar', 'Actual', { win = actual_win })
local expected_lines = vim.split(expected_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(expected_buf, expected_lines, {})
utils.update_buffer_content(actual_buf, actual_lines, {})
@ -59,7 +59,7 @@ local function create_vim_diff_layout(parent_win, expected_content, actual_conte
vim.api.nvim_set_option_value('winbar', 'Actual', { win = actual_win })
local expected_lines = vim.split(expected_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(expected_buf, expected_lines, {})
utils.update_buffer_content(actual_buf, actual_lines, {})
@ -108,7 +108,7 @@ local function create_git_diff_layout(parent_win, expected_content, actual_conte
if diff_result.raw_diff and diff_result.raw_diff ~= '' then
highlight.parse_and_apply_diff(diff_buf, diff_result.raw_diff, diff_namespace)
else
local lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(diff_buf, lines, {})
end
@ -124,7 +124,7 @@ end
local function create_single_layout(parent_win, content)
local buf = utils.create_buffer_with_options()
local lines = vim.split(content, '\n', { plain = true, trimempty = true })
local lines = vim.split(content, '\n', { plain = true })
utils.update_buffer_content(buf, lines, {})
vim.api.nvim_set_current_win(parent_win)
@ -218,7 +218,7 @@ function M.update_diff_panes(
end
else
if desired_mode == 'single' then
local lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(
current_diff_layout.buffers[1],
lines,
@ -237,7 +237,7 @@ function M.update_diff_panes(
diff_namespace
)
else
local lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(
current_diff_layout.buffers[1],
lines,
@ -247,7 +247,7 @@ function M.update_diff_panes(
end
elseif desired_mode == 'none' then
local expected_lines = vim.split(expected_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(current_diff_layout.buffers[1], expected_lines, {})
utils.update_buffer_content(
current_diff_layout.buffers[2],
@ -257,7 +257,7 @@ function M.update_diff_panes(
)
else
local expected_lines = vim.split(expected_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true, trimempty = true })
local actual_lines = vim.split(actual_content, '\n', { plain = true })
utils.update_buffer_content(current_diff_layout.buffers[1], expected_lines, {})
utils.update_buffer_content(
current_diff_layout.buffers[2],

View file

@ -197,9 +197,9 @@ def _extract_samples(html: str) -> list[TestCase]:
mi = re.search(r"Sample\s*Input\s*(\d+)", title, flags=re.I)
mo = re.search(r"Sample\s*Output\s*(\d+)", title, flags=re.I)
if mi:
inputs[mi.group(1)] = t
inputs[mi.group(1)] = t.strip()
elif mo:
outputs[mo.group(1)] = t
outputs[mo.group(1)] = t.strip()
cases: list[TestCase] = []
for k in sorted(set(inputs) & set(outputs), key=lambda s: int(s)):
cases.append(TestCase(input=inputs[k], expected=outputs[k]))

View file

@ -39,7 +39,7 @@ def _text_from_pre(pre: Tag) -> str:
pre.get_text(separator="\n", strip=False)
.replace("\r", "")
.replace("\xa0", " ")
.rstrip("\n")
.strip()
)
@ -61,6 +61,20 @@ def _extract_limits(block: Tag) -> tuple[int, float]:
return timeout_ms, memory_mb
def _group_lines_by_id(pre: Tag) -> dict[int, list[str]]:
    """Bucket the text of ``div.test-example-line`` descendants of ``pre`` by group id.

    The group id is the number found in a ``test-example-line-<N>`` CSS class
    on each div; divs carrying no such numbered class are skipped.

    Args:
        pre: The element to search. Anything that is not a ``Tag`` yields an
            empty result.

    Returns:
        A mapping from group id to the div texts collected for that id, in the
        order ``find_all`` returned the divs.
    """
    grouped: dict[int, list[str]] = {}
    if isinstance(pre, Tag):
        for line_div in pre.find_all("div", class_="test-example-line"):
            # Join the class values into one string so a single regex can
            # locate the numbered variant.
            class_attr = " ".join(line_div.get("class", []))
            match = re.search(r"\btest-example-line-(\d+)\b", class_attr)
            if match is None:
                continue
            group_id = int(match.group(1))
            text = line_div.get_text("", strip=False)
            grouped.setdefault(group_id, []).append(text)
    return grouped
def _extract_title(block: Tag) -> tuple[str, str]:
t = block.find("div", class_="title")
if not t:
@ -77,19 +91,47 @@ def _extract_samples(block: Tag) -> list[TestCase]:
if not st:
return []
inputs = [
_text_from_pre(pre)
input_pres: list[Tag] = [ # type: ignore[misc]
inp.find("pre") # type: ignore[misc]
for inp in st.find_all("div", class_="input") # type: ignore[union-attr]
for pre in [inp.find("pre")]
if isinstance(pre, Tag)
if isinstance(inp, Tag) and inp.find("pre")
]
outputs = [
_text_from_pre(pre)
output_pres: list[Tag] = [
out.find("pre") # type: ignore[misc]
for out in st.find_all("div", class_="output") # type: ignore[union-attr]
for pre in [out.find("pre")]
if isinstance(pre, Tag)
if isinstance(out, Tag) and out.find("pre")
]
input_pres = [p for p in input_pres if isinstance(p, Tag)]
output_pres = [p for p in output_pres if isinstance(p, Tag)]
has_grouped = any(
p.find("div", class_="test-example-line") for p in input_pres + output_pres
)
if has_grouped:
inputs_by_gid: dict[int, list[str]] = {}
outputs_by_gid: dict[int, list[str]] = {}
for p in input_pres:
g = _group_lines_by_id(p)
for k, v in g.items():
inputs_by_gid.setdefault(k, []).extend(v)
for p in output_pres:
g = _group_lines_by_id(p)
for k, v in g.items():
outputs_by_gid.setdefault(k, []).extend(v)
inputs_by_gid.pop(0, None)
outputs_by_gid.pop(0, None)
keys = sorted(set(inputs_by_gid.keys()) & set(outputs_by_gid.keys()))
if keys:
return [
TestCase(
input="\n".join(inputs_by_gid[k]).strip(),
expected="\n".join(outputs_by_gid[k]).strip(),
)
for k in keys
]
inputs = [_text_from_pre(p) for p in input_pres]
outputs = [_text_from_pre(p) for p in output_pres]
n = min(len(inputs), len(outputs))
return [TestCase(input=inputs[i], expected=outputs[i]) for i in range(n)]