feat(scraper): add precision extraction, start_time, and submit support
Problem: problem pages state floating-point precision requirements, and contest listings carry start timestamps, but neither was being extracted or stored. The submit workflow also needed a foundation in the scraper layer. Solution: add extract_precision() to base.py and propagate its result through all scrapers into the cache. Add start_time to ContestSummary and extract it from AtCoder and Codeforces. Add the SubmitResult model, an abstract submit() method, a submit CLI case with get_language_id() resolution, stdin/env_extra support in run_scraper, and a full AtCoder submit implementation; stub submit() on the remaining platforms.
This commit is contained in:
parent
865e3b5928
commit
90bd13580b
9 changed files with 245 additions and 20 deletions
|
|
@ -27,7 +27,7 @@
|
||||||
---@field multi_test? boolean
|
---@field multi_test? boolean
|
||||||
---@field memory_mb? number
|
---@field memory_mb? number
|
||||||
---@field timeout_ms? number
|
---@field timeout_ms? number
|
||||||
---@field epsilon? number
|
---@field precision? number
|
||||||
---@field combined_test? CombinedTest
|
---@field combined_test? CombinedTest
|
||||||
---@field test_cases TestCase[]
|
---@field test_cases TestCase[]
|
||||||
|
|
||||||
|
|
@ -231,7 +231,8 @@ function M.set_test_cases(
|
||||||
timeout_ms,
|
timeout_ms,
|
||||||
memory_mb,
|
memory_mb,
|
||||||
interactive,
|
interactive,
|
||||||
multi_test
|
multi_test,
|
||||||
|
precision
|
||||||
)
|
)
|
||||||
vim.validate({
|
vim.validate({
|
||||||
platform = { platform, 'string' },
|
platform = { platform, 'string' },
|
||||||
|
|
@ -243,6 +244,7 @@ function M.set_test_cases(
|
||||||
memory_mb = { memory_mb, { 'number', 'nil' }, true },
|
memory_mb = { memory_mb, { 'number', 'nil' }, true },
|
||||||
interactive = { interactive, { 'boolean', 'nil' }, true },
|
interactive = { interactive, { 'boolean', 'nil' }, true },
|
||||||
multi_test = { multi_test, { 'boolean', 'nil' }, true },
|
multi_test = { multi_test, { 'boolean', 'nil' }, true },
|
||||||
|
precision = { precision, { 'number', 'nil' }, true },
|
||||||
})
|
})
|
||||||
|
|
||||||
local index = cache_data[platform][contest_id].index_map[problem_id]
|
local index = cache_data[platform][contest_id].index_map[problem_id]
|
||||||
|
|
@ -253,6 +255,7 @@ function M.set_test_cases(
|
||||||
cache_data[platform][contest_id].problems[index].memory_mb = memory_mb
|
cache_data[platform][contest_id].problems[index].memory_mb = memory_mb
|
||||||
cache_data[platform][contest_id].problems[index].interactive = interactive
|
cache_data[platform][contest_id].problems[index].interactive = interactive
|
||||||
cache_data[platform][contest_id].problems[index].multi_test = multi_test
|
cache_data[platform][contest_id].problems[index].multi_test = multi_test
|
||||||
|
cache_data[platform][contest_id].problems[index].precision = precision
|
||||||
|
|
||||||
M.save()
|
M.save()
|
||||||
end
|
end
|
||||||
|
|
@ -278,7 +281,7 @@ end
|
||||||
---@param contest_id string
|
---@param contest_id string
|
||||||
---@param problem_id? string
|
---@param problem_id? string
|
||||||
---@return number?
|
---@return number?
|
||||||
function M.get_epsilon(platform, contest_id, problem_id)
|
function M.get_precision(platform, contest_id, problem_id)
|
||||||
vim.validate({
|
vim.validate({
|
||||||
platform = { platform, 'string' },
|
platform = { platform, 'string' },
|
||||||
contest_id = { contest_id, 'string' },
|
contest_id = { contest_id, 'string' },
|
||||||
|
|
@ -299,7 +302,7 @@ function M.get_epsilon(platform, contest_id, problem_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
local problem_data = cache_data[platform][contest_id].problems[index]
|
local problem_data = cache_data[platform][contest_id].problems[index]
|
||||||
return problem_data and problem_data.epsilon or nil
|
return problem_data and problem_data.precision or nil
|
||||||
end
|
end
|
||||||
|
|
||||||
---@param file_path string
|
---@param file_path string
|
||||||
|
|
@ -349,11 +352,24 @@ function M.set_contest_summaries(platform, contests)
|
||||||
cache_data[platform][contest.id] = cache_data[platform][contest.id] or {}
|
cache_data[platform][contest.id] = cache_data[platform][contest.id] or {}
|
||||||
cache_data[platform][contest.id].display_name = contest.display_name
|
cache_data[platform][contest.id].display_name = contest.display_name
|
||||||
cache_data[platform][contest.id].name = contest.name
|
cache_data[platform][contest.id].name = contest.name
|
||||||
|
if contest.start_time then
|
||||||
|
cache_data[platform][contest.id].start_time = contest.start_time
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
M.save()
|
M.save()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
---@param platform string
|
||||||
|
---@param contest_id string
|
||||||
|
---@return integer?
|
||||||
|
function M.get_contest_start_time(platform, contest_id)
|
||||||
|
if not cache_data[platform] or not cache_data[platform][contest_id] then
|
||||||
|
return nil
|
||||||
|
end
|
||||||
|
return cache_data[platform][contest_id].start_time
|
||||||
|
end
|
||||||
|
|
||||||
function M.clear_all()
|
function M.clear_all()
|
||||||
cache_data = {}
|
cache_data = {}
|
||||||
M.save()
|
M.save()
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,12 @@ local function run_scraper(platform, subcommand, args, opts)
|
||||||
env.PYTHONPATH = ''
|
env.PYTHONPATH = ''
|
||||||
env.CONDA_PREFIX = ''
|
env.CONDA_PREFIX = ''
|
||||||
|
|
||||||
|
if opts and opts.env_extra then
|
||||||
|
for k, v in pairs(opts.env_extra) do
|
||||||
|
env[k] = v
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
if opts and opts.ndjson then
|
if opts and opts.ndjson then
|
||||||
local uv = vim.uv
|
local uv = vim.uv
|
||||||
local stdout = uv.new_pipe(false)
|
local stdout = uv.new_pipe(false)
|
||||||
|
|
@ -126,6 +132,9 @@ local function run_scraper(platform, subcommand, args, opts)
|
||||||
end
|
end
|
||||||
|
|
||||||
local sysopts = { text = true, timeout = 30000, env = env, cwd = plugin_path }
|
local sysopts = { text = true, timeout = 30000, env = env, cwd = plugin_path }
|
||||||
|
if opts and opts.stdin then
|
||||||
|
sysopts.stdin = opts.stdin
|
||||||
|
end
|
||||||
if opts and opts.sync then
|
if opts and opts.sync then
|
||||||
local result = vim.system(cmd, sysopts):wait()
|
local result = vim.system(cmd, sysopts):wait()
|
||||||
return syshandle(result)
|
return syshandle(result)
|
||||||
|
|
@ -228,6 +237,7 @@ function M.scrape_all_tests(platform, contest_id, callback)
|
||||||
memory_mb = ev.memory_mb or 0,
|
memory_mb = ev.memory_mb or 0,
|
||||||
interactive = ev.interactive or false,
|
interactive = ev.interactive or false,
|
||||||
multi_test = ev.multi_test or false,
|
multi_test = ev.multi_test or false,
|
||||||
|
precision = ev.precision,
|
||||||
problem_id = ev.problem_id,
|
problem_id = ev.problem_id,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
@ -236,4 +246,21 @@ function M.scrape_all_tests(platform, contest_id, callback)
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
|
function M.submit(platform, contest_id, problem_id, language, source_code, credentials, callback)
|
||||||
|
local creds_json = vim.json.encode(credentials)
|
||||||
|
run_scraper(platform, 'submit', { contest_id, problem_id, language }, {
|
||||||
|
stdin = source_code,
|
||||||
|
env_extra = { CP_CREDENTIALS = creds_json },
|
||||||
|
on_exit = function(result)
|
||||||
|
if type(callback) == 'function' then
|
||||||
|
if result and result.success then
|
||||||
|
callback(result.data or { success = true })
|
||||||
|
else
|
||||||
|
callback({ success = false, error = result and result.error or 'unknown' })
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
})
|
||||||
|
end
|
||||||
|
|
||||||
return M
|
return M
|
||||||
|
|
|
||||||
|
|
@ -130,7 +130,8 @@ local function start_tests(platform, contest_id, problems)
|
||||||
ev.timeout_ms or 0,
|
ev.timeout_ms or 0,
|
||||||
ev.memory_mb or 0,
|
ev.memory_mb or 0,
|
||||||
ev.interactive,
|
ev.interactive,
|
||||||
ev.multi_test
|
ev.multi_test,
|
||||||
|
ev.precision
|
||||||
)
|
)
|
||||||
|
|
||||||
local io_state = state.get_io_view_state()
|
local io_state = state.get_io_view_state()
|
||||||
|
|
|
||||||
|
|
@ -14,13 +14,14 @@ from bs4 import BeautifulSoup, Tag
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper, extract_precision
|
||||||
from .models import (
|
from .models import (
|
||||||
CombinedTest,
|
CombinedTest,
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
MetadataResult,
|
MetadataResult,
|
||||||
ProblemSummary,
|
ProblemSummary,
|
||||||
|
SubmitResult,
|
||||||
TestCase,
|
TestCase,
|
||||||
TestsResult,
|
TestsResult,
|
||||||
)
|
)
|
||||||
|
|
@ -121,6 +122,23 @@ def _parse_last_page(html: str) -> int:
|
||||||
return max(nums) if nums else 1
|
return max(nums) if nums else 1
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_start_time(tr: Tag) -> int | None:
|
||||||
|
tds = tr.select("td")
|
||||||
|
if not tds:
|
||||||
|
return None
|
||||||
|
time_el = tds[0].select_one("time.fixtime-full")
|
||||||
|
if not time_el:
|
||||||
|
return None
|
||||||
|
text = time_el.get_text(strip=True)
|
||||||
|
try:
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S%z")
|
||||||
|
return int(dt.timestamp())
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _parse_archive_contests(html: str) -> list[ContestSummary]:
|
def _parse_archive_contests(html: str) -> list[ContestSummary]:
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
tbody = soup.select_one("table.table-default tbody") or soup.select_one("tbody")
|
tbody = soup.select_one("table.table-default tbody") or soup.select_one("tbody")
|
||||||
|
|
@ -139,7 +157,10 @@ def _parse_archive_contests(html: str) -> list[ContestSummary]:
|
||||||
continue
|
continue
|
||||||
cid = m.group(1)
|
cid = m.group(1)
|
||||||
name = a.get_text(strip=True)
|
name = a.get_text(strip=True)
|
||||||
out.append(ContestSummary(id=cid, name=name, display_name=name))
|
start_time = _parse_start_time(tr)
|
||||||
|
out.append(
|
||||||
|
ContestSummary(id=cid, name=name, display_name=name, start_time=start_time)
|
||||||
|
)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -169,7 +190,7 @@ def _parse_tasks_list(html: str) -> list[dict[str, str]]:
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def _extract_problem_info(html: str) -> tuple[int, float, bool]:
|
def _extract_problem_info(html: str) -> tuple[int, float, bool, float | None]:
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
txt = soup.get_text(" ", strip=True)
|
txt = soup.get_text(" ", strip=True)
|
||||||
timeout_ms = 0
|
timeout_ms = 0
|
||||||
|
|
@ -181,9 +202,10 @@ def _extract_problem_info(html: str) -> tuple[int, float, bool]:
|
||||||
if ms:
|
if ms:
|
||||||
memory_mb = float(ms.group(1)) * MIB_TO_MB
|
memory_mb = float(ms.group(1)) * MIB_TO_MB
|
||||||
div = soup.select_one("#problem-statement")
|
div = soup.select_one("#problem-statement")
|
||||||
txt = div.get_text(" ", strip=True) if div else soup.get_text(" ", strip=True)
|
body = div.get_text(" ", strip=True) if div else soup.get_text(" ", strip=True)
|
||||||
interactive = "This is an interactive" in txt
|
interactive = "This is an interactive" in body
|
||||||
return timeout_ms, memory_mb, interactive
|
precision = extract_precision(body)
|
||||||
|
return timeout_ms, memory_mb, interactive, precision
|
||||||
|
|
||||||
|
|
||||||
def _extract_samples(html: str) -> list[TestCase]:
|
def _extract_samples(html: str) -> list[TestCase]:
|
||||||
|
|
@ -220,12 +242,13 @@ def _scrape_problem_page_sync(contest_id: str, slug: str) -> dict[str, Any]:
|
||||||
tests = _extract_samples(html)
|
tests = _extract_samples(html)
|
||||||
except Exception:
|
except Exception:
|
||||||
tests = []
|
tests = []
|
||||||
timeout_ms, memory_mb, interactive = _extract_problem_info(html)
|
timeout_ms, memory_mb, interactive, precision = _extract_problem_info(html)
|
||||||
return {
|
return {
|
||||||
"tests": tests,
|
"tests": tests,
|
||||||
"timeout_ms": timeout_ms,
|
"timeout_ms": timeout_ms,
|
||||||
"memory_mb": memory_mb,
|
"memory_mb": memory_mb,
|
||||||
"interactive": interactive,
|
"interactive": interactive,
|
||||||
|
"precision": precision,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -241,14 +264,29 @@ def _to_problem_summaries(rows: list[dict[str, str]]) -> list[ProblemSummary]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_upcoming_contests_async(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
) -> list[ContestSummary]:
|
||||||
|
try:
|
||||||
|
html = await _get_async(client, f"{BASE_URL}/contests/")
|
||||||
|
return _parse_archive_contests(html)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
async def _fetch_all_contests_async() -> list[ContestSummary]:
|
async def _fetch_all_contests_async() -> list[ContestSummary]:
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(
|
||||||
limits=httpx.Limits(max_connections=100, max_keepalive_connections=100),
|
limits=httpx.Limits(max_connections=100, max_keepalive_connections=100),
|
||||||
) as client:
|
) as client:
|
||||||
|
upcoming = await _fetch_upcoming_contests_async(client)
|
||||||
first_html = await _get_async(client, ARCHIVE_URL)
|
first_html = await _get_async(client, ARCHIVE_URL)
|
||||||
last = _parse_last_page(first_html)
|
last = _parse_last_page(first_html)
|
||||||
out = _parse_archive_contests(first_html)
|
out = _parse_archive_contests(first_html)
|
||||||
if last <= 1:
|
if last <= 1:
|
||||||
|
seen = {c.id for c in out}
|
||||||
|
for c in upcoming:
|
||||||
|
if c.id not in seen:
|
||||||
|
out.append(c)
|
||||||
return out
|
return out
|
||||||
tasks = [
|
tasks = [
|
||||||
asyncio.create_task(_get_async(client, f"{ARCHIVE_URL}?page={p}"))
|
asyncio.create_task(_get_async(client, f"{ARCHIVE_URL}?page={p}"))
|
||||||
|
|
@ -257,6 +295,10 @@ async def _fetch_all_contests_async() -> list[ContestSummary]:
|
||||||
for coro in asyncio.as_completed(tasks):
|
for coro in asyncio.as_completed(tasks):
|
||||||
html = await coro
|
html = await coro
|
||||||
out.extend(_parse_archive_contests(html))
|
out.extend(_parse_archive_contests(html))
|
||||||
|
seen = {c.id for c in out}
|
||||||
|
for c in upcoming:
|
||||||
|
if c.id not in seen:
|
||||||
|
out.append(c)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -319,6 +361,7 @@ class AtcoderScraper(BaseScraper):
|
||||||
"memory_mb": data.get("memory_mb", 0),
|
"memory_mb": data.get("memory_mb", 0),
|
||||||
"interactive": bool(data.get("interactive")),
|
"interactive": bool(data.get("interactive")),
|
||||||
"multi_test": False,
|
"multi_test": False,
|
||||||
|
"precision": data.get("precision"),
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
flush=True,
|
flush=True,
|
||||||
|
|
@ -326,6 +369,61 @@ class AtcoderScraper(BaseScraper):
|
||||||
|
|
||||||
await asyncio.gather(*(emit(r) for r in rows))
|
await asyncio.gather(*(emit(r) for r in rows))
|
||||||
|
|
||||||
|
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult:
|
||||||
|
def _submit_sync() -> SubmitResult:
|
||||||
|
try:
|
||||||
|
login_page = _session.get(f"{BASE_URL}/login", headers=HEADERS, timeout=TIMEOUT_SECONDS)
|
||||||
|
login_page.raise_for_status()
|
||||||
|
soup = BeautifulSoup(login_page.text, "html.parser")
|
||||||
|
csrf_input = soup.find("input", {"name": "csrf_token"})
|
||||||
|
if not csrf_input:
|
||||||
|
return SubmitResult(success=False, error="Could not find CSRF token on login page")
|
||||||
|
csrf_token = csrf_input.get("value", "")
|
||||||
|
|
||||||
|
login_resp = _session.post(
|
||||||
|
f"{BASE_URL}/login",
|
||||||
|
data={
|
||||||
|
"username": credentials.get("username", ""),
|
||||||
|
"password": credentials.get("password", ""),
|
||||||
|
"csrf_token": csrf_token,
|
||||||
|
},
|
||||||
|
headers=HEADERS,
|
||||||
|
timeout=TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
login_resp.raise_for_status()
|
||||||
|
|
||||||
|
submit_page = _session.get(
|
||||||
|
f"{BASE_URL}/contests/{contest_id}/submit",
|
||||||
|
headers=HEADERS,
|
||||||
|
timeout=TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
submit_page.raise_for_status()
|
||||||
|
soup = BeautifulSoup(submit_page.text, "html.parser")
|
||||||
|
csrf_input = soup.find("input", {"name": "csrf_token"})
|
||||||
|
if not csrf_input:
|
||||||
|
return SubmitResult(success=False, error="Could not find CSRF token on submit page")
|
||||||
|
csrf_token = csrf_input.get("value", "")
|
||||||
|
|
||||||
|
task_screen_name = f"{contest_id}_{problem_id}"
|
||||||
|
submit_resp = _session.post(
|
||||||
|
f"{BASE_URL}/contests/{contest_id}/submit",
|
||||||
|
data={
|
||||||
|
"data.TaskScreenName": task_screen_name,
|
||||||
|
"data.LanguageId": language_id,
|
||||||
|
"sourceCode": source_code,
|
||||||
|
"csrf_token": csrf_token,
|
||||||
|
},
|
||||||
|
headers=HEADERS,
|
||||||
|
timeout=TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
submit_resp.raise_for_status()
|
||||||
|
|
||||||
|
return SubmitResult(success=True, error="", submission_id="", verdict="submitted")
|
||||||
|
except Exception as e:
|
||||||
|
return SubmitResult(success=False, error=str(e))
|
||||||
|
|
||||||
|
return await asyncio.to_thread(_submit_sync)
|
||||||
|
|
||||||
|
|
||||||
async def main_async() -> int:
|
async def main_async() -> int:
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,31 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
from .models import CombinedTest, ContestListResult, MetadataResult, TestsResult
|
from .language_ids import get_language_id
|
||||||
|
from .models import CombinedTest, ContestListResult, MetadataResult, SubmitResult, TestsResult
|
||||||
|
|
||||||
|
_PRECISION_ABS_REL_RE = re.compile(
|
||||||
|
r"(?:absolute|relative)\s+error[^.]*?10\s*[\^{]\s*\{?\s*[-\u2212]\s*(\d+)\s*\}?",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_PRECISION_DECIMAL_RE = re.compile(
|
||||||
|
r"round(?:ed)?\s+to\s+(\d+)\s+decimal\s+place",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_precision(text: str) -> float | None:
|
||||||
|
m = _PRECISION_ABS_REL_RE.search(text)
|
||||||
|
if m:
|
||||||
|
return 10 ** -int(m.group(1))
|
||||||
|
m = _PRECISION_DECIMAL_RE.search(text)
|
||||||
|
if m:
|
||||||
|
return 10 ** -int(m.group(1))
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class BaseScraper(ABC):
|
class BaseScraper(ABC):
|
||||||
|
|
@ -19,6 +42,9 @@ class BaseScraper(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def stream_tests_for_category_async(self, category_id: str) -> None: ...
|
async def stream_tests_for_category_async(self, category_id: str) -> None: ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult: ...
|
||||||
|
|
||||||
def _usage(self) -> str:
|
def _usage(self) -> str:
|
||||||
name = self.platform_name
|
name = self.platform_name
|
||||||
return f"Usage: {name}.py metadata <id> | tests <id> | contests"
|
return f"Usage: {name}.py metadata <id> | tests <id> | contests"
|
||||||
|
|
@ -40,6 +66,9 @@ class BaseScraper(ABC):
|
||||||
def _contests_error(self, msg: str) -> ContestListResult:
|
def _contests_error(self, msg: str) -> ContestListResult:
|
||||||
return ContestListResult(success=False, error=msg)
|
return ContestListResult(success=False, error=msg)
|
||||||
|
|
||||||
|
def _submit_error(self, msg: str) -> SubmitResult:
|
||||||
|
return SubmitResult(success=False, error=msg)
|
||||||
|
|
||||||
async def _run_cli_async(self, args: list[str]) -> int:
|
async def _run_cli_async(self, args: list[str]) -> int:
|
||||||
if len(args) < 2:
|
if len(args) < 2:
|
||||||
print(self._metadata_error(self._usage()).model_dump_json())
|
print(self._metadata_error(self._usage()).model_dump_json())
|
||||||
|
|
@ -71,6 +100,21 @@ class BaseScraper(ABC):
|
||||||
print(result.model_dump_json())
|
print(result.model_dump_json())
|
||||||
return 0 if result.success else 1
|
return 0 if result.success else 1
|
||||||
|
|
||||||
|
case "submit":
|
||||||
|
if len(args) != 5:
|
||||||
|
print(self._submit_error("Usage: <platform> submit <contest_id> <problem_id> <language_id>").model_dump_json())
|
||||||
|
return 1
|
||||||
|
source_code = sys.stdin.read()
|
||||||
|
creds_raw = os.environ.get("CP_CREDENTIALS", "{}")
|
||||||
|
try:
|
||||||
|
credentials = json.loads(creds_raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
credentials = {}
|
||||||
|
language_id = get_language_id(self.platform_name, args[4]) or args[4]
|
||||||
|
result = await self.submit(args[2], args[3], source_code, language_id, credentials)
|
||||||
|
print(result.model_dump_json())
|
||||||
|
return 0 if result.success else 1
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
print(
|
print(
|
||||||
self._metadata_error(
|
self._metadata_error(
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,13 @@ from typing import Any
|
||||||
import httpx
|
import httpx
|
||||||
from curl_cffi import requests as curl_requests
|
from curl_cffi import requests as curl_requests
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper, extract_precision
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
MetadataResult,
|
MetadataResult,
|
||||||
ProblemSummary,
|
ProblemSummary,
|
||||||
|
SubmitResult,
|
||||||
TestCase,
|
TestCase,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -219,11 +220,13 @@ class CodeChefScraper(BaseScraper):
|
||||||
)
|
)
|
||||||
memory_mb = _extract_memory_limit(html)
|
memory_mb = _extract_memory_limit(html)
|
||||||
interactive = False
|
interactive = False
|
||||||
|
precision = extract_precision(html)
|
||||||
except Exception:
|
except Exception:
|
||||||
tests = []
|
tests = []
|
||||||
timeout_ms = 1000
|
timeout_ms = 1000
|
||||||
memory_mb = 256.0
|
memory_mb = 256.0
|
||||||
interactive = False
|
interactive = False
|
||||||
|
precision = None
|
||||||
combined_input = "\n".join(t.input for t in tests) if tests else ""
|
combined_input = "\n".join(t.input for t in tests) if tests else ""
|
||||||
combined_expected = (
|
combined_expected = (
|
||||||
"\n".join(t.expected for t in tests) if tests else ""
|
"\n".join(t.expected for t in tests) if tests else ""
|
||||||
|
|
@ -241,6 +244,7 @@ class CodeChefScraper(BaseScraper):
|
||||||
"memory_mb": memory_mb,
|
"memory_mb": memory_mb,
|
||||||
"interactive": interactive,
|
"interactive": interactive,
|
||||||
"multi_test": False,
|
"multi_test": False,
|
||||||
|
"precision": precision,
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks = [run_one(problem_code) for problem_code in problems.keys()]
|
tasks = [run_one(problem_code) for problem_code in problems.keys()]
|
||||||
|
|
@ -248,6 +252,9 @@ class CodeChefScraper(BaseScraper):
|
||||||
payload = await coro
|
payload = await coro
|
||||||
print(json.dumps(payload), flush=True)
|
print(json.dumps(payload), flush=True)
|
||||||
|
|
||||||
|
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult:
|
||||||
|
return SubmitResult(success=False, error="CodeChef submit not yet implemented", submission_id="", verdict="")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
CodeChefScraper().run_cli()
|
CodeChefScraper().run_cli()
|
||||||
|
|
|
||||||
|
|
@ -9,12 +9,13 @@ import requests
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
from curl_cffi import requests as curl_requests
|
from curl_cffi import requests as curl_requests
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper, extract_precision
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
MetadataResult,
|
MetadataResult,
|
||||||
ProblemSummary,
|
ProblemSummary,
|
||||||
|
SubmitResult,
|
||||||
TestCase,
|
TestCase,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -153,6 +154,7 @@ def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||||
raw_samples, is_grouped = _extract_samples(b)
|
raw_samples, is_grouped = _extract_samples(b)
|
||||||
timeout_ms, memory_mb = _extract_limits(b)
|
timeout_ms, memory_mb = _extract_limits(b)
|
||||||
interactive = _is_interactive(b)
|
interactive = _is_interactive(b)
|
||||||
|
precision = extract_precision(b.get_text(" ", strip=True))
|
||||||
|
|
||||||
if is_grouped and raw_samples:
|
if is_grouped and raw_samples:
|
||||||
combined_input = f"{len(raw_samples)}\n" + "\n".join(
|
combined_input = f"{len(raw_samples)}\n" + "\n".join(
|
||||||
|
|
@ -179,6 +181,7 @@ def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||||
"memory_mb": memory_mb,
|
"memory_mb": memory_mb,
|
||||||
"interactive": interactive,
|
"interactive": interactive,
|
||||||
"multi_test": is_grouped,
|
"multi_test": is_grouped,
|
||||||
|
"precision": precision,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return out
|
return out
|
||||||
|
|
@ -228,11 +231,20 @@ class CodeforcesScraper(BaseScraper):
|
||||||
|
|
||||||
contests: list[ContestSummary] = []
|
contests: list[ContestSummary] = []
|
||||||
for c in data["result"]:
|
for c in data["result"]:
|
||||||
if c.get("phase") != "FINISHED":
|
phase = c.get("phase")
|
||||||
|
if phase not in ("FINISHED", "BEFORE", "CODING"):
|
||||||
continue
|
continue
|
||||||
cid = str(c["id"])
|
cid = str(c["id"])
|
||||||
name = c["name"]
|
name = c["name"]
|
||||||
contests.append(ContestSummary(id=cid, name=name, display_name=name))
|
start_time = c.get("startTimeSeconds") if phase != "FINISHED" else None
|
||||||
|
contests.append(
|
||||||
|
ContestSummary(
|
||||||
|
id=cid,
|
||||||
|
name=name,
|
||||||
|
display_name=name,
|
||||||
|
start_time=start_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if not contests:
|
if not contests:
|
||||||
return self._contests_error("No contests found")
|
return self._contests_error("No contests found")
|
||||||
|
|
@ -263,11 +275,15 @@ class CodeforcesScraper(BaseScraper):
|
||||||
"memory_mb": b.get("memory_mb", 0),
|
"memory_mb": b.get("memory_mb", 0),
|
||||||
"interactive": bool(b.get("interactive")),
|
"interactive": bool(b.get("interactive")),
|
||||||
"multi_test": bool(b.get("multi_test", False)),
|
"multi_test": bool(b.get("multi_test", False)),
|
||||||
|
"precision": b.get("precision"),
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult:
|
||||||
|
return SubmitResult(success=False, error="Codeforces submit not yet implemented", submission_id="", verdict="")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
CodeforcesScraper().run_cli()
|
CodeforcesScraper().run_cli()
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,13 @@ from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper, extract_precision
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
MetadataResult,
|
MetadataResult,
|
||||||
ProblemSummary,
|
ProblemSummary,
|
||||||
|
SubmitResult,
|
||||||
TestCase,
|
TestCase,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -129,17 +130,21 @@ def parse_category_problems(category_id: str, html: str) -> list[ProblemSummary]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _extract_problem_info(html: str) -> tuple[int, int, bool]:
|
def _extract_problem_info(html: str) -> tuple[int, int, bool, float | None]:
|
||||||
tm = TIME_RE.search(html)
|
tm = TIME_RE.search(html)
|
||||||
mm = MEM_RE.search(html)
|
mm = MEM_RE.search(html)
|
||||||
t = int(round(float(tm.group(1)) * 1000)) if tm else 0
|
t = int(round(float(tm.group(1)) * 1000)) if tm else 0
|
||||||
m = int(mm.group(1)) if mm else 0
|
m = int(mm.group(1)) if mm else 0
|
||||||
md = MD_BLOCK_RE.search(html)
|
md = MD_BLOCK_RE.search(html)
|
||||||
interactive = False
|
interactive = False
|
||||||
|
precision = None
|
||||||
if md:
|
if md:
|
||||||
body = md.group(1)
|
body = md.group(1)
|
||||||
interactive = "This is an interactive problem." in body
|
interactive = "This is an interactive problem." in body
|
||||||
return t, m, interactive
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
precision = extract_precision(BeautifulSoup(body, "html.parser").get_text(" "))
|
||||||
|
return t, m, interactive, precision
|
||||||
|
|
||||||
|
|
||||||
def parse_title(html: str) -> str:
|
def parse_title(html: str) -> str:
|
||||||
|
|
@ -257,6 +262,9 @@ class CSESScraper(BaseScraper):
|
||||||
payload = await coro
|
payload = await coro
|
||||||
print(json.dumps(payload), flush=True)
|
print(json.dumps(payload), flush=True)
|
||||||
|
|
||||||
|
async def submit(self, contest_id: str, problem_id: str, source_code: str, language_id: str, credentials: dict[str, str]) -> SubmitResult:
|
||||||
|
return SubmitResult(success=False, error="CSES submit not yet implemented", submission_id="", verdict="")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
CSESScraper().run_cli()
|
CSESScraper().run_cli()
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ class ContestSummary(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
display_name: str | None = None
|
display_name: str | None = None
|
||||||
|
start_time: int | None = None
|
||||||
|
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
|
|
@ -63,6 +64,13 @@ class TestsResult(ScrapingResult):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
|
|
||||||
|
class SubmitResult(ScrapingResult):
|
||||||
|
submission_id: str = ""
|
||||||
|
verdict: str = ""
|
||||||
|
|
||||||
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
|
|
||||||
class ScraperConfig(BaseModel):
|
class ScraperConfig(BaseModel):
|
||||||
timeout_seconds: int = 30
|
timeout_seconds: int = 30
|
||||||
max_retries: int = 3
|
max_retries: int = 3
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue