fix(scrapers): fix running of multi-test-case Codeforces problems
This commit is contained in:
parent
73c91e2b28
commit
3654748632
1 changed files with 27 additions and 5 deletions
|
|
@ -83,7 +83,7 @@ def _extract_title(block: Tag) -> tuple[str, str]:
|
||||||
return parts[0].strip().upper(), parts[1].strip()
|
return parts[0].strip().upper(), parts[1].strip()
|
||||||
|
|
||||||
|
|
||||||
def _extract_samples(block: Tag) -> list[TestCase]:
|
def _extract_samples(block: Tag, multi_test: bool = False) -> list[TestCase]:
|
||||||
st = block.find("div", class_="sample-test")
|
st = block.find("div", class_="sample-test")
|
||||||
if not st:
|
if not st:
|
||||||
return []
|
return []
|
||||||
|
|
@ -119,18 +119,26 @@ def _extract_samples(block: Tag) -> list[TestCase]:
|
||||||
outputs_by_gid.pop(0, None)
|
outputs_by_gid.pop(0, None)
|
||||||
keys = sorted(set(inputs_by_gid.keys()) & set(outputs_by_gid.keys()))
|
keys = sorted(set(inputs_by_gid.keys()) & set(outputs_by_gid.keys()))
|
||||||
if keys:
|
if keys:
|
||||||
return [
|
samples = [
|
||||||
TestCase(
|
TestCase(
|
||||||
input="\n".join(inputs_by_gid[k]).strip(),
|
input="\n".join(inputs_by_gid[k]).strip(),
|
||||||
expected="\n".join(outputs_by_gid[k]).strip(),
|
expected="\n".join(outputs_by_gid[k]).strip(),
|
||||||
)
|
)
|
||||||
for k in keys
|
for k in keys
|
||||||
]
|
]
|
||||||
|
if multi_test:
|
||||||
|
return [TestCase(input=f"1\n{tc.input}", expected=tc.expected) for tc in samples]
|
||||||
|
return samples
|
||||||
|
|
||||||
inputs = [_text_from_pre(p) for p in input_pres]
|
inputs = [_text_from_pre(p) for p in input_pres]
|
||||||
outputs = [_text_from_pre(p) for p in output_pres]
|
outputs = [_text_from_pre(p) for p in output_pres]
|
||||||
n = min(len(inputs), len(outputs))
|
n = min(len(inputs), len(outputs))
|
||||||
return [TestCase(input=inputs[i], expected=outputs[i]) for i in range(n)]
|
samples = [TestCase(input=inputs[i], expected=outputs[i]) for i in range(n)]
|
||||||
|
|
||||||
|
if multi_test and samples:
|
||||||
|
return [TestCase(input=f"1\n{tc.input}", expected=tc.expected) for tc in samples]
|
||||||
|
|
||||||
|
return samples
|
||||||
|
|
||||||
|
|
||||||
def _is_interactive(block: Tag) -> bool:
|
def _is_interactive(block: Tag) -> bool:
|
||||||
|
|
@ -139,6 +147,19 @@ def _is_interactive(block: Tag) -> bool:
|
||||||
return "This is an interactive problem" in txt
|
return "This is an interactive problem" in txt
|
||||||
|
|
||||||
|
|
||||||
|
def _is_multi_test_case(block: Tag) -> bool:
|
||||||
|
input_spec = block.find("div", class_="input-specification")
|
||||||
|
if not input_spec:
|
||||||
|
return False
|
||||||
|
txt = input_spec.get_text(" ", strip=True).lower()
|
||||||
|
patterns = [
|
||||||
|
r"first line.*contains.*integer.*number of test case",
|
||||||
|
r"first line.*integer.*denoting.*number of test case",
|
||||||
|
r"first line.*number of test case",
|
||||||
|
]
|
||||||
|
return any(re.search(pattern, txt) for pattern in patterns)
|
||||||
|
|
||||||
|
|
||||||
def _fetch_problems_html(contest_id: str) -> str:
|
def _fetch_problems_html(contest_id: str) -> str:
|
||||||
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
||||||
page = StealthyFetcher.fetch(
|
page = StealthyFetcher.fetch(
|
||||||
|
|
@ -156,10 +177,11 @@ def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||||
for b in blocks:
|
for b in blocks:
|
||||||
holder = b.find_parent("div", class_="problemindexholder")
|
holder = b.find_parent("div", class_="problemindexholder")
|
||||||
letter = (holder.get("problemindex") if holder else "").strip().upper()
|
letter = (holder.get("problemindex") if holder else "").strip().upper()
|
||||||
name = _extract_title(b)[1] # keep your name extraction
|
name = _extract_title(b)[1]
|
||||||
if not letter:
|
if not letter:
|
||||||
continue
|
continue
|
||||||
tests = _extract_samples(b)
|
multi_test = _is_multi_test_case(b)
|
||||||
|
tests = _extract_samples(b, multi_test)
|
||||||
timeout_ms, memory_mb = _extract_limits(b)
|
timeout_ms, memory_mb = _extract_limits(b)
|
||||||
interactive = _is_interactive(b)
|
interactive = _is_interactive(b)
|
||||||
out.append(
|
out.append(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue