fix: replace curl_cffi with scrapling in codeforces metadata
Problem: `codeforces.py` used `curl_cffi` to bypass Cloudflare when fetching contest problem HTML, which made the scraper unavailable in the Nix Python env and required an extra dependency. Solution: rewrite `_fetch_problems_html` to use scrapling's `StealthySession` with `solve_cloudflare=True`, matching the existing Codeforces (CF) submit pattern. Extend `needs_browser` in `scraper.lua` to route CF `metadata` and `tests` through the FHS env on NixOS. Remove `curl-cffi` from `pyproject.toml`, `flake.nix`, and test mocks.
This commit is contained in:
parent
543480a4fe
commit
297c71e7c7
5 changed files with 30 additions and 35 deletions
|
|
@ -7,7 +7,6 @@ from typing import Any
|
|||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
from .base import BaseScraper, extract_precision
|
||||
from .models import (
|
||||
|
|
@ -141,10 +140,30 @@ def _is_interactive(block: Tag) -> bool:
|
|||
|
||||
|
||||
def _fetch_problems_html(contest_id: str) -> str:
|
||||
try:
|
||||
from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import]
|
||||
except ImportError:
|
||||
raise RuntimeError("scrapling is required for Codeforces metadata")
|
||||
|
||||
from .atcoder import _ensure_browser
|
||||
|
||||
_ensure_browser()
|
||||
|
||||
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
||||
response = curl_requests.get(url, impersonate="chrome", timeout=HTTP_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
html = ""
|
||||
|
||||
def page_action(page):
|
||||
nonlocal html
|
||||
html = page.content()
|
||||
|
||||
with StealthySession(
|
||||
headless=True,
|
||||
timeout=BROWSER_SESSION_TIMEOUT,
|
||||
google_search=False,
|
||||
) as session:
|
||||
session.fetch(url, page_action=page_action, solve_cloudflare=True)
|
||||
|
||||
return html
|
||||
|
||||
|
||||
def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue