fix: use a diff scraper for now

This commit is contained in:
Barrett Ruth 2025-12-08 19:46:14 -06:00
parent eb3f93587f
commit dfd8275421
3 changed files with 8 additions and 10 deletions

View file

@@ -6,7 +6,7 @@ import sys
 from typing import Any
 import httpx
-from scrapling.fetchers import StealthyFetcher
+from scrapling.fetchers import Fetcher
 from .base import BaseScraper
 from .models import (
@@ -52,7 +52,7 @@ def _extract_memory_limit(html: str) -> float:
 def _fetch_html_sync(url: str) -> str:
-    response = StealthyFetcher.fetch(url, headless=True, network_idle=True)
+    response = Fetcher.get(url)
     return str(response.body)

View file

@@ -9,7 +9,7 @@ from typing import Any
 import requests
 from bs4 import BeautifulSoup, Tag
-from scrapling.fetchers import StealthyFetcher
+from scrapling.fetchers import Fetcher
 from .base import BaseScraper
 from .models import (
@@ -143,10 +143,8 @@ def _is_interactive(block: Tag) -> bool:
 def _fetch_problems_html(contest_id: str) -> str:
     url = f"{BASE_URL}/contest/{contest_id}/problems"
-    page = StealthyFetcher.fetch(
+    page = Fetcher.get(
         url,
-        headless=True,
-        solve_cloudflare=True,
     )
     return page.html_content

View file

@@ -172,7 +172,7 @@ def run_scraper_offline(fixture_text):
         raise AssertionError(f"Unexpected requests.get call: {url}")
     return {
-        "StealthyFetcher.fetch": _mock_stealthy_fetch,
+        "Fetcher.get": _mock_stealthy_fetch,
         "requests.get": _mock_requests_get,
     }
@@ -226,7 +226,7 @@ def run_scraper_offline(fixture_text):
     return {
         "__offline_get_async": __offline_get_async,
-        "StealthyFetcher.fetch": _mock_stealthy_fetch,
+        "Fetcher.get": _mock_stealthy_fetch,
     }
 case _:
@@ -238,7 +238,7 @@ def run_scraper_offline(fixture_text):
 offline_fetches = _make_offline_fetches(scraper_name)
 if scraper_name == "codeforces":
-    fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"]  # type: ignore[assignment]
+    fetchers.Fetcher.get = offline_fetches["Fetcher.get"]  # type: ignore[assignment]
     requests.get = offline_fetches["requests.get"]
 elif scraper_name == "atcoder":
     ns._fetch = offline_fetches["_fetch"]
@@ -247,7 +247,7 @@ def run_scraper_offline(fixture_text):
     httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"]  # type: ignore[assignment]
 elif scraper_name == "codechef":
     httpx.AsyncClient.get = offline_fetches["__offline_get_async"]  # type: ignore[assignment]
-    fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"]  # type: ignore[assignment]
+    fetchers.Fetcher.get = offline_fetches["Fetcher.get"]  # type: ignore[assignment]
 main_async = getattr(ns, "main_async")
 assert callable(main_async), f"main_async not found in {scraper_name}"