fix: use a different scraper for now
This commit is contained in:
parent
eb3f93587f
commit
dfd8275421
3 changed files with 8 additions and 10 deletions
|
|
@@ -6,7 +6,7 @@ import sys
|
|||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from scrapling.fetchers import StealthyFetcher
|
||||
from scrapling.fetchers import Fetcher
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@@ -52,7 +52,7 @@ def _extract_memory_limit(html: str) -> float:
|
|||
|
||||
|
||||
def _fetch_html_sync(url: str) -> str:
|
||||
response = StealthyFetcher.fetch(url, headless=True, network_idle=True)
|
||||
response = Fetcher.get(url)
|
||||
return str(response.body)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from scrapling.fetchers import StealthyFetcher
|
||||
from scrapling.fetchers import Fetcher
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@@ -143,10 +143,8 @@ def _is_interactive(block: Tag) -> bool:
|
|||
|
||||
def _fetch_problems_html(contest_id: str) -> str:
|
||||
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
||||
page = StealthyFetcher.fetch(
|
||||
page = Fetcher.get(
|
||||
url,
|
||||
headless=True,
|
||||
solve_cloudflare=True,
|
||||
)
|
||||
return page.html_content
|
||||
|
||||
|
|
|
|||
|
|
@@ -172,7 +172,7 @@ def run_scraper_offline(fixture_text):
|
|||
raise AssertionError(f"Unexpected requests.get call: {url}")
|
||||
|
||||
return {
|
||||
"StealthyFetcher.fetch": _mock_stealthy_fetch,
|
||||
"Fetcher.get": _mock_stealthy_fetch,
|
||||
"requests.get": _mock_requests_get,
|
||||
}
|
||||
|
||||
|
|
@@ -226,7 +226,7 @@ def run_scraper_offline(fixture_text):
|
|||
|
||||
return {
|
||||
"__offline_get_async": __offline_get_async,
|
||||
"StealthyFetcher.fetch": _mock_stealthy_fetch,
|
||||
"Fetcher.get": _mock_stealthy_fetch,
|
||||
}
|
||||
|
||||
case _:
|
||||
|
|
@@ -238,7 +238,7 @@ def run_scraper_offline(fixture_text):
|
|||
offline_fetches = _make_offline_fetches(scraper_name)
|
||||
|
||||
if scraper_name == "codeforces":
|
||||
fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
|
||||
fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
|
||||
requests.get = offline_fetches["requests.get"]
|
||||
elif scraper_name == "atcoder":
|
||||
ns._fetch = offline_fetches["_fetch"]
|
||||
|
|
@@ -247,7 +247,7 @@ def run_scraper_offline(fixture_text):
|
|||
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] # type: ignore[assignment]
|
||||
elif scraper_name == "codechef":
|
||||
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] # type: ignore[assignment]
|
||||
fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
|
||||
fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
|
||||
|
||||
main_async = getattr(ns, "main_async")
|
||||
assert callable(main_async), f"main_async not found in {scraper_name}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue