Merge pull request #202 from barrett-ruth/fix/notice

use `scrapling.Fetcher.get`, not `scrapling.StealthyFetcher.fetch`
This commit is contained in:
Barrett Ruth 2025-12-08 19:48:15 -06:00 committed by GitHub
commit edb341ae51
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 8 additions and 10 deletions

View file

@@ -6,7 +6,7 @@ import sys
from typing import Any
import httpx
-from scrapling.fetchers import StealthyFetcher
+from scrapling.fetchers import Fetcher
from .base import BaseScraper
from .models import (
@@ -52,7 +52,7 @@ def _extract_memory_limit(html: str) -> float:
def _fetch_html_sync(url: str) -> str:
-response = StealthyFetcher.fetch(url, headless=True, network_idle=True)
+response = Fetcher.get(url)
return str(response.body)

View file

@@ -9,7 +9,7 @@ from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
-from scrapling.fetchers import StealthyFetcher
+from scrapling.fetchers import Fetcher
from .base import BaseScraper
from .models import (
@@ -143,10 +143,8 @@ def _is_interactive(block: Tag) -> bool:
def _fetch_problems_html(contest_id: str) -> str:
url = f"{BASE_URL}/contest/{contest_id}/problems"
-page = StealthyFetcher.fetch(
+page = Fetcher.get(
url,
-headless=True,
-solve_cloudflare=True,
)
return page.html_content

View file

@@ -172,7 +172,7 @@ def run_scraper_offline(fixture_text):
raise AssertionError(f"Unexpected requests.get call: {url}")
return {
-"StealthyFetcher.fetch": _mock_stealthy_fetch,
+"Fetcher.get": _mock_stealthy_fetch,
"requests.get": _mock_requests_get,
}
@@ -226,7 +226,7 @@ def run_scraper_offline(fixture_text):
return {
"__offline_get_async": __offline_get_async,
-"StealthyFetcher.fetch": _mock_stealthy_fetch,
+"Fetcher.get": _mock_stealthy_fetch,
}
case _:
@@ -238,7 +238,7 @@ def run_scraper_offline(fixture_text):
offline_fetches = _make_offline_fetches(scraper_name)
if scraper_name == "codeforces":
-fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
+fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
requests.get = offline_fetches["requests.get"]
elif scraper_name == "atcoder":
ns._fetch = offline_fetches["_fetch"]
@@ -247,7 +247,7 @@ def run_scraper_offline(fixture_text):
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] # type: ignore[assignment]
elif scraper_name == "codechef":
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] # type: ignore[assignment]
-fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
+fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
main_async = getattr(ns, "main_async")
assert callable(main_async), f"main_async not found in {scraper_name}"