fix: use a diff scraper for now
This commit is contained in:
parent
eb3f93587f
commit
dfd8275421
3 changed files with 8 additions and 10 deletions
|
|
@ -6,7 +6,7 @@ import sys
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from scrapling.fetchers import StealthyFetcher
|
from scrapling.fetchers import Fetcher
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper
|
||||||
from .models import (
|
from .models import (
|
||||||
|
|
@ -52,7 +52,7 @@ def _extract_memory_limit(html: str) -> float:
|
||||||
|
|
||||||
|
|
||||||
def _fetch_html_sync(url: str) -> str:
|
def _fetch_html_sync(url: str) -> str:
|
||||||
response = StealthyFetcher.fetch(url, headless=True, network_idle=True)
|
response = Fetcher.get(url)
|
||||||
return str(response.body)
|
return str(response.body)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from typing import Any
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
from scrapling.fetchers import StealthyFetcher
|
from scrapling.fetchers import Fetcher
|
||||||
|
|
||||||
from .base import BaseScraper
|
from .base import BaseScraper
|
||||||
from .models import (
|
from .models import (
|
||||||
|
|
@ -143,10 +143,8 @@ def _is_interactive(block: Tag) -> bool:
|
||||||
|
|
||||||
def _fetch_problems_html(contest_id: str) -> str:
|
def _fetch_problems_html(contest_id: str) -> str:
|
||||||
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
||||||
page = StealthyFetcher.fetch(
|
page = Fetcher.get(
|
||||||
url,
|
url,
|
||||||
headless=True,
|
|
||||||
solve_cloudflare=True,
|
|
||||||
)
|
)
|
||||||
return page.html_content
|
return page.html_content
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -172,7 +172,7 @@ def run_scraper_offline(fixture_text):
|
||||||
raise AssertionError(f"Unexpected requests.get call: {url}")
|
raise AssertionError(f"Unexpected requests.get call: {url}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"StealthyFetcher.fetch": _mock_stealthy_fetch,
|
"Fetcher.get": _mock_stealthy_fetch,
|
||||||
"requests.get": _mock_requests_get,
|
"requests.get": _mock_requests_get,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -226,7 +226,7 @@ def run_scraper_offline(fixture_text):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"__offline_get_async": __offline_get_async,
|
"__offline_get_async": __offline_get_async,
|
||||||
"StealthyFetcher.fetch": _mock_stealthy_fetch,
|
"Fetcher.get": _mock_stealthy_fetch,
|
||||||
}
|
}
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
|
|
@ -238,7 +238,7 @@ def run_scraper_offline(fixture_text):
|
||||||
offline_fetches = _make_offline_fetches(scraper_name)
|
offline_fetches = _make_offline_fetches(scraper_name)
|
||||||
|
|
||||||
if scraper_name == "codeforces":
|
if scraper_name == "codeforces":
|
||||||
fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
|
fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
|
||||||
requests.get = offline_fetches["requests.get"]
|
requests.get = offline_fetches["requests.get"]
|
||||||
elif scraper_name == "atcoder":
|
elif scraper_name == "atcoder":
|
||||||
ns._fetch = offline_fetches["_fetch"]
|
ns._fetch = offline_fetches["_fetch"]
|
||||||
|
|
@ -247,7 +247,7 @@ def run_scraper_offline(fixture_text):
|
||||||
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] # type: ignore[assignment]
|
httpx.AsyncClient.get = offline_fetches["__offline_fetch_text"] # type: ignore[assignment]
|
||||||
elif scraper_name == "codechef":
|
elif scraper_name == "codechef":
|
||||||
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] # type: ignore[assignment]
|
httpx.AsyncClient.get = offline_fetches["__offline_get_async"] # type: ignore[assignment]
|
||||||
fetchers.StealthyFetcher.fetch = offline_fetches["StealthyFetcher.fetch"] # type: ignore[assignment]
|
fetchers.Fetcher.get = offline_fetches["Fetcher.get"] # type: ignore[assignment]
|
||||||
|
|
||||||
main_async = getattr(ns, "main_async")
|
main_async = getattr(ns, "main_async")
|
||||||
assert callable(main_async), f"main_async not found in {scraper_name}"
|
assert callable(main_async), f"main_async not found in {scraper_name}"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue