try to fix the setup
This commit is contained in:
parent
b36ffba63a
commit
1162e7046b
11 changed files with 256 additions and 1359 deletions
|
|
@ -6,7 +6,7 @@ import re
|
|||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from scrapling.fetchers import Fetcher
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@ -50,8 +50,9 @@ def _extract_memory_limit(html: str) -> float:
|
|||
|
||||
|
||||
def _fetch_html_sync(url: str) -> str:
|
||||
response = Fetcher.get(url)
|
||||
return str(response.body)
|
||||
response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_S)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
|
||||
|
||||
class CodeChefScraper(BaseScraper):
|
||||
|
|
|
|||
|
|
@ -2,13 +2,12 @@
|
|||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from scrapling.fetchers import Fetcher
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
from .base import BaseScraper
|
||||
from .models import (
|
||||
|
|
@ -19,10 +18,6 @@ from .models import (
|
|||
TestCase,
|
||||
)
|
||||
|
||||
# suppress scrapling logging (see https://github.com/D4Vinci/Scrapling/issues/31)
|
||||
logging.getLogger("scrapling").setLevel(logging.CRITICAL)
|
||||
|
||||
|
||||
BASE_URL = "https://codeforces.com"
|
||||
API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
|
||||
TIMEOUT_SECONDS = 30
|
||||
|
|
@ -140,10 +135,9 @@ def _is_interactive(block: Tag) -> bool:
|
|||
|
||||
def _fetch_problems_html(contest_id: str) -> str:
|
||||
url = f"{BASE_URL}/contest/{contest_id}/problems"
|
||||
page = Fetcher.get(
|
||||
url,
|
||||
)
|
||||
return page.html_content
|
||||
response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_SECONDS)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
|
||||
|
||||
def _parse_all_blocks(html: str) -> list[dict[str, Any]]:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue