try to fix the setup

This commit is contained in:
Barrett Ruth 2026-02-18 13:33:49 -05:00 committed by Barrett Ruth
parent b36ffba63a
commit 1162e7046b
11 changed files with 256 additions and 1359 deletions

View file

@@ -6,7 +6,7 @@ import re
from typing import Any
import httpx
from scrapling.fetchers import Fetcher
from curl_cffi import requests as curl_requests
from .base import BaseScraper
from .models import (
@@ -50,8 +50,9 @@ def _extract_memory_limit(html: str) -> float:
def _fetch_html_sync(url: str) -> str:
response = Fetcher.get(url)
return str(response.body)
response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_S)
response.raise_for_status()
return response.text
class CodeChefScraper(BaseScraper):

View file

@@ -2,13 +2,12 @@
import asyncio
import json
import logging
import re
from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
from scrapling.fetchers import Fetcher
from curl_cffi import requests as curl_requests
from .base import BaseScraper
from .models import (
@@ -19,10 +18,6 @@ from .models import (
TestCase,
)
# suppress scrapling logging - https://github.com/D4Vinci/Scrapling/issues/31)
logging.getLogger("scrapling").setLevel(logging.CRITICAL)
BASE_URL = "https://codeforces.com"
API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
TIMEOUT_SECONDS = 30
@@ -140,10 +135,9 @@ def _is_interactive(block: Tag) -> bool:
def _fetch_problems_html(contest_id: str) -> str:
url = f"{BASE_URL}/contest/{contest_id}/problems"
page = Fetcher.get(
url,
)
return page.html_content
response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_SECONDS)
response.raise_for_status()
return response.text
def _parse_all_blocks(html: str) -> list[dict[str, Any]]: