try to fix the setup

2026-02-18 13:33:49 -05:00 · 2026-02-18 13:33:49 -05:00 · 1162e7046b
commit 1162e7046b
parent b36ffba63a
11 changed files with 256 additions and 1359 deletions
--- a/scrapers/codechef.py
+++ b/scrapers/codechef.py
@@ -6,7 +6,7 @@ import re
 from typing import Any

 import httpx
-from scrapling.fetchers import Fetcher
+from curl_cffi import requests as curl_requests

 from .base import BaseScraper
 from .models import (
@@ -50,8 +50,9 @@ def _extract_memory_limit(html: str) -> float:


 def _fetch_html_sync(url: str) -> str:
-    response = Fetcher.get(url)
-    return str(response.body)
+    response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_S)
+    response.raise_for_status()
+    return response.text


 class CodeChefScraper(BaseScraper):
--- a/scrapers/codeforces.py
+++ b/scrapers/codeforces.py
@@ -2,13 +2,12 @@

 import asyncio
 import json
-import logging
 import re
 from typing import Any

 import requests
 from bs4 import BeautifulSoup, Tag
-from scrapling.fetchers import Fetcher
+from curl_cffi import requests as curl_requests

 from .base import BaseScraper
 from .models import (
@@ -19,10 +18,6 @@ from .models import (
    TestCase,
 )

-# suppress scrapling logging - https://github.com/D4Vinci/Scrapling/issues/31)
-logging.getLogger("scrapling").setLevel(logging.CRITICAL)
-
-
 BASE_URL = "https://codeforces.com"
 API_CONTEST_LIST_URL = f"{BASE_URL}/api/contest.list"
 TIMEOUT_SECONDS = 30
@@ -140,10 +135,9 @@ def _is_interactive(block: Tag) -> bool:

 def _fetch_problems_html(contest_id: str) -> str:
    url = f"{BASE_URL}/contest/{contest_id}/problems"
-    page = Fetcher.get(
-        url,
-    )
-    return page.html_content
+    response = curl_requests.get(url, impersonate="chrome", timeout=TIMEOUT_SECONDS)
+    response.raise_for_status()
+    return response.text


 def _parse_all_blocks(html: str) -> list[dict[str, Any]]: