From 86b20aaee5f32dd3d31224a0c6c0b07354299b6c Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Sat, 7 Mar 2026 16:29:02 -0500 Subject: [PATCH] scrapers: reformat long imports and calls --- scrapers/codechef.py | 11 +++++++---- scrapers/codeforces.py | 21 ++++++++++++++++----- scrapers/kattis.py | 8 +++++++- scrapers/usaco.py | 7 ++++++- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 998aa24..d7cb3ce 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -9,7 +9,12 @@ from typing import Any import httpx -from .base import BaseScraper, clear_platform_cookies, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + load_platform_cookies, + save_platform_cookies, +) from .timeouts import BROWSER_SESSION_TIMEOUT, HTTP_TIMEOUT from .models import ( ContestListResult, @@ -234,9 +239,7 @@ def _submit_headless_codechef( print(json.dumps({"status": "logging_in"}), flush=True) session.fetch(f"{BASE_URL}/login", page_action=login_action) if login_error: - return SubmitResult( - success=False, error=login_error - ) + return SubmitResult(success=False, error=login_error) logged_in = True if not _practice: diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index 19b8208..5bbfa38 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -8,7 +8,13 @@ from typing import Any import requests from bs4 import BeautifulSoup, Tag -from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .models import ( ContestListResult, ContestSummary, @@ -387,7 +393,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: google_search=False, cookies=saved_cookies, ) as session: - session.fetch(f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True) + 
session.fetch( + f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True + ) if logged_in: return LoginResult(success=True, error="") except Exception: @@ -419,7 +427,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True) if not logged_in: - return LoginResult(success=False, error="Login failed (bad credentials?)") + return LoginResult( + success=False, error="Login failed (bad credentials?)" + ) try: browser_cookies = session.context.cookies() @@ -445,7 +455,6 @@ def _submit_headless( source_code = Path(file_path).read_text() - try: from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] except ImportError: @@ -519,7 +528,9 @@ def _submit_headless( ) as session: if not _retried and saved_cookies: print(json.dumps({"status": "checking_login"}), flush=True) - session.fetch(f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True) + session.fetch( + f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True + ) if not logged_in: print(json.dumps({"status": "logging_in"}), flush=True) diff --git a/scrapers/kattis.py b/scrapers/kattis.py index 373d749..ac2c157 100644 --- a/scrapers/kattis.py +++ b/scrapers/kattis.py @@ -10,7 +10,13 @@ from pathlib import Path import httpx -from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, diff --git a/scrapers/usaco.py b/scrapers/usaco.py index 3c542ab..b6e95d2 100644 --- a/scrapers/usaco.py +++ b/scrapers/usaco.py @@ -8,7 +8,12 @@ from typing import Any, cast import httpx -from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + 
extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult,