diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index 5ac615a..a0a0fe2 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -13,6 +13,7 @@ from .base import BaseScraper, extract_precision from .models import ( ContestListResult, ContestSummary, + LoginResult, MetadataResult, ProblemSummary, SubmitResult, @@ -303,6 +304,111 @@ class CodeforcesScraper(BaseScraper): credentials, ) + async def login(self, credentials: dict[str, str]) -> LoginResult: + if not credentials.get("username") or not credentials.get("password"): + return self._login_error("Missing username or password") + return await asyncio.to_thread(_login_headless_cf, credentials) + + +def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: + from pathlib import Path + + try: + from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] + except ImportError: + return LoginResult( + success=False, + error="scrapling is required for Codeforces login", + ) + + from .atcoder import _ensure_browser + + _ensure_browser() + + cookie_cache = Path.home() / ".cache" / "cp-nvim" / "codeforces-cookies.json" + cookie_cache.parent.mkdir(parents=True, exist_ok=True) + saved_cookies: list[dict[str, Any]] = [] + if cookie_cache.exists(): + try: + saved_cookies = json.loads(cookie_cache.read_text()) + except Exception: + pass + + logged_in = False + login_error: str | None = None + + def check_login(page): + nonlocal logged_in + logged_in = page.evaluate( + "() => Array.from(document.querySelectorAll('a'))" + ".some(a => a.textContent.includes('Logout'))" + ) + + def login_action(page): + nonlocal login_error + try: + page.fill( + 'input[name="handleOrEmail"]', + credentials.get("username", ""), + ) + page.fill( + 'input[name="password"]', + credentials.get("password", ""), + ) + page.locator('#enterForm input[type="submit"]').click() + page.wait_for_url( + lambda url: "/enter" not in url, timeout=BROWSER_NAV_TIMEOUT + ) + except Exception as e: + login_error = str(e) + + try: + with StealthySession( + headless=True, + timeout=BROWSER_SESSION_TIMEOUT, + google_search=False, + cookies=saved_cookies if saved_cookies else [], + ) as session: + if saved_cookies: + print(json.dumps({"status": "checking_login"}), flush=True) + session.fetch( + f"{BASE_URL}/", + page_action=check_login, + network_idle=True, + ) + + if not logged_in: + print(json.dumps({"status": "logging_in"}), flush=True) + session.fetch( + f"{BASE_URL}/enter", + page_action=login_action, + solve_cloudflare=True, + ) + if login_error: + return LoginResult( + success=False, error=f"Login failed: {login_error}" + ) + + session.fetch( + f"{BASE_URL}/", + page_action=check_login, + network_idle=True, + ) + if not logged_in: + return LoginResult( + success=False, error="Login failed (bad credentials?)" + ) + + try: + browser_cookies = session.context.cookies() + cookie_cache.write_text(json.dumps(browser_cookies)) + except Exception: + pass + + return LoginResult(success=True, error="") + except Exception as e: + return LoginResult(success=False, error=str(e)) + def _submit_headless( contest_id: str,