diff --git a/scrapers/atcoder.py b/scrapers/atcoder.py index b750a68..c52190b 100644 --- a/scrapers/atcoder.py +++ b/scrapers/atcoder.py @@ -6,7 +6,6 @@ import os import re import subprocess import time -from pathlib import Path from typing import Any import backoff @@ -16,7 +15,13 @@ from bs4 import BeautifulSoup, Tag from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .models import ( ContestListResult, ContestSummary, @@ -432,7 +437,9 @@ def _login_headless(credentials: dict[str, str]) -> LoginResult: google_search=False, cookies=saved_cookies, ) as session: - session.fetch(f"{BASE_URL}/home", page_action=check_action, network_idle=True) + session.fetch( + f"{BASE_URL}/home", page_action=check_action, network_idle=True + ) if logged_in: return LoginResult(success=True, error="") except Exception: @@ -462,9 +469,13 @@ def _login_headless(credentials: dict[str, str]) -> LoginResult: nonlocal logged_in logged_in = _at_check_logged_in(page) - session.fetch(f"{BASE_URL}/home", page_action=verify_action, network_idle=True) + session.fetch( + f"{BASE_URL}/home", page_action=verify_action, network_idle=True + ) if not logged_in: - return LoginResult(success=False, error="Login failed (bad credentials?)") + return LoginResult( + success=False, error="Login failed (bad credentials?)" + ) try: browser_cookies = session.context.cookies() @@ -547,7 +558,9 @@ def _submit_headless( ) as session: if not _retried and saved_cookies: print(json.dumps({"status": "checking_login"}), flush=True) - session.fetch(f"{BASE_URL}/home", page_action=check_login, network_idle=True) + session.fetch( + f"{BASE_URL}/home", page_action=check_login, network_idle=True + ) if not logged_in: print(json.dumps({"status": "logging_in"}), flush=True) @@ -558,7 +571,9 @@ def _submit_headless( ) login_error = get_login_error() if login_error: - return SubmitResult(success=False, error=f"Login failed: {login_error}") + return SubmitResult( + success=False, error=f"Login failed: {login_error}" + ) logged_in = True try: browser_cookies = session.context.cookies() @@ -577,13 +592,20 @@ def _submit_headless( if needs_relogin and not _retried: clear_platform_cookies("atcoder") return _submit_headless( - contest_id, problem_id, file_path, language_id, credentials, _retried=True + contest_id, + problem_id, + file_path, + language_id, + credentials, + _retried=True, ) if submit_error: return SubmitResult(success=False, error=submit_error) - return SubmitResult(success=True, error="", submission_id="", verdict="submitted") + return SubmitResult( + success=True, error="", submission_id="", verdict="submitted" + ) except Exception as e: return SubmitResult(success=False, error=str(e)) diff --git a/scrapers/base.py b/scrapers/base.py index 03b467a..035495a 100644 --- a/scrapers/base.py +++ b/scrapers/base.py @@ -7,6 +7,16 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Any +from .language_ids import get_language_id +from .models import ( + CombinedTest, + ContestListResult, + LoginResult, + MetadataResult, + SubmitResult, + TestsResult, +) + _COOKIE_FILE = Path.home() / ".cache" / "cp-nvim" / "cookies.json" @@ -37,16 +47,6 @@ def clear_platform_cookies(platform: str) -> None: pass -from .language_ids import get_language_id -from .models import ( - CombinedTest, - ContestListResult, - LoginResult, - MetadataResult, - SubmitResult, - TestsResult, -) - _PRECISION_ABS_REL_RE = re.compile( r"(?:absolute|relative)\s+error[^.]*?10\s*[\^{]\s*\{?\s*[-\u2212]\s*(\d+)\s*\}?", re.IGNORECASE, diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 998aa24..b64fdf5 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -9,8 +9,18 @@ from typing import Any import httpx -from .base import BaseScraper, clear_platform_cookies, load_platform_cookies, save_platform_cookies -from .timeouts import BROWSER_SESSION_TIMEOUT, HTTP_TIMEOUT +from .base import ( + BaseScraper, + clear_platform_cookies, + load_platform_cookies, + save_platform_cookies, +) +from .timeouts import ( + BROWSER_ELEMENT_WAIT, + BROWSER_NAV_TIMEOUT, + BROWSER_SESSION_TIMEOUT, + HTTP_TIMEOUT, +) from .models import ( ContestListResult, ContestSummary, @@ -53,6 +63,29 @@ async def fetch_json(client: httpx.AsyncClient, path: str) -> dict[str, Any]: return r.json() +def _cc_check_logged_in(page) -> bool: + return "dashboard" in page.url or page.evaluate(_CC_CHECK_LOGIN_JS) + + +def _cc_login_action(credentials: dict[str, str]): + login_error: str | None = None + + def login_action(page): + nonlocal login_error + try: + page.locator('input[name="name"]').fill(credentials.get("username", "")) + page.locator('input[name="pass"]').fill(credentials.get("password", "")) + page.locator("input.cc-login-btn").click() + page.wait_for_function( + "() => !window.location.pathname.includes('/login')", + timeout=BROWSER_NAV_TIMEOUT, + ) + except Exception as e: + login_error = str(e) + + return login_action, lambda: login_error + + def _login_headless_codechef(credentials: dict[str, str]) -> LoginResult: try: from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] @@ -66,26 +99,32 @@ def _login_headless_codechef(credentials: dict[str, str]) -> LoginResult: _ensure_browser() - logged_in = False - login_error: str | None = None + saved_cookies = load_platform_cookies("codechef") or [] - def check_login(page): - nonlocal logged_in - logged_in = "dashboard" in page.url or page.evaluate(_CC_CHECK_LOGIN_JS) + if saved_cookies: + print(json.dumps({"status": "checking_login"}), flush=True) + logged_in = False + + def check_action(page): + nonlocal logged_in + logged_in = _cc_check_logged_in(page) - def login_action(page): - nonlocal login_error try: - page.locator('input[name="name"]').fill(credentials.get("username", "")) - page.locator('input[name="pass"]').fill(credentials.get("password", "")) - page.locator("input.cc-login-btn").click() - try: - page.wait_for_url(lambda url: "/login" not in url, timeout=3000) - except Exception: - login_error = "bad credentials?" - return - except Exception as e: - login_error = str(e) + with StealthySession( + headless=True, + timeout=BROWSER_SESSION_TIMEOUT, + google_search=False, + cookies=saved_cookies, + ) as session: + session.fetch( + f"{BASE_URL}/", page_action=check_action, network_idle=True + ) + if logged_in: + return LoginResult(success=True, error="") + except Exception: + pass + + login_action, get_error = _cc_login_action(credentials) try: with StealthySession( @@ -94,11 +133,20 @@ def _login_headless_codechef(credentials: dict[str, str]) -> LoginResult: google_search=False, ) as session: print(json.dumps({"status": "logging_in"}), flush=True) - session.fetch(f"{BASE_URL}/login", page_action=login_action) + session.fetch( + f"{BASE_URL}/login", page_action=login_action, network_idle=True + ) + login_error = get_error() if login_error: - return LoginResult(success=False, error=login_error) + return LoginResult(success=False, error=f"Login failed: {login_error}") - session.fetch(f"{BASE_URL}/", page_action=check_login, network_idle=True) + logged_in = False + + def verify_action(page): + nonlocal logged_in + logged_in = _cc_check_logged_in(page) + + session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True) if not logged_in: return LoginResult( success=False, error="Login failed (bad credentials?)" @@ -106,7 +154,7 @@ def _login_headless_codechef(credentials: dict[str, str]) -> LoginResult: try: browser_cookies = session.context.cookies() - if browser_cookies: + if any(c.get("name") == "userkey" for c in browser_cookies): save_platform_cookies("codechef", browser_cookies) except Exception: pass @@ -144,27 +192,14 @@ def _submit_headless_codechef( saved_cookies = load_platform_cookies("codechef") or [] logged_in = bool(saved_cookies) - login_error: str | None = None submit_error: str | None = None needs_relogin = False def check_login(page): nonlocal logged_in - logged_in = "dashboard" in page.url or page.evaluate(_CC_CHECK_LOGIN_JS) + logged_in = _cc_check_logged_in(page) - def login_action(page): - nonlocal login_error - try: - page.locator('input[name="name"]').fill(credentials.get("username", "")) - page.locator('input[name="pass"]').fill(credentials.get("password", "")) - page.locator("input.cc-login-btn").click() - try: - page.wait_for_url(lambda url: "/login" not in url, timeout=3000) - except Exception: - login_error = "bad credentials?" - return - except Exception as e: - login_error = str(e) + _login_action, _get_login_error = _cc_login_action(credentials) def submit_action(page): nonlocal submit_error, needs_relogin @@ -172,12 +207,13 @@ def _submit_headless_codechef( needs_relogin = True return try: - page.wait_for_selector('[aria-haspopup="listbox"]', timeout=10000) + page.wait_for_selector( + '[aria-haspopup="listbox"]', timeout=BROWSER_ELEMENT_WAIT + ) page.locator('[aria-haspopup="listbox"]').click() - page.wait_for_selector('[role="option"]', timeout=5000) + page.wait_for_selector('[role="option"]', timeout=BROWSER_ELEMENT_WAIT) page.locator(f'[role="option"][data-value="{language_id}"]').click() - page.wait_for_timeout(250) page.locator(".ace_editor").click() page.keyboard.press("Control+a") @@ -192,7 +228,6 @@ def _submit_headless_codechef( }""", source_code, ) - page.wait_for_timeout(125) page.evaluate( "() => document.getElementById('submit_btn').scrollIntoView({block:'center'})" @@ -226,16 +261,21 @@ def _submit_headless_codechef( google_search=False, cookies=saved_cookies if saved_cookies else [], ) as session: - if not _retried and not _practice: + if not _retried and not _practice and saved_cookies: print(json.dumps({"status": "checking_login"}), flush=True) - session.fetch(f"{BASE_URL}/", page_action=check_login) + session.fetch( + f"{BASE_URL}/", page_action=check_login, network_idle=True + ) if not logged_in: print(json.dumps({"status": "logging_in"}), flush=True) - session.fetch(f"{BASE_URL}/login", page_action=login_action) + session.fetch( + f"{BASE_URL}/login", page_action=_login_action, network_idle=True + ) + login_error = _get_login_error() if login_error: return SubmitResult( - success=False, error=login_error + success=False, error=f"Login failed: {login_error}" ) logged_in = True @@ -250,7 +290,7 @@ def _submit_headless_codechef( try: browser_cookies = session.context.cookies() - if browser_cookies and logged_in: + if any(c.get("name") == "userkey" for c in browser_cookies): save_platform_cookies("codechef", browser_cookies) except Exception: pass diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index 19b8208..5bbfa38 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -8,7 +8,13 @@ from typing import Any import requests from bs4 import BeautifulSoup, Tag -from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .models import ( ContestListResult, ContestSummary, @@ -387,7 +393,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: google_search=False, cookies=saved_cookies, ) as session: - session.fetch(f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True) + session.fetch( + f"{BASE_URL}/", page_action=check_action, solve_cloudflare=True + ) if logged_in: return LoginResult(success=True, error="") except Exception: @@ -419,7 +427,9 @@ def _login_headless_cf(credentials: dict[str, str]) -> LoginResult: session.fetch(f"{BASE_URL}/", page_action=verify_action, network_idle=True) if not logged_in: - return LoginResult(success=False, error="Login failed (bad credentials?)") + return LoginResult( + success=False, error="Login failed (bad credentials?)" + ) try: browser_cookies = session.context.cookies() @@ -445,7 +455,6 @@ def _submit_headless( source_code = Path(file_path).read_text() - try: from scrapling.fetchers import StealthySession # type: ignore[import-untyped,unresolved-import] except ImportError: @@ -519,7 +528,9 @@ def _submit_headless( ) as session: if not _retried and saved_cookies: print(json.dumps({"status": "checking_login"}), flush=True) - session.fetch(f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True) + session.fetch( + f"{BASE_URL}/", page_action=check_login, solve_cloudflare=True + ) if not logged_in: print(json.dumps({"status": "logging_in"}), flush=True) diff --git a/scrapers/kattis.py b/scrapers/kattis.py index 373d749..ac2c157 100644 --- a/scrapers/kattis.py +++ b/scrapers/kattis.py @@ -10,7 +10,13 @@ from pathlib import Path import httpx -from .base import BaseScraper, clear_platform_cookies, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + clear_platform_cookies, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult, diff --git a/scrapers/usaco.py b/scrapers/usaco.py index 3c542ab..b6e95d2 100644 --- a/scrapers/usaco.py +++ b/scrapers/usaco.py @@ -8,7 +8,12 @@ from typing import Any, cast import httpx -from .base import BaseScraper, extract_precision, load_platform_cookies, save_platform_cookies +from .base import ( + BaseScraper, + extract_precision, + load_platform_cookies, + save_platform_cookies, +) from .timeouts import HTTP_TIMEOUT from .models import ( ContestListResult,