refactor: centralize timeout constants in scrapers/timeouts.py

Problem: each scraper defined its own timeout constant (`TIMEOUT_S`, `TIMEOUT_SECONDS`) with inconsistent values (15s vs 30s), and browser timeouts were scattered through the code as magic numbers (60000, 15000, 5000, 500). Solution: introduce `scrapers/timeouts.py`, which defines named constants for HTTP request timeouts, for browser session/navigation/element/turnstile/settle timeouts, and for submission polling. All six scrapers now import their timeouts from this shared module.
2026-03-05 01:35:40 -05:00 · 2026-03-05 01:35:40 -05:00 · 2cdde85d36
commit 2cdde85d36
parent f4055b071b
7 changed files with 58 additions and 37 deletions
--- a/scrapers/cses.py
+++ b/scrapers/cses.py
@ -9,6 +9,7 @@ from typing import Any
 import httpx

 from .base import BaseScraper, extract_precision
+from .timeouts import HTTP_TIMEOUT, SUBMIT_POLL_TIMEOUT
 from .models import (
    ContestListResult,
    ContestSummary,
@ -26,7 +27,6 @@ TASK_PATH = "/problemset/task/{id}"
 HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 }
-TIMEOUT_S = 15.0
 CONNECTIONS = 8

 CSES_LANGUAGES: dict[str, dict[str, str]] = {
@ -78,7 +78,7 @@ def snake_to_title(name: str) -> str:


 async def fetch_text(client: httpx.AsyncClient, path: str) -> str:
-    r = await client.get(BASE_URL + path, headers=HEADERS, timeout=TIMEOUT_S)
+    r = await client.get(BASE_URL + path, headers=HEADERS, timeout=HTTP_TIMEOUT)
    r.raise_for_status()
    return r.text

@ -290,7 +290,7 @@ class CSESScraper(BaseScraper):
        password: str,
    ) -> str | None:
        login_page = await client.get(
-            f"{BASE_URL}/login", headers=HEADERS, timeout=TIMEOUT_S
+            f"{BASE_URL}/login", headers=HEADERS, timeout=HTTP_TIMEOUT
        )
        csrf_match = re.search(r'name="csrf_token" value="([^"]+)"', login_page.text)
        if not csrf_match:
@ -304,20 +304,20 @@ class CSESScraper(BaseScraper):
                "pass": password,
            },
            headers=HEADERS,
-            timeout=TIMEOUT_S,
+            timeout=HTTP_TIMEOUT,
        )

        if "Invalid username or password" in login_resp.text:
            return None

        api_resp = await client.post(
-            f"{API_URL}/login", headers=HEADERS, timeout=TIMEOUT_S
+            f"{API_URL}/login", headers=HEADERS, timeout=HTTP_TIMEOUT
        )
        api_data = api_resp.json()
        token: str = api_data["X-Auth-Token"]
        auth_url: str = api_data["authentication_url"]

-        auth_page = await client.get(auth_url, headers=HEADERS, timeout=TIMEOUT_S)
+        auth_page = await client.get(auth_url, headers=HEADERS, timeout=HTTP_TIMEOUT)
        auth_csrf = re.search(r'name="csrf_token" value="([^"]+)"', auth_page.text)
        form_token = re.search(r'name="token" value="([^"]+)"', auth_page.text)
        if not auth_csrf or not form_token:
@ -330,13 +330,13 @@ class CSESScraper(BaseScraper):
                "token": form_token.group(1),
            },
            headers=HEADERS,
-            timeout=TIMEOUT_S,
+            timeout=HTTP_TIMEOUT,
        )

        check = await client.get(
            f"{API_URL}/login",
            headers={"X-Auth-Token": token, **HEADERS},
-            timeout=TIMEOUT_S,
+            timeout=HTTP_TIMEOUT,
        )
        if check.status_code != 200:
            return None
@ -349,7 +349,7 @@ class CSESScraper(BaseScraper):
            r = await client.get(
                f"{API_URL}/login",
                headers={"X-Auth-Token": token, **HEADERS},
-                timeout=TIMEOUT_S,
+                timeout=HTTP_TIMEOUT,
            )
            return r.status_code == 200
        except Exception:
@ -415,7 +415,7 @@ class CSESScraper(BaseScraper):
                    "Content-Type": "application/json",
                    **HEADERS,
                },
-                timeout=TIMEOUT_S,
+                timeout=HTTP_TIMEOUT,
            )

            if r.status_code not in range(200, 300):
@ -438,7 +438,7 @@ class CSESScraper(BaseScraper):
                            "X-Auth-Token": token,
                            **HEADERS,
                        },
-                        timeout=30.0,
+                        timeout=SUBMIT_POLL_TIMEOUT,
                    )
                    if r.status_code == 200:
                        info = r.json()