#!/usr/bin/env python3
"""Helpers for fetching and naming content from the CSES problem set site."""

import asyncio
import json
import re
import sys
from dataclasses import asdict
from typing import Any

import httpx

from .base import BaseScraper
from .models import (
    ContestListResult,
    ContestSummary,
    MetadataResult,
    ProblemSummary,
    TestCase,
    TestsResult,
)

BASE_URL = "https://cses.fi"
INDEX_PATH = "/problemset/list"
TASK_PATH = "/problemset/task/{id}"
# Browser-like User-Agent sent with every request made by fetch_text().
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
# Per-request timeout, in seconds, passed to the HTTP client by fetch_text().
TIMEOUT_S = 15.0
# NOTE(review): presumably the cap on concurrent connections/requests; its use
# is not visible in this part of the file -- confirm against the caller.
CONNECTIONS = 8

# Words kept lowercase by snake_to_title() unless they are the first or the
# last word of the title.
_SMALL_WORDS = frozenset(
    {
        "a",
        "an",
        "the",
        "and",
        "but",
        "or",
        "nor",
        "for",
        "so",
        "yet",
        "at",
        "by",
        "in",
        "of",
        "on",
        "per",
        "to",
        "vs",
        "via",
    }
)


def normalize_category_name(category_name: str) -> str:
    """Return *category_name* as a lowercase, underscore-separated identifier.

    Spaces become underscores and ``&`` becomes ``and``, so
    ``"Sorting & Searching"`` turns into ``"sorting_and_searching"``.
    """
    return category_name.lower().replace(" ", "_").replace("&", "and")


def snake_to_title(name: str) -> str:
    """Convert a ``snake_case`` name into a space-separated title.

    Every word is lowercased and then capitalized, except "small" words
    (articles, conjunctions, short prepositions), which stay lowercase unless
    they are the first or the last word. For example,
    ``"sorting_and_searching"`` becomes ``"Sorting and Searching"``.
    """
    words: list[str] = name.split("_")
    last_index = len(words) - 1

    def fix_word(index: int, word: str) -> str:
        lowered = word.lower()
        # The first and last words are always capitalized, even small ones.
        if index in (0, last_index) or lowered not in _SMALL_WORDS:
            return lowered.capitalize()
        return lowered

    return " ".join(fix_word(index, word) for index, word in enumerate(words))


async def fetch_text(client: httpx.AsyncClient, path: str) -> str:
    """Fetch ``BASE_URL + path`` with *client* and return the response text.

    The request carries ``HEADERS`` and is limited to ``TIMEOUT_S`` seconds.
    ``raise_for_status()`` is called on the response before its text is
    returned, so an error status propagates to the caller as an exception
    instead of yielding an error page.
    """
    response = await client.get(BASE_URL + path, headers=HEADERS, timeout=TIMEOUT_S)
    response.raise_for_status()
    return response.text


# NOTE(review): the original line continues with the start of a compiled-regex
# definition that is cut off in the reviewed source. It is preserved verbatim
# below rather than reconstructed; restore the full statement from the
# original file:
#   CATEGORY_BLOCK_RE = re.compile( r'