From 67c23c4d695026704bf008860e51452c23f93225 Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Tue, 30 Sep 2025 22:33:36 -0400 Subject: [PATCH] better scraper config --- doc/cp.nvim.txt | 2 +- scrapers/codeforces.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/doc/cp.nvim.txt b/doc/cp.nvim.txt index b4a00d3..07d4312 100644 --- a/doc/cp.nvim.txt +++ b/doc/cp.nvim.txt @@ -28,7 +28,7 @@ COMMANDS *cp-commands* cp.nvim uses a single :CP command with intelligent argument parsing: State Restoration ~ - :CP Restore contest context from current file. + :CP Restore state from current file. Automatically detects platform, contest, problem, and language from cached state. Use this after switching files to restore your CP environment. diff --git a/scrapers/codeforces.py b/scrapers/codeforces.py index d98e657..94abf85 100644 --- a/scrapers/codeforces.py +++ b/scrapers/codeforces.py @@ -7,7 +7,7 @@ from dataclasses import asdict import requests from bs4 import BeautifulSoup, Tag -from scrapling.fetchers import StealthySession +from scrapling.fetchers import StealthyFetcher from .base import BaseScraper from .models import ( @@ -22,9 +22,8 @@ from .models import ( def scrape(url: str) -> list[TestCase]: try: - with StealthySession(headless=True, solve_cloudflare=True) as session: - page = session.fetch(url, google_search=False) - html = page.html_content + page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True) + html = page.html_content soup = BeautifulSoup(html, "html.parser") input_sections = soup.find_all("div", class_="input") @@ -181,9 +180,8 @@ def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]: def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: try: contest_url: str = f"https://codeforces.com/contest/{contest_id}" - with StealthySession(headless=True, solve_cloudflare=True) as session: - page = session.fetch(contest_url, google_search=False) - html = page.html_content + page = StealthyFetcher.fetch(contest_url, headless=True, solve_cloudflare=True) + html = page.html_content soup = BeautifulSoup(html, "html.parser") problems: list[ProblemSummary] = [] @@ -276,9 +274,8 @@ class CodeforcesScraper(BaseScraper): url = parse_problem_url(contest_id, problem_letter) tests = scrape_sample_tests(url) - with StealthySession(headless=True, solve_cloudflare=True) as session: - page = session.fetch(url, google_search=False) - html = page.html_content + page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True) + html = page.html_content soup = BeautifulSoup(html, "html.parser") timeout_ms, memory_mb = extract_problem_limits(soup)