better scraper config

This commit is contained in:
Barrett Ruth 2025-09-30 22:33:36 -04:00
parent c1b15c2991
commit 67c23c4d69
2 changed files with 8 additions and 11 deletions

View file

@@ -28,7 +28,7 @@ COMMANDS *cp-commands*
cp.nvim uses a single :CP command with intelligent argument parsing: cp.nvim uses a single :CP command with intelligent argument parsing:
State Restoration ~ State Restoration ~
:CP Restore contest context from current file. :CP Restore state from current file.
Automatically detects platform, contest, problem, Automatically detects platform, contest, problem,
and language from cached state. Use this after and language from cached state. Use this after
switching files to restore your CP environment. switching files to restore your CP environment.

View file

@@ -7,7 +7,7 @@ from dataclasses import asdict
import requests import requests
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
from scrapling.fetchers import StealthySession from scrapling.fetchers import StealthyFetcher
from .base import BaseScraper from .base import BaseScraper
from .models import ( from .models import (
@@ -22,8 +22,7 @@ from .models import (
def scrape(url: str) -> list[TestCase]: def scrape(url: str) -> list[TestCase]:
try: try:
with StealthySession(headless=True, solve_cloudflare=True) as session: page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
page = session.fetch(url, google_search=False)
html = page.html_content html = page.html_content
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
@@ -181,8 +180,7 @@ def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]: def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
try: try:
contest_url: str = f"https://codeforces.com/contest/{contest_id}" contest_url: str = f"https://codeforces.com/contest/{contest_id}"
with StealthySession(headless=True, solve_cloudflare=True) as session: page = StealthyFetcher.fetch(contest_url, headless=True, solve_cloudflare=True)
page = session.fetch(contest_url, google_search=False)
html = page.html_content html = page.html_content
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
@@ -276,8 +274,7 @@ class CodeforcesScraper(BaseScraper):
url = parse_problem_url(contest_id, problem_letter) url = parse_problem_url(contest_id, problem_letter)
tests = scrape_sample_tests(url) tests = scrape_sample_tests(url)
with StealthySession(headless=True, solve_cloudflare=True) as session: page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)
page = session.fetch(url, google_search=False)
html = page.html_content html = page.html_content
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
timeout_ms, memory_mb = extract_problem_limits(soup) timeout_ms, memory_mb = extract_problem_limits(soup)