fix(ci): typing
This commit is contained in:
parent
87f9439607
commit
eb3f7762de
9 changed files with 339 additions and 155 deletions
95
scrapers/base.py
Normal file
95
scrapers/base.py
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from .models import ContestListResult, MetadataResult, TestsResult
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScraperConfig:
    """Tunable HTTP settings handed to the scraper clients.

    The fields are plain values; each client decides how to apply them.
    """

    # Per-request timeout in seconds (clients pass it as ``timeout=``).
    timeout_seconds: int = 30
    # Upper bound on attempts for a single request.
    max_retries: int = 3
    # Base of the exponential backoff between attempts.
    backoff_base: float = 2.0
    # Seconds a client sleeps after a successful request; <= 0 disables it.
    rate_limit_delay: float = 1.0
|
||||||
|
|
||||||
|
|
||||||
|
class HttpClient(Protocol):
    """Structural interface every scraper HTTP client must satisfy."""

    def get(self, url: str, **kwargs) -> requests.Response:
        """Fetch ``url`` and return the response.

        Extra keyword arguments are implementation-defined.
        """
        ...

    def close(self) -> None:
        """Release any resources held by the client."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class BaseScraper(ABC):
    """Common scaffolding for platform scrapers.

    Subclasses supply the platform name, an HTTP client factory and the three
    scrape operations. This base class owns the lazily created client and the
    helpers that turn failures into ``success=False`` result objects.

    Instances can be used as context managers so the client is always closed::

        with SomeScraper() as scraper:
            scraper.scrape_contest_list()
    """

    def __init__(self, config: ScraperConfig | None = None):
        """Store ``config`` (or a default ``ScraperConfig``); no client yet."""
        self.config = config or ScraperConfig()
        self._client: HttpClient | None = None

    def __enter__(self) -> "BaseScraper":
        """Return the scraper itself; the client is still created lazily."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the client on exit; exceptions from the body propagate."""
        self.close()

    @property
    @abstractmethod
    def platform_name(self) -> str:
        """Short platform identifier used as the prefix of error messages."""
        ...

    @abstractmethod
    def _create_client(self) -> HttpClient:
        """Build the HTTP client used by this scraper."""
        ...

    @abstractmethod
    def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
        """Return metadata for ``contest_id``."""
        ...

    @abstractmethod
    def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
        """Return the tests for one problem of ``contest_id``."""
        ...

    @abstractmethod
    def scrape_contest_list(self) -> ContestListResult:
        """Return the list of contests."""
        ...

    @property
    def client(self) -> HttpClient:
        """The HTTP client, created via ``_create_client`` on first access."""
        if self._client is None:
            self._client = self._create_client()
        return self._client

    def close(self) -> None:
        """Close the client if one was created.

        The reference is dropped *before* closing so that a failing
        ``close()`` cannot leave a half-closed client cached on the scraper.
        Calling this more than once is harmless.
        """
        client, self._client = self._client, None
        if client is not None:
            client.close()

    def _create_metadata_error(
        self, error_msg: str, contest_id: str = ""
    ) -> MetadataResult:
        """Build a failed ``MetadataResult`` prefixed with the platform name."""
        return MetadataResult(
            success=False,
            error=f"{self.platform_name}: {error_msg}",
            contest_id=contest_id,
        )

    def _create_tests_error(
        self, error_msg: str, problem_id: str = "", url: str = ""
    ) -> TestsResult:
        """Build a failed ``TestsResult`` with no tests and zeroed limits."""
        return TestsResult(
            success=False,
            error=f"{self.platform_name}: {error_msg}",
            problem_id=problem_id,
            url=url,
            tests=[],
            timeout_ms=0,
            memory_mb=0,
        )

    def _create_contests_error(self, error_msg: str) -> ContestListResult:
        """Build a failed ``ContestListResult`` prefixed with the platform name."""
        return ContestListResult(
            success=False, error=f"{self.platform_name}: {error_msg}"
        )

    def _safe_execute(self, operation: str, func, *args, **kwargs):
        """Call ``func(*args, **kwargs)`` and convert failures to error results.

        Args:
            operation: ``"metadata"``, ``"tests"`` or ``"contests"``; selects
                which error result is built when ``func`` raises.
            func: The callable doing the actual work.
            *args: Positional arguments for ``func``. For ``"metadata"`` the
                first one is reported as the contest id; for ``"tests"`` the
                second one is reported as the problem id.
            **kwargs: Keyword arguments for ``func``.

        Returns:
            Whatever ``func`` returns, or the matching error result.

        Raises:
            Exception: re-raised unchanged when ``operation`` is not one of
                the three known names.
        """
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Some exceptions stringify to "" (e.g. a bare ``TimeoutError()``);
            # fall back to the class name so the error is never blank.
            message = str(e) or type(e).__name__
            if operation == "metadata":
                contest_id = args[0] if args else ""
                return self._create_metadata_error(message, contest_id)
            if operation == "tests":
                problem_id = args[1] if len(args) > 1 else ""
                return self._create_tests_error(message, problem_id)
            if operation == "contests":
                return self._create_contests_error(message)
            raise
|
||||||
82
scrapers/clients.py
Normal file
82
scrapers/clients.py
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
import backoff
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from .base import HttpClient, ScraperConfig
|
||||||
|
|
||||||
|
|
||||||
|
class RequestsClient:
    """``requests.Session``-backed client with retries and rate limiting.

    Failed requests (connection errors and non-2xx statuses, including 429)
    are retried with exponential backoff driven by the ``ScraperConfig``.
    """

    def __init__(self, config: ScraperConfig, headers: dict[str, str] | None = None):
        """Create the session and the retrying request callable.

        Args:
            config: Timeout, retry, backoff and rate-limit settings.
            headers: Extra session headers; they override the default
                ``User-Agent`` when the same key is given.
        """
        self.config = config
        self.session = requests.Session()

        default_headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        if headers:
            default_headers.update(headers)

        self.session.headers.update(default_headers)

        # Built per instance so the retry policy follows ``config`` instead of
        # values frozen at import time by a decorator. ``HTTPError`` is a
        # subclass of ``RequestException``, so a 429 raised by
        # ``raise_for_status`` is retried here too; no separate predicate on
        # the response is needed. ``max_tries`` is clamped to at least 1
        # because backoff only stops when the attempt count equals it.
        self._get_with_retry = backoff.on_exception(
            backoff.expo,
            requests.RequestException,
            max_tries=max(1, config.max_retries),
            base=config.backoff_base,
            jitter=backoff.random_jitter,
        )(self._get_once)

    def get(self, url: str, **kwargs) -> requests.Response:
        """GET ``url`` with retries.

        Args:
            url: Address to fetch.
            **kwargs: Forwarded to ``Session.get``. ``timeout`` defaults to
                ``config.timeout_seconds`` when not supplied.

        Returns:
            The successful response.

        Raises:
            requests.RequestException: when every attempt failed.
        """
        return self._get_with_retry(url, **kwargs)

    def _get_once(self, url: str, **kwargs) -> requests.Response:
        """Perform a single GET attempt, then apply the rate-limit delay."""
        # setdefault (rather than passing timeout= next to **kwargs) avoids a
        # "multiple values for keyword argument 'timeout'" TypeError when the
        # caller provides its own timeout.
        kwargs.setdefault("timeout", self.config.timeout_seconds)
        response = self.session.get(url, **kwargs)
        response.raise_for_status()

        delay = getattr(self.config, "rate_limit_delay", 0)
        if delay > 0:
            time.sleep(delay)

        return response

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()
|
||||||
|
|
||||||
|
|
||||||
|
class CloudScraperClient:
    """``cloudscraper``-backed client with retries and rate limiting.

    Failed requests are retried with exponential backoff driven by the
    ``ScraperConfig``.
    """

    def __init__(self, config: ScraperConfig):
        """Create the cloudscraper session and the retrying request callable.

        ``cloudscraper`` is imported here so that importing this module does
        not require the package unless this client is actually used.
        """
        import cloudscraper

        self.config = config
        self.scraper = cloudscraper.create_scraper()

        # Built per instance so the retry policy follows ``config`` instead of
        # values frozen at import time by a decorator. ``HTTPError`` is a
        # subclass of ``RequestException``, so one exception type suffices.
        # ``max_tries`` is clamped to at least 1 because backoff only stops
        # when the attempt count equals it.
        self._get_with_retry = backoff.on_exception(
            backoff.expo,
            requests.RequestException,
            max_tries=max(1, config.max_retries),
            base=config.backoff_base,
            jitter=backoff.random_jitter,
        )(self._get_once)

    def get(self, url: str, **kwargs) -> requests.Response:
        """GET ``url`` with retries.

        Args:
            url: Address to fetch.
            **kwargs: Forwarded to the scraper's ``get``. ``timeout`` defaults
                to ``config.timeout_seconds`` when not supplied.

        Returns:
            The successful response.

        Raises:
            requests.RequestException: when every attempt failed.
        """
        return self._get_with_retry(url, **kwargs)

    def _get_once(self, url: str, **kwargs) -> requests.Response:
        """Perform a single GET attempt, then apply the rate-limit delay."""
        # setdefault (rather than passing timeout= next to **kwargs) avoids a
        # "multiple values for keyword argument 'timeout'" TypeError when the
        # caller provides its own timeout.
        kwargs.setdefault("timeout", self.config.timeout_seconds)
        response = self.scraper.get(url, **kwargs)
        response.raise_for_status()

        delay = getattr(self.config, "rate_limit_delay", 0)
        if delay > 0:
            time.sleep(delay)

        return response

    def close(self) -> None:
        """Close the scraper session when it exposes a ``close`` method."""
        if hasattr(self.scraper, "close"):
            self.scraper.close()
|
||||||
|
|
@ -5,9 +5,10 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
|
|
||||||
import cloudscraper
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
from .base import BaseScraper, HttpClient
|
||||||
|
from .clients import CloudScraperClient
|
||||||
from .models import (
|
from .models import (
|
||||||
ContestListResult,
|
ContestListResult,
|
||||||
ContestSummary,
|
ContestSummary,
|
||||||
|
|
@ -18,11 +19,73 @@ from .models import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def scrape(url: str) -> list[TestCase]:
|
class CodeforcesScraper(BaseScraper):
    """Codeforces scraper built on ``BaseScraper``.

    The public ``scrape_*`` methods route through ``_safe_execute`` so that an
    exception raised by an ``_impl`` method comes back as a failed result.
    """

    @property
    def platform_name(self) -> str:
        """Platform identifier used as the prefix of error messages."""
        return "codeforces"

    def _create_client(self) -> HttpClient:
        """Create the cloudscraper-based client from this scraper's config."""
        return CloudScraperClient(self.config)

    def scrape_contest_metadata(self, contest_id: str) -> MetadataResult:
        """Return the problem list of ``contest_id`` as a ``MetadataResult``."""
        impl = self._scrape_contest_metadata_impl
        return self._safe_execute("metadata", impl, contest_id)

    def scrape_problem_tests(self, contest_id: str, problem_id: str) -> TestsResult:
        """Return sample tests and limits for one problem of ``contest_id``."""
        impl = self._scrape_problem_tests_impl
        return self._safe_execute("tests", impl, contest_id, problem_id)

    def scrape_contest_list(self) -> ContestListResult:
        """Return the available contests as a ``ContestListResult``."""
        impl = self._scrape_contest_list_impl
        return self._safe_execute("contests", impl)

    def _scrape_contest_metadata_impl(self, contest_id: str) -> MetadataResult:
        """Fetch the contest's problems; an empty list is reported as an error."""
        found = scrape_contest_problems(contest_id, self.client)
        if found:
            return MetadataResult(
                success=True, error="", contest_id=contest_id, problems=found
            )
        return self._create_metadata_error(
            f"No problems found for contest {contest_id}", contest_id
        )

    def _scrape_problem_tests_impl(
        self, contest_id: str, problem_letter: str
    ) -> TestsResult:
        """Fetch sample tests and time/memory limits for one problem."""
        problem_id = contest_id + problem_letter.lower()
        url = parse_problem_url(contest_id, problem_letter)
        samples = scrape_sample_tests(url, self.client)

        # NOTE(review): the problem page is requested a second time here,
        # after scrape_sample_tests already fetched it, to read the limits.
        page = self.client.get(url)
        parsed = BeautifulSoup(page.text, "html.parser")
        timeout_ms, memory_mb = extract_problem_limits(parsed)

        if samples:
            return TestsResult(
                success=True,
                error="",
                problem_id=problem_id,
                url=url,
                tests=samples,
                timeout_ms=timeout_ms,
                memory_mb=memory_mb,
            )
        return self._create_tests_error(
            f"No tests found for {contest_id} {problem_letter}", problem_id, url
        )

    def _scrape_contest_list_impl(self) -> ContestListResult:
        """Fetch the contest list; an empty list is reported as an error."""
        found = scrape_contests(self.client)
        if found:
            return ContestListResult(success=True, error="", contests=found)
        return self._create_contests_error("No contests found")
|
||||||
|
|
||||||
|
|
||||||
|
def scrape(url: str, client: HttpClient) -> list[TestCase]:
|
||||||
try:
|
try:
|
||||||
scraper = cloudscraper.create_scraper()
|
response = client.get(url)
|
||||||
response = scraper.get(url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
input_sections = soup.find_all("div", class_="input")
|
input_sections = soup.find_all("div", class_="input")
|
||||||
|
|
@ -176,12 +239,12 @@ def extract_problem_limits(soup: BeautifulSoup) -> tuple[int, float]:
|
||||||
return timeout_ms, memory_mb
|
return timeout_ms, memory_mb
|
||||||
|
|
||||||
|
|
||||||
def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
def scrape_contest_problems(
|
||||||
|
contest_id: str, client: HttpClient
|
||||||
|
) -> list[ProblemSummary]:
|
||||||
try:
|
try:
|
||||||
contest_url: str = f"https://codeforces.com/contest/{contest_id}"
|
contest_url: str = f"https://codeforces.com/contest/{contest_id}"
|
||||||
scraper = cloudscraper.create_scraper()
|
response = client.get(contest_url)
|
||||||
response = scraper.get(contest_url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
problems: list[ProblemSummary] = []
|
problems: list[ProblemSummary] = []
|
||||||
|
|
@ -217,16 +280,13 @@ def scrape_contest_problems(contest_id: str) -> list[ProblemSummary]:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def scrape_sample_tests(url: str) -> list[TestCase]:
|
def scrape_sample_tests(url: str, client: HttpClient) -> list[TestCase]:
|
||||||
print(f"Scraping: {url}", file=sys.stderr)
|
print(f"Scraping: {url}", file=sys.stderr)
|
||||||
return scrape(url)
|
return scrape(url, client)
|
||||||
|
|
||||||
|
|
||||||
def scrape_contests() -> list[ContestSummary]:
|
def scrape_contests(client: HttpClient) -> list[ContestSummary]:
|
||||||
try:
|
response = client.get("https://codeforces.com/api/contest.list")
|
||||||
scraper = cloudscraper.create_scraper()
|
|
||||||
response = scraper.get("https://codeforces.com/api/contest.list", timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
if data["status"] != "OK":
|
if data["status"] != "OK":
|
||||||
|
|
@ -241,10 +301,6 @@ def scrape_contests() -> list[ContestSummary]:
|
||||||
|
|
||||||
return contests
|
return contests
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Failed to fetch contests: {e}", file=sys.stderr)
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
|
|
@ -255,6 +311,7 @@ def main() -> None:
|
||||||
print(json.dumps(asdict(result)))
|
print(json.dumps(asdict(result)))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
scraper = CodeforcesScraper()
|
||||||
mode: str = sys.argv[1]
|
mode: str = sys.argv[1]
|
||||||
|
|
||||||
if mode == "metadata":
|
if mode == "metadata":
|
||||||
|
|
@ -266,18 +323,7 @@ def main() -> None:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
contest_id: str = sys.argv[2]
|
contest_id: str = sys.argv[2]
|
||||||
problems: list[ProblemSummary] = scrape_contest_problems(contest_id)
|
result = scraper.scrape_contest_metadata(contest_id)
|
||||||
|
|
||||||
if not problems:
|
|
||||||
result = MetadataResult(
|
|
||||||
success=False, error=f"No problems found for contest {contest_id}"
|
|
||||||
)
|
|
||||||
print(json.dumps(asdict(result)))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
result = MetadataResult(
|
|
||||||
success=True, error="", contest_id=contest_id, problems=problems
|
|
||||||
)
|
|
||||||
print(json.dumps(asdict(result)))
|
print(json.dumps(asdict(result)))
|
||||||
|
|
||||||
elif mode == "tests":
|
elif mode == "tests":
|
||||||
|
|
@ -296,52 +342,7 @@ def main() -> None:
|
||||||
|
|
||||||
tests_contest_id: str = sys.argv[2]
|
tests_contest_id: str = sys.argv[2]
|
||||||
problem_letter: str = sys.argv[3]
|
problem_letter: str = sys.argv[3]
|
||||||
problem_id: str = tests_contest_id + problem_letter.lower()
|
tests_result = scraper.scrape_problem_tests(tests_contest_id, problem_letter)
|
||||||
|
|
||||||
url: str = parse_problem_url(tests_contest_id, problem_letter)
|
|
||||||
tests: list[TestCase] = scrape_sample_tests(url)
|
|
||||||
|
|
||||||
try:
|
|
||||||
scraper = cloudscraper.create_scraper()
|
|
||||||
response = scraper.get(url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
|
||||||
timeout_ms, memory_mb = extract_problem_limits(soup)
|
|
||||||
except Exception as e:
|
|
||||||
tests_result = TestsResult(
|
|
||||||
success=False,
|
|
||||||
error=f"Failed to extract constraints: {e}",
|
|
||||||
problem_id=problem_id,
|
|
||||||
url=url,
|
|
||||||
tests=[],
|
|
||||||
timeout_ms=0,
|
|
||||||
memory_mb=0,
|
|
||||||
)
|
|
||||||
print(json.dumps(asdict(tests_result)))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if not tests:
|
|
||||||
tests_result = TestsResult(
|
|
||||||
success=False,
|
|
||||||
error=f"No tests found for {tests_contest_id} {problem_letter}",
|
|
||||||
problem_id=problem_id,
|
|
||||||
url=url,
|
|
||||||
tests=[],
|
|
||||||
timeout_ms=timeout_ms,
|
|
||||||
memory_mb=memory_mb,
|
|
||||||
)
|
|
||||||
print(json.dumps(asdict(tests_result)))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
tests_result = TestsResult(
|
|
||||||
success=True,
|
|
||||||
error="",
|
|
||||||
problem_id=problem_id,
|
|
||||||
url=url,
|
|
||||||
tests=tests,
|
|
||||||
timeout_ms=timeout_ms,
|
|
||||||
memory_mb=memory_mb,
|
|
||||||
)
|
|
||||||
print(json.dumps(asdict(tests_result)))
|
print(json.dumps(asdict(tests_result)))
|
||||||
|
|
||||||
elif mode == "contests":
|
elif mode == "contests":
|
||||||
|
|
@ -352,13 +353,7 @@ def main() -> None:
|
||||||
print(json.dumps(asdict(contest_result)))
|
print(json.dumps(asdict(contest_result)))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
contests = scrape_contests()
|
contest_result = scraper.scrape_contest_list()
|
||||||
if not contests:
|
|
||||||
contest_result = ContestListResult(success=False, error="No contests found")
|
|
||||||
print(json.dumps(asdict(contest_result)))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
contest_result = ContestListResult(success=True, error="", contests=contests)
|
|
||||||
print(json.dumps(asdict(contest_result)))
|
print(json.dumps(asdict(contest_result)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -369,6 +364,8 @@ def main() -> None:
|
||||||
print(json.dumps(asdict(result)))
|
print(json.dumps(asdict(result)))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
scraper.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
|
|
@ -158,9 +158,7 @@ describe('cp.picker', function()
|
||||||
end,
|
end,
|
||||||
}
|
}
|
||||||
|
|
||||||
package.loaded['cp.pickers.init'] = nil
|
picker = spec_helper.fresh_require('cp.pickers', { 'cp.pickers.init' })
|
||||||
package.loaded['cp.pickers'] = nil
|
|
||||||
picker = require('cp.pickers')
|
|
||||||
|
|
||||||
local problems = picker.get_problems_for_contest('test_platform', 'test_contest')
|
local problems = picker.get_problems_for_contest('test_platform', 'test_contest')
|
||||||
assert.is_table(problems)
|
assert.is_table(problems)
|
||||||
|
|
@ -183,6 +181,8 @@ describe('cp.picker', function()
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
picker = spec_helper.fresh_require('cp.pickers', { 'cp.pickers.init' })
|
||||||
|
|
||||||
local problems = picker.get_problems_for_contest('test_platform', 'test_contest')
|
local problems = picker.get_problems_for_contest('test_platform', 'test_contest')
|
||||||
assert.is_table(problems)
|
assert.is_table(problems)
|
||||||
assert.equals(0, #problems)
|
assert.equals(0, #problems)
|
||||||
|
|
|
||||||
|
|
@ -56,8 +56,7 @@ describe('cp.scrape', function()
|
||||||
|
|
||||||
package.loaded['cp.cache'] = mock_cache
|
package.loaded['cp.cache'] = mock_cache
|
||||||
package.loaded['cp.utils'] = mock_utils
|
package.loaded['cp.utils'] = mock_utils
|
||||||
package.loaded['cp.scrape'] = nil
|
scrape = spec_helper.fresh_require('cp.scrape')
|
||||||
scrape = require('cp.scrape')
|
|
||||||
|
|
||||||
local original_fn = vim.fn
|
local original_fn = vim.fn
|
||||||
vim.fn = vim.tbl_extend('force', vim.fn, {
|
vim.fn = vim.tbl_extend('force', vim.fn, {
|
||||||
|
|
@ -125,8 +124,7 @@ describe('cp.scrape', function()
|
||||||
stored_data = { platform = platform, contest_id = contest_id, problems = problems }
|
stored_data = { platform = platform, contest_id = contest_id, problems = problems }
|
||||||
end
|
end
|
||||||
|
|
||||||
package.loaded['cp.scrape'] = nil
|
scrape = spec_helper.fresh_require('cp.scrape')
|
||||||
scrape = require('cp.scrape')
|
|
||||||
|
|
||||||
local result = scrape.scrape_contest_metadata('atcoder', 'abc123')
|
local result = scrape.scrape_contest_metadata('atcoder', 'abc123')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,7 @@ describe('cp.snippets', function()
|
||||||
|
|
||||||
before_each(function()
|
before_each(function()
|
||||||
spec_helper.setup()
|
spec_helper.setup()
|
||||||
package.loaded['cp.snippets'] = nil
|
snippets = spec_helper.fresh_require('cp.snippets')
|
||||||
snippets = require('cp.snippets')
|
|
||||||
mock_luasnip = {
|
mock_luasnip = {
|
||||||
snippet = function(trigger, body)
|
snippet = function(trigger, body)
|
||||||
return { trigger = trigger, body = body }
|
return { trigger = trigger, body = body }
|
||||||
|
|
|
||||||
|
|
@ -121,6 +121,17 @@ function M.find_logged_message(pattern)
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
--- Require a module again after evicting it from `package.loaded`.
---@param module_name string module to evict and load again
---@param additional_clears? string[] other module names to evict first
---@return any # whatever `require(module_name)` returns
function M.fresh_require(module_name, additional_clears)
  local extra_modules = additional_clears or {}

  -- Drop the extra entries first so the fresh load re-requires them too.
  for _, name in ipairs(extra_modules) do
    package.loaded[name] = nil
  end

  package.loaded[module_name] = nil
  return require(module_name)
end
|
||||||
|
|
||||||
function M.teardown()
|
function M.teardown()
|
||||||
package.loaded['cp.log'] = nil
|
package.loaded['cp.log'] = nil
|
||||||
package.loaded['cp.scrape'] = nil
|
package.loaded['cp.scrape'] = nil
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ import pytest
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_codeforces_html():
|
def mock_codeforces_html():
|
||||||
return """
|
return """
|
||||||
|
<div class="time-limit">Time limit: 1 seconds</div>
|
||||||
|
<div class="memory-limit">Memory limit: 256 megabytes</div>
|
||||||
<div class="input">
|
<div class="input">
|
||||||
<pre>
|
<pre>
|
||||||
<div class="test-example-line-1">3</div>
|
<div class="test-example-line-1">3</div>
|
||||||
|
|
|
||||||
|
|
@ -1,61 +1,61 @@
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from scrapers.codeforces import scrape, scrape_contest_problems, scrape_contests
|
from scrapers.codeforces import CodeforcesScraper
|
||||||
from scrapers.models import ContestSummary, ProblemSummary
|
from scrapers.models import ContestSummary, ProblemSummary
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_success(mocker, mock_codeforces_html):
|
def test_scrape_success(mocker, mock_codeforces_html):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_response = Mock()
|
mock_response = Mock()
|
||||||
mock_response.text = mock_codeforces_html
|
mock_response.text = mock_codeforces_html
|
||||||
mock_scraper.get.return_value = mock_response
|
mock_client.get.return_value = mock_response
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape("https://codeforces.com/contest/1900/problem/A")
|
result = scraper.scrape_problem_tests("1900", "A")
|
||||||
|
|
||||||
assert len(result) == 1
|
assert result.success == True
|
||||||
assert result[0].input == "1\n3\n1 2 3"
|
assert len(result.tests) == 1
|
||||||
assert result[0].expected == "6"
|
assert result.tests[0].input == "1\n3\n1 2 3"
|
||||||
|
assert result.tests[0].expected == "6"
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_contest_problems(mocker):
|
def test_scrape_contest_problems(mocker):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_response = Mock()
|
mock_response = Mock()
|
||||||
mock_response.text = """
|
mock_response.text = """
|
||||||
<a href="/contest/1900/problem/A">A. Problem A</a>
|
<a href="/contest/1900/problem/A">A. Problem A</a>
|
||||||
<a href="/contest/1900/problem/B">B. Problem B</a>
|
<a href="/contest/1900/problem/B">B. Problem B</a>
|
||||||
"""
|
"""
|
||||||
mock_scraper.get.return_value = mock_response
|
mock_client.get.return_value = mock_response
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape_contest_problems("1900")
|
result = scraper.scrape_contest_metadata("1900")
|
||||||
|
|
||||||
assert len(result) == 2
|
assert result.success == True
|
||||||
assert result[0] == ProblemSummary(id="a", name="A. Problem A")
|
assert len(result.problems) == 2
|
||||||
assert result[1] == ProblemSummary(id="b", name="B. Problem B")
|
assert result.problems[0] == ProblemSummary(id="a", name="A. Problem A")
|
||||||
|
assert result.problems[1] == ProblemSummary(id="b", name="B. Problem B")
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_network_error(mocker):
|
def test_scrape_network_error(mocker):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_scraper.get.side_effect = Exception("Network error")
|
mock_client.get.side_effect = Exception("Network error")
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape("https://codeforces.com/contest/1900/problem/A")
|
result = scraper.scrape_problem_tests("1900", "A")
|
||||||
|
|
||||||
assert result == []
|
assert result.success == False
|
||||||
|
assert "network error" in result.error.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_contests_success(mocker):
|
def test_scrape_contests_success(mocker):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_response = Mock()
|
mock_response = Mock()
|
||||||
mock_response.json.return_value = {
|
mock_response.json.return_value = {
|
||||||
"status": "OK",
|
"status": "OK",
|
||||||
|
|
@ -65,26 +65,26 @@ def test_scrape_contests_success(mocker):
|
||||||
{"id": 1949, "name": "Codeforces Global Round 26"},
|
{"id": 1949, "name": "Codeforces Global Round 26"},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
mock_scraper.get.return_value = mock_response
|
mock_client.get.return_value = mock_response
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape_contests()
|
result = scraper.scrape_contest_list()
|
||||||
|
|
||||||
assert len(result) == 3
|
assert result.success == True
|
||||||
assert result[0] == ContestSummary(
|
assert len(result.contests) == 3
|
||||||
|
assert result.contests[0] == ContestSummary(
|
||||||
id="1951",
|
id="1951",
|
||||||
name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||||
display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
display_name="Educational Codeforces Round 168 (Rated for Div. 2)",
|
||||||
)
|
)
|
||||||
assert result[1] == ContestSummary(
|
assert result.contests[1] == ContestSummary(
|
||||||
id="1950",
|
id="1950",
|
||||||
name="Codeforces Round 936 (Div. 2)",
|
name="Codeforces Round 936 (Div. 2)",
|
||||||
display_name="Codeforces Round 936 (Div. 2)",
|
display_name="Codeforces Round 936 (Div. 2)",
|
||||||
)
|
)
|
||||||
assert result[2] == ContestSummary(
|
assert result.contests[2] == ContestSummary(
|
||||||
id="1949",
|
id="1949",
|
||||||
name="Codeforces Global Round 26",
|
name="Codeforces Global Round 26",
|
||||||
display_name="Codeforces Global Round 26",
|
display_name="Codeforces Global Round 26",
|
||||||
|
|
@ -92,28 +92,28 @@ def test_scrape_contests_success(mocker):
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_contests_api_error(mocker):
|
def test_scrape_contests_api_error(mocker):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_response = Mock()
|
mock_response = Mock()
|
||||||
mock_response.json.return_value = {"status": "FAILED", "result": []}
|
mock_response.json.return_value = {"status": "FAILED", "result": []}
|
||||||
mock_scraper.get.return_value = mock_response
|
mock_client.get.return_value = mock_response
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape_contests()
|
result = scraper.scrape_contest_list()
|
||||||
|
|
||||||
assert result == []
|
assert result.success == False
|
||||||
|
assert "no contests found" in result.error.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_contests_network_error(mocker):
|
def test_scrape_contests_network_error(mocker):
|
||||||
mock_scraper = Mock()
|
mock_client = Mock()
|
||||||
mock_scraper.get.side_effect = Exception("Network error")
|
mock_client.get.side_effect = Exception("Network error")
|
||||||
|
|
||||||
mocker.patch(
|
scraper = CodeforcesScraper()
|
||||||
"scrapers.codeforces.cloudscraper.create_scraper", return_value=mock_scraper
|
mocker.patch.object(scraper, "_create_client", return_value=mock_client)
|
||||||
)
|
|
||||||
|
|
||||||
result = scrape_contests()
|
result = scraper.scrape_contest_list()
|
||||||
|
|
||||||
assert result == []
|
assert result.success == False
|
||||||
|
assert "network error" in result.error.lower()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue