feat(codechef): implement login, submit, and full contest list
Problem: CodeChef login/submit used wrong selectors for the Drupal-based site, the contest list only showed future contests, and problem/contest URLs were not set for `:CP open`.

Solution: Fix login selectors (`input[name="name"]`, `input[name="pass"]`, `input.cc-login-btn`) with a fast 3s failure on bad credentials. Rewrite submit to use the MUI Select (`[aria-haspopup="listbox"]`), Ace editor clipboard injection, and `#submit_btn` dispatch with a practice-contest fallback. Paginate `/api/list/contests/past` to collect all 228 past Starters, then concurrently fetch each to expand parent contests into individual division entries (e.g. `START228 (Div. 4)`). Set `url`, `contest_url`, and `standings_url` correctly in `scrape_contest_metadata`.
This commit is contained in:
parent
52fc2f76f4
commit
3b3da9ab9b
1 changed file with 175 additions and 103 deletions
|
|
@ -23,6 +23,7 @@ from .models import (
|
||||||
|
|
||||||
BASE_URL = "https://www.codechef.com"
|
BASE_URL = "https://www.codechef.com"
|
||||||
API_CONTESTS_ALL = "/api/list/contests/all"
|
API_CONTESTS_ALL = "/api/list/contests/all"
|
||||||
|
API_CONTESTS_PAST = "/api/list/contests/past"
|
||||||
API_CONTEST = "/api/contests/{contest_id}"
|
API_CONTEST = "/api/contests/{contest_id}"
|
||||||
API_PROBLEM = "/api/contests/{contest_id}/problems/{problem_id}"
|
API_PROBLEM = "/api/contests/{contest_id}/problems/{problem_id}"
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
|
|
@ -32,17 +33,19 @@ CONNECTIONS = 8
|
||||||
|
|
||||||
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "codechef-cookies.json"
|
_COOKIE_PATH = Path.home() / ".cache" / "cp-nvim" / "codechef-cookies.json"
|
||||||
|
|
||||||
_CC_CHECK_LOGIN_JS = """() => {
|
_CC_CHECK_LOGIN_JS = "() => !!document.querySelector('a[href*=\"/users/\"]')"
|
||||||
const d = document.getElementById('__NEXT_DATA__');
|
|
||||||
if (d) {
|
_CC_LANG_IDS: dict[str, str] = {
|
||||||
try {
|
"C++": "42",
|
||||||
const p = JSON.parse(d.textContent);
|
"PYTH 3": "116",
|
||||||
if (p?.props?.pageProps?.currentUser?.username) return true;
|
"JAVA": "10",
|
||||||
} catch(e) {}
|
"PYPY3": "109",
|
||||||
}
|
"GO": "114",
|
||||||
return !!document.querySelector('a[href="/logout"]') ||
|
"rust": "93",
|
||||||
!!document.querySelector('[class*="user-name"]');
|
"KTLN": "47",
|
||||||
}"""
|
"NODEJS": "56",
|
||||||
|
"TS": "35",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
async def fetch_json(client: httpx.AsyncClient, path: str) -> dict[str, Any]:
|
async def fetch_json(client: httpx.AsyncClient, path: str) -> dict[str, Any]:
|
||||||
|
|
@ -71,21 +74,19 @@ def _login_headless_codechef(credentials: dict[str, str]) -> LoginResult:
|
||||||
|
|
||||||
def check_login(page):
|
def check_login(page):
|
||||||
nonlocal logged_in
|
nonlocal logged_in
|
||||||
logged_in = page.evaluate(_CC_CHECK_LOGIN_JS)
|
logged_in = "dashboard" in page.url or page.evaluate(_CC_CHECK_LOGIN_JS)
|
||||||
|
|
||||||
def login_action(page):
|
def login_action(page):
|
||||||
nonlocal login_error
|
nonlocal login_error
|
||||||
try:
|
try:
|
||||||
page.locator('input[type="email"], input[name="email"]').first.fill(
|
page.locator('input[name="name"]').fill(credentials.get("username", ""))
|
||||||
credentials.get("username", "")
|
page.locator('input[name="pass"]').fill(credentials.get("password", ""))
|
||||||
)
|
page.locator('input.cc-login-btn').click()
|
||||||
page.locator('input[type="password"], input[name="password"]').first.fill(
|
try:
|
||||||
credentials.get("password", "")
|
page.wait_for_url(lambda url: "/login" not in url, timeout=3000)
|
||||||
)
|
except Exception:
|
||||||
page.locator('button[type="submit"]').first.click()
|
login_error = "Login failed (bad credentials?)"
|
||||||
page.wait_for_url(
|
return
|
||||||
lambda url: "/login" not in url, timeout=BROWSER_NAV_TIMEOUT
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
login_error = str(e)
|
login_error = str(e)
|
||||||
|
|
||||||
|
|
@ -155,21 +156,19 @@ def _submit_headless_codechef(
|
||||||
|
|
||||||
def check_login(page):
|
def check_login(page):
|
||||||
nonlocal logged_in
|
nonlocal logged_in
|
||||||
logged_in = page.evaluate(_CC_CHECK_LOGIN_JS)
|
logged_in = "dashboard" in page.url or page.evaluate(_CC_CHECK_LOGIN_JS)
|
||||||
|
|
||||||
def login_action(page):
|
def login_action(page):
|
||||||
nonlocal login_error
|
nonlocal login_error
|
||||||
try:
|
try:
|
||||||
page.locator('input[type="email"], input[name="email"]').first.fill(
|
page.locator('input[name="name"]').fill(credentials.get("username", ""))
|
||||||
credentials.get("username", "")
|
page.locator('input[name="pass"]').fill(credentials.get("password", ""))
|
||||||
)
|
page.locator('input.cc-login-btn').click()
|
||||||
page.locator('input[type="password"], input[name="password"]').first.fill(
|
try:
|
||||||
credentials.get("password", "")
|
page.wait_for_url(lambda url: "/login" not in url, timeout=3000)
|
||||||
)
|
except Exception:
|
||||||
page.locator('button[type="submit"]').first.click()
|
login_error = "Login failed (bad credentials?)"
|
||||||
page.wait_for_url(
|
return
|
||||||
lambda url: "/login" not in url, timeout=BROWSER_NAV_TIMEOUT
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
login_error = str(e)
|
login_error = str(e)
|
||||||
|
|
||||||
|
|
@ -179,54 +178,44 @@ def _submit_headless_codechef(
|
||||||
needs_relogin = True
|
needs_relogin = True
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
selected = False
|
page.wait_for_timeout(2000)
|
||||||
selects = page.locator("select")
|
|
||||||
for i in range(selects.count()):
|
|
||||||
try:
|
|
||||||
sel = selects.nth(i)
|
|
||||||
opts = sel.locator("option").all_inner_texts()
|
|
||||||
match = next(
|
|
||||||
(o for o in opts if language_id.lower() in o.lower()), None
|
|
||||||
)
|
|
||||||
if match:
|
|
||||||
sel.select_option(label=match)
|
|
||||||
selected = True
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if not selected:
|
page.locator('[aria-haspopup="listbox"]').click()
|
||||||
lang_trigger = page.locator(
|
page.wait_for_selector('[role="option"]', timeout=5000)
|
||||||
'[class*="language"] button, [data-testid*="language"] button'
|
page.locator(f'[role="option"][data-value="{language_id}"]').click()
|
||||||
).first
|
page.wait_for_timeout(2000)
|
||||||
lang_trigger.click()
|
|
||||||
page.wait_for_timeout(500)
|
|
||||||
page.locator(
|
|
||||||
f'[role="option"]:has-text("{language_id}"), '
|
|
||||||
f'li:has-text("{language_id}")'
|
|
||||||
).first.click()
|
|
||||||
|
|
||||||
|
page.locator('.ace_editor').click()
|
||||||
|
page.keyboard.press('Control+a')
|
||||||
|
page.wait_for_timeout(200)
|
||||||
page.evaluate(
|
page.evaluate(
|
||||||
"""(code) => {
|
"""(code) => {
|
||||||
if (typeof monaco !== 'undefined') {
|
const textarea = document.querySelector('.ace_text-input');
|
||||||
const models = monaco.editor.getModels();
|
const dt = new DataTransfer();
|
||||||
if (models.length > 0) { models[0].setValue(code); return; }
|
dt.setData('text/plain', code);
|
||||||
}
|
textarea.dispatchEvent(new ClipboardEvent('paste', {
|
||||||
const cm = document.querySelector('.CodeMirror');
|
clipboardData: dt, bubbles: true, cancelable: true
|
||||||
if (cm && cm.CodeMirror) { cm.CodeMirror.setValue(code); return; }
|
}));
|
||||||
const ta = document.querySelector('textarea');
|
|
||||||
if (ta) { ta.value = code; ta.dispatchEvent(new Event('input', {bubbles: true})); }
|
|
||||||
}""",
|
}""",
|
||||||
source_code,
|
source_code,
|
||||||
)
|
)
|
||||||
|
page.wait_for_timeout(1000)
|
||||||
|
|
||||||
page.locator(
|
page.evaluate(
|
||||||
'button[type="submit"]:has-text("Submit"), button:has-text("Submit Code")'
|
"() => document.getElementById('submit_btn').scrollIntoView({block:'center'})"
|
||||||
).first.click()
|
|
||||||
page.wait_for_url(
|
|
||||||
lambda url: "/submit/" not in url or "submission" in url,
|
|
||||||
timeout=BROWSER_NAV_TIMEOUT * 2,
|
|
||||||
)
|
)
|
||||||
|
page.wait_for_timeout(200)
|
||||||
|
page.locator('#submit_btn').dispatch_event('click')
|
||||||
|
page.wait_for_timeout(3000)
|
||||||
|
|
||||||
|
dialog_text = page.evaluate("""() => {
|
||||||
|
const d = document.querySelector('[role="dialog"], .swal2-popup');
|
||||||
|
return d ? d.textContent.trim() : null;
|
||||||
|
}""")
|
||||||
|
if dialog_text and "not available for accepting solutions" in dialog_text:
|
||||||
|
submit_error = "PRACTICE_FALLBACK"
|
||||||
|
elif dialog_text:
|
||||||
|
submit_error = dialog_text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
submit_error = str(e)
|
submit_error = str(e)
|
||||||
|
|
||||||
|
|
@ -252,10 +241,12 @@ def _submit_headless_codechef(
|
||||||
)
|
)
|
||||||
|
|
||||||
print(json.dumps({"status": "submitting"}), flush=True)
|
print(json.dumps({"status": "submitting"}), flush=True)
|
||||||
session.fetch(
|
submit_url = (
|
||||||
f"{BASE_URL}/{contest_id}/submit/{problem_id}",
|
f"{BASE_URL}/submit/{problem_id}"
|
||||||
page_action=submit_action,
|
if contest_id == "PRACTICE"
|
||||||
|
else f"{BASE_URL}/{contest_id}/submit/{problem_id}"
|
||||||
)
|
)
|
||||||
|
session.fetch(submit_url, page_action=submit_action)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
browser_cookies = session.context.cookies()
|
browser_cookies = session.context.cookies()
|
||||||
|
|
@ -275,12 +266,20 @@ def _submit_headless_codechef(
|
||||||
_retried=True,
|
_retried=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if submit_error == "PRACTICE_FALLBACK" and not _retried:
|
||||||
|
return _submit_headless_codechef(
|
||||||
|
"PRACTICE",
|
||||||
|
problem_id,
|
||||||
|
file_path,
|
||||||
|
language_id,
|
||||||
|
credentials,
|
||||||
|
_retried=True,
|
||||||
|
)
|
||||||
|
|
||||||
if submit_error:
|
if submit_error:
|
||||||
return SubmitResult(success=False, error=submit_error)
|
return SubmitResult(success=False, error=submit_error)
|
||||||
|
|
||||||
return SubmitResult(
|
return SubmitResult(success=True, error="", submission_id="")
|
||||||
success=True, error="", submission_id="", verdict="submitted"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return SubmitResult(success=False, error=str(e))
|
return SubmitResult(success=False, error=str(e))
|
||||||
|
|
||||||
|
|
@ -296,12 +295,19 @@ class CodeChefScraper(BaseScraper):
|
||||||
data = await fetch_json(
|
data = await fetch_json(
|
||||||
client, API_CONTEST.format(contest_id=contest_id)
|
client, API_CONTEST.format(contest_id=contest_id)
|
||||||
)
|
)
|
||||||
if not data.get("problems"):
|
problems_raw = data.get("problems")
|
||||||
|
if not problems_raw and isinstance(data.get("child_contests"), dict):
|
||||||
|
for div in ("div_4", "div_3", "div_2", "div_1"):
|
||||||
|
child = data["child_contests"].get(div, {})
|
||||||
|
child_code = child.get("contest_code")
|
||||||
|
if child_code:
|
||||||
|
return await self.scrape_contest_metadata(child_code)
|
||||||
|
if not problems_raw:
|
||||||
return self._metadata_error(
|
return self._metadata_error(
|
||||||
f"No problems found for contest {contest_id}"
|
f"No problems found for contest {contest_id}"
|
||||||
)
|
)
|
||||||
problems = []
|
problems = []
|
||||||
for problem_code, problem_data in data["problems"].items():
|
for problem_code, problem_data in problems_raw.items():
|
||||||
if problem_data.get("category_name") == "main":
|
if problem_data.get("category_name") == "main":
|
||||||
problems.append(
|
problems.append(
|
||||||
ProblemSummary(
|
ProblemSummary(
|
||||||
|
|
@ -314,42 +320,101 @@ class CodeChefScraper(BaseScraper):
|
||||||
error="",
|
error="",
|
||||||
contest_id=contest_id,
|
contest_id=contest_id,
|
||||||
problems=problems,
|
problems=problems,
|
||||||
url=f"{BASE_URL}/{contest_id}",
|
url=f"{BASE_URL}/problems/%s",
|
||||||
|
contest_url=f"{BASE_URL}/{contest_id}",
|
||||||
|
standings_url=f"{BASE_URL}/{contest_id}/rankings",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return self._metadata_error(f"Failed to fetch contest {contest_id}: {e}")
|
return self._metadata_error(f"Failed to fetch contest {contest_id}: {e}")
|
||||||
|
|
||||||
async def scrape_contest_list(self) -> ContestListResult:
|
async def scrape_contest_list(self) -> ContestListResult:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient(
|
||||||
|
limits=httpx.Limits(max_connections=CONNECTIONS)
|
||||||
|
) as client:
|
||||||
try:
|
try:
|
||||||
data = await fetch_json(client, API_CONTESTS_ALL)
|
data = await fetch_json(client, API_CONTESTS_ALL)
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
return self._contests_error(f"Failed to fetch contests: {e}")
|
return self._contests_error(f"Failed to fetch contests: {e}")
|
||||||
contests: list[ContestSummary] = []
|
|
||||||
seen: set[str] = set()
|
present = data.get("present_contests", [])
|
||||||
for c in data.get("future_contests", []) + data.get("past_contests", []):
|
future = data.get("future_contests", [])
|
||||||
code = c.get("contest_code", "")
|
|
||||||
name = c.get("contest_name", code)
|
async def fetch_past_page(offset: int) -> list[dict[str, Any]]:
|
||||||
if not re.match(r"^START\d+$", code):
|
r = await client.get(
|
||||||
continue
|
BASE_URL + API_CONTESTS_PAST,
|
||||||
if code in seen:
|
params={"sort_by": "START", "sorting_order": "desc", "offset": offset},
|
||||||
continue
|
headers=HEADERS,
|
||||||
seen.add(code)
|
timeout=HTTP_TIMEOUT,
|
||||||
start_time: int | None = None
|
)
|
||||||
iso = c.get("contest_start_date_iso")
|
r.raise_for_status()
|
||||||
if iso:
|
return r.json().get("contests", [])
|
||||||
|
|
||||||
|
past: list[dict[str, Any]] = []
|
||||||
|
offset = 0
|
||||||
|
while True:
|
||||||
|
page = await fetch_past_page(offset)
|
||||||
|
past.extend(c for c in page if re.match(r"^START\d+", c.get("contest_code", "")))
|
||||||
|
if len(page) < 20:
|
||||||
|
break
|
||||||
|
offset += 20
|
||||||
|
|
||||||
|
raw: list[dict[str, Any]] = []
|
||||||
|
seen_raw: set[str] = set()
|
||||||
|
for c in present + future + past:
|
||||||
|
code = c.get("contest_code", "")
|
||||||
|
if not code or code in seen_raw:
|
||||||
|
continue
|
||||||
|
seen_raw.add(code)
|
||||||
|
raw.append(c)
|
||||||
|
|
||||||
|
sem = asyncio.Semaphore(CONNECTIONS)
|
||||||
|
|
||||||
|
async def expand(c: dict[str, Any]) -> list[ContestSummary]:
|
||||||
|
code = c["contest_code"]
|
||||||
|
name = c.get("contest_name", code)
|
||||||
|
start_time: int | None = None
|
||||||
|
iso = c.get("contest_start_date_iso")
|
||||||
|
if iso:
|
||||||
|
try:
|
||||||
|
start_time = int(datetime.fromisoformat(iso).timestamp())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
base_name = re.sub(r"\s*\(.*?\)\s*$", "", name).strip()
|
||||||
try:
|
try:
|
||||||
dt = datetime.fromisoformat(iso)
|
async with sem:
|
||||||
start_time = int(dt.timestamp())
|
detail = await fetch_json(client, API_CONTEST.format(contest_id=code))
|
||||||
|
children = detail.get("child_contests")
|
||||||
|
if children and isinstance(children, dict):
|
||||||
|
divs: list[ContestSummary] = []
|
||||||
|
for div_key in ("div_1", "div_2", "div_3", "div_4"):
|
||||||
|
child = children.get(div_key)
|
||||||
|
if not child:
|
||||||
|
continue
|
||||||
|
child_code = child.get("contest_code")
|
||||||
|
div_num = child.get("div", {}).get("div_number", div_key[-1])
|
||||||
|
if child_code:
|
||||||
|
display = f"{base_name} (Div. {div_num})"
|
||||||
|
divs.append(ContestSummary(
|
||||||
|
id=child_code, name=display, display_name=display, start_time=start_time
|
||||||
|
))
|
||||||
|
if divs:
|
||||||
|
return divs
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
contests.append(
|
return [ContestSummary(id=code, name=name, display_name=name, start_time=start_time)]
|
||||||
ContestSummary(
|
|
||||||
id=code, name=name, display_name=name, start_time=start_time
|
results = await asyncio.gather(*[expand(c) for c in raw])
|
||||||
)
|
|
||||||
)
|
contests: list[ContestSummary] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for group in results:
|
||||||
|
for entry in group:
|
||||||
|
if entry.id not in seen:
|
||||||
|
seen.add(entry.id)
|
||||||
|
contests.append(entry)
|
||||||
|
|
||||||
if not contests:
|
if not contests:
|
||||||
return self._contests_error("No Starters contests found")
|
return self._contests_error("No contests found")
|
||||||
return ContestListResult(success=True, error="", contests=contests)
|
return ContestListResult(success=True, error="", contests=contests)
|
||||||
|
|
||||||
async def stream_tests_for_category_async(self, category_id: str) -> None:
|
async def stream_tests_for_category_async(self, category_id: str) -> None:
|
||||||
|
|
@ -369,6 +434,13 @@ class CodeChefScraper(BaseScraper):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
all_problems = contest_data.get("problems", {})
|
all_problems = contest_data.get("problems", {})
|
||||||
|
if not all_problems and isinstance(contest_data.get("child_contests"), dict):
|
||||||
|
for div in ("div_4", "div_3", "div_2", "div_1"):
|
||||||
|
child = contest_data["child_contests"].get(div, {})
|
||||||
|
child_code = child.get("contest_code")
|
||||||
|
if child_code:
|
||||||
|
await self.stream_tests_for_category_async(child_code)
|
||||||
|
return
|
||||||
if not all_problems:
|
if not all_problems:
|
||||||
print(
|
print(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue