diff --git a/.github/workflows/update-hltb-api-url.yml b/.github/workflows/update-hltb-api-url.yml new file mode 100644 index 000000000..a31f5df33 --- /dev/null +++ b/.github/workflows/update-hltb-api-url.yml @@ -0,0 +1,36 @@ +name: Update HowLongToBeat API URL + +on: + workflow_dispatch: + +permissions: + contents: write + +jobs: + update-hltb-api-url: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6.7.0 + + - name: Install python + run: | + uv python install 3.13 + + - name: Install dependencies + run: | + uv sync + + - name: Run HLTB API URL discovery script + run: | + uv run python backend/utils/update_hltb_api_url.py + + - name: Commit and push changes + uses: EndBug/add-and-commit@v9 + with: + add: backend/handler/metadata/fixtures/hltb_api_url + message: "Update HLTB API URL [skip CI]" + push: true diff --git a/backend/handler/socket_handler.py b/backend/handler/socket_handler.py index c31b0bae0..239ee2c99 100644 --- a/backend/handler/socket_handler.py +++ b/backend/handler/socket_handler.py @@ -1,7 +1,7 @@ import socketio # type: ignore from config import REDIS_URL -from utils import json as json_module +from utils import json_module class SocketHandler: diff --git a/backend/utils/json.py b/backend/utils/json_module.py similarity index 100% rename from backend/utils/json.py rename to backend/utils/json_module.py diff --git a/backend/utils/update_hltb_api_url.py b/backend/utils/update_hltb_api_url.py new file mode 100644 index 000000000..3a65e1c77 --- /dev/null +++ b/backend/utils/update_hltb_api_url.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Utility script to update HowLongToBeat API URL by discovering the dynamic endpoint from the website. +This script fetches the Next.js app script and extracts the API endpoint and tokens. +""" + +import re +import sys +from pathlib import Path + +import httpx + + +def fetch_hltb_app_script(base_url: str = "https://howlongtobeat.com") -> str | None: + """Fetch the HLTB app script from the site.""" + try: + with httpx.Client() as client: + # 1) Fetch homepage HTML + homepage_url = f"{base_url}/" + resp = client.get(homepage_url, timeout=15) + resp.raise_for_status() + html = resp.text + print(f"Fetched homepage: {homepage_url}") + + # 2) Find the Next.js _app chunk (typical pattern: "/_next/static/chunks/pages/_app-.js") + app_js_match = re.search( + r'src=["\'](?P\/_next\/static\/chunks\/pages\/_app[^"\']+\.js)["\']', + html, + ) + if not app_js_match: + # Fallback: any script path containing "_app" ending with .js + app_js_match = re.search( + r'src=["\'](?P[^"\']*_app[^"\']+\.js)["\']', + html, + ) + if not app_js_match: + print("Could not locate HLTB _app JS chunk.") + return None + app_js_path = app_js_match.group("path") + print(f"Located app JS path: {app_js_path}") + + app_js_url = ( + app_js_path + if app_js_path.startswith("http") + else f"{base_url.rstrip('/')}/{app_js_path.lstrip('/')}" + ) + print(f"Constructed app JS URL: {app_js_url}") + + # 3) Download the _app JS chunk + js_resp = client.get(app_js_url, timeout=15) + js_resp.raise_for_status() + js_code = js_resp.text + print(f"Downloaded app JS chunk (size: {len(js_code)} chars)") + + return js_code + except Exception as e: + print(f"Error fetching HLTB app script: {e}") + return None + + +def discover_hltb_endpoint(base_url: str = "https://howlongtobeat.com") -> str | None: + """Discover the current HLTB API endpoint by fetching and parsing the app script.""" + try: + # 1) Fetch the app script + js_code = fetch_hltb_app_script(base_url) + + if not js_code: + print("Could not fetch HLTB app script; using default search endpoint") + return None + + # 2) Extract the endpoint and tokens from the app script + token_match = re.search( + r'/api/(?P[a-zA-Z0-9_-]+)/["\']\.concat\(["\'](?P[0-9a-zA-Z]+)["\']\)\.concat\(["\'](?P[0-9a-zA-Z]+)["\']\)', + js_code, + ) + if not token_match: + print( + "Could not extract HLTB endpoint and tokens from _app JS; using default search endpoint" + ) + return None + + endpoint = token_match.group("endpoint") + part1 = token_match.group("part1") + part2 = token_match.group("part2") + + print(f"Extracted endpoint: {endpoint}") + print(f"Extracted token part1: {part1}") + print(f"Extracted token part2: {part2}") + + # 3) Build the search URL + search_url = f"{base_url}/api/{endpoint}/{part1}{part2}" + print(f"Resolved HLTB search endpoint: {search_url}") + + return search_url + except Exception as e: + print(f"Unexpected error discovering HLTB endpoint from site: {e}") + return None + + +def main(): + """Main function to discover and update the HLTB API URL.""" + print("Starting HLTB API URL discovery...") + + search_url = discover_hltb_endpoint() + + if not search_url: + print("Failed to discover HLTB API URL") + sys.exit(1) + return + + # Write to the expected location + fixture_path = ( + Path(__file__).parent.parent + / "handler" + / "metadata" + / "fixtures" + / "hltb_api_url" + ) + + try: + with open(fixture_path, "w") as f: + f.write(search_url) + print(f"Successfully updated HLTB API URL to: {search_url}") + print(f"Written to: {fixture_path}") + except Exception as e: + print(f"Error writing to fixture file: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main()