Files
romm/backend/utils/update_hltb_api_url.py
Georges-Antoine Assi 2be1bab224 changes from bot review
2025-11-17 15:46:14 -05:00

134 lines
4.4 KiB
Python

# uv run python -m utils.update_hltb_api_url
"""
Utility script to update the HowLongToBeat API URL by discovering the dynamic endpoint from the website.
"""
import re
import sys
from pathlib import Path
from urllib.parse import urljoin

import httpx
# Patterns are compiled once at import time and reused by the functions below.

# Primary match: a script tag whose src points at the Next.js "_app" page
# chunk under /_next/static/chunks/pages/.
APP_JS_REGEX = re.compile(
    r'src=["\'](?P<path>\/_next\/static\/chunks\/pages\/_app[^"\']+\.js)["\']'
)

# Fallback match: any script src that contains "_app" and ends in ".js".
APP_JS_FALLBACK_REGEX = re.compile(
    r'src=["\'](?P<path>[^"\']*_app[^"\']+\.js)["\']'
)

# Captures the API route name and the two token fragments from an expression
# of the form:  /api/<endpoint>/".concat("<part1>").concat("<part2>")
ENDPOINT_TOKEN_REGEX = re.compile(
    r'/api/(?P<endpoint>[a-zA-Z0-9_-]+)/["\']\.concat\(["\'](?P<part1>[0-9a-zA-Z]+)["\']\)\.concat\(["\'](?P<part2>[0-9a-zA-Z]+)["\']\)'
)
def fetch_hltb_app_script(base_url: str = "https://howlongtobeat.com") -> str | None:
    """Fetch the HLTB app script from the site.

    Downloads the homepage at ``base_url``, locates the ``_app`` JavaScript
    chunk referenced by a script ``src`` attribute, and downloads that chunk.

    Args:
        base_url: Root URL of the site; a trailing slash is tolerated.

    Returns:
        The text of the ``_app`` JavaScript chunk, or ``None`` when the chunk
        cannot be located in the homepage HTML or an HTTP request fails.
    """
    # Normalise once so a trailing slash on base_url never yields "//".
    homepage_url = f"{base_url.rstrip('/')}/"

    try:
        # One timeout for every request made through this client.
        with httpx.Client(timeout=15) as client:
            # 1) Fetch homepage HTML
            resp = client.get(homepage_url)
            resp.raise_for_status()
            html = resp.text
            print(f"Fetched homepage: {homepage_url}")

            # 2) Find the Next.js _app chunk
            #    (typical pattern: "/_next/static/chunks/pages/_app-<hash>.js")
            app_js_match = APP_JS_REGEX.search(html)
            if not app_js_match:
                # Fallback: any script path containing "_app" ending with .js
                app_js_match = APP_JS_FALLBACK_REGEX.search(html)
            if not app_js_match:
                print("Could not locate HLTB _app JS chunk.")
                return None

            app_js_path = app_js_match.group("path")
            print(f"Located app JS path: {app_js_path}")

            # Resolve the src against the homepage URL. urljoin handles
            # absolute URLs, root-relative paths, plain relative paths and
            # protocol-relative ("//host/...") references, the last of which
            # would otherwise be glued onto base_url as if it were a path.
            app_js_url = urljoin(homepage_url, app_js_path)
            print(f"Constructed app JS URL: {app_js_url}")

            # 3) Download the _app JS chunk
            js_resp = client.get(app_js_url)
            js_resp.raise_for_status()
            js_code = js_resp.text
            print(f"Downloaded app JS chunk (size: {len(js_code)} chars)")
            return js_code
    except (httpx.RequestError, httpx.HTTPStatusError) as e:
        print(f"Error fetching HLTB app script: {e}", file=sys.stderr)
        return None
def discover_hltb_endpoint(base_url: str = "https://howlongtobeat.com") -> str | None:
    """Discover the current HLTB API endpoint by fetching and parsing the app script.

    Args:
        base_url: Root URL of the site; a trailing slash is tolerated.

    Returns:
        The search URL in the form ``<base_url>/api/<endpoint>/<part1><part2>``,
        or ``None`` when the app script cannot be fetched, the endpoint
        pattern is not found in it, or an ``OSError`` occurs.
    """
    try:
        # 1) Fetch the app script
        js_code = fetch_hltb_app_script(base_url)
        if not js_code:
            print("Could not fetch HLTB app script; using default search endpoint")
            return None

        # 2) Extract the endpoint and tokens from the app script
        token_match = ENDPOINT_TOKEN_REGEX.search(js_code)
        if not token_match:
            print(
                "Could not extract HLTB endpoint and tokens from _app JS; using default search endpoint"
            )
            return None

        endpoint = token_match.group("endpoint")
        part1 = token_match.group("part1")
        part2 = token_match.group("part2")
        print(f"Extracted endpoint: {endpoint}")
        print(f"Extracted token part1: {part1}")
        print(f"Extracted token part2: {part2}")

        # 3) Build the search URL. Strip any trailing slash from base_url so
        #    the result never contains "//api/".
        search_url = f"{base_url.rstrip('/')}/api/{endpoint}/{part1}{part2}"
        print(f"Resolved HLTB search endpoint: {search_url}")
        return search_url
    except OSError as e:
        # IOError is an alias of OSError, so a single clause covers both.
        print(
            f"Unexpected error discovering HLTB endpoint from site: {e}",
            file=sys.stderr,
        )
        return None
def main() -> None:
    """Discover the HLTB API URL and write it to the fixture file.

    The URL is written to ``handler/metadata/fixtures/hltb_api_url`` under the
    directory two levels above this file. The process exits with status 1
    when discovery fails or the fixture file cannot be written.
    """
    print("Starting HLTB API URL discovery...")

    search_url = discover_hltb_endpoint()
    if not search_url:
        print("Failed to discover HLTB API URL", file=sys.stderr)
        sys.exit(1)

    # Write to the expected location
    fixture_path = (
        Path(__file__).parent.parent
        / "handler"
        / "metadata"
        / "fixtures"
        / "hltb_api_url"
    )

    try:
        # Explicit encoding keeps the output independent of the locale.
        fixture_path.write_text(search_url, encoding="utf-8")
    except OSError as e:
        print(f"Error writing to fixture file: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Successfully updated HLTB API URL to: {search_url}")
    print(f"Written to: {fixture_path}")
# Run the update only when this module is executed as a script
# (e.g. `uv run python -m utils.update_hltb_api_url`), not when it is imported.
if __name__ == "__main__":
    main()