From c27cc7205dec3c256f8ee64665df9f9a41377937 Mon Sep 17 00:00:00 2001 From: acx10 Date: Thu, 22 Jan 2026 00:45:36 -0700 Subject: [PATCH] Fix failing book cover search due to HTTP 403 --- .../metadata/DuckDuckGoCoverService.java | 62 ++++++++++++++----- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/DuckDuckGoCoverService.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/DuckDuckGoCoverService.java index 0ced902d1..3ea7d277f 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/DuckDuckGoCoverService.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/DuckDuckGoCoverService.java @@ -32,9 +32,29 @@ public class DuckDuckGoCoverService implements BookCoverProvider { private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; private static final String REFERRER = "https://duckduckgo.com/"; - private static final Map DEFAULT_HEADERS = Map.ofEntries( - Map.entry("accept", "text/html, application/json"), - Map.entry("content-type", "application/json"), + private static final Map HTML_HEADERS = Map.ofEntries( + Map.entry("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"), + Map.entry("accept-language", "en-US,en;q=0.9"), + Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""), + Map.entry("sec-ch-ua-mobile", "?0"), + Map.entry("sec-ch-ua-platform", "\"macOS\""), + Map.entry("sec-fetch-dest", "document"), + Map.entry("sec-fetch-mode", "navigate"), + Map.entry("sec-fetch-site", "same-origin"), + Map.entry("sec-fetch-user", "?1"), + Map.entry("upgrade-insecure-requests", "1"), + Map.entry("user-agent", USER_AGENT) + ); + private static final Map JSON_HEADERS = Map.ofEntries( + Map.entry("accept", "application/json, text/javascript, */*; q=0.01"), + Map.entry("accept-language", "en-US,en;q=0.9"), + Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""), + Map.entry("sec-ch-ua-mobile", "?0"), + Map.entry("sec-ch-ua-platform", "\"macOS\""), + Map.entry("sec-fetch-dest", "empty"), + Map.entry("sec-fetch-mode", "cors"), + Map.entry("sec-fetch-site", "same-origin"), + Map.entry("x-requested-with", "XMLHttpRequest"), Map.entry("user-agent", USER_AGENT) ); @@ -49,7 +69,9 @@ public class DuckDuckGoCoverService implements BookCoverProvider { String encodedSiteQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8); String siteUrl = SEARCH_BASE_URL + encodedSiteQuery + SITE_FILTER + SEARCH_PARAMS; - Document siteDoc = getDocument(siteUrl); + Connection.Response siteResponse = getResponse(siteUrl); + Document siteDoc = parseResponse(siteResponse); + Map cookies = siteResponse.cookies(); Pattern tokenPattern = Pattern.compile("vqd=\"(\\d+-\\d+)\""); Matcher siteMatcher = tokenPattern.matcher(siteDoc.html()); if (!siteMatcher.find()) { @@ -57,7 +79,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider { return Collections.emptyList(); } String siteSearchToken = siteMatcher.group(1); - List siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken); + List siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken, cookies, siteUrl); siteFilteredImages.removeIf(dto -> dto.getWidth() < 350); siteFilteredImages.removeIf(dto -> dto.getWidth() >= dto.getHeight()); if (siteFilteredImages.size() > 7) { @@ -66,12 +88,14 @@ public class DuckDuckGoCoverService implements BookCoverProvider { String encodedGeneralQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8); String generalUrl = SEARCH_BASE_URL + encodedGeneralQuery + SEARCH_PARAMS; - Document generalDoc = getDocument(generalUrl); + Connection.Response generalResponse = getResponse(generalUrl); + Document generalDoc = parseResponse(generalResponse); + Map generalCookies = generalResponse.cookies(); Matcher generalMatcher = tokenPattern.matcher(generalDoc.html()); List generalBookImages = new ArrayList<>(); if (generalMatcher.find()) { String generalSearchToken = generalMatcher.group(1); - generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken); + generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken, generalCookies, generalUrl); generalBookImages.removeIf(dto -> dto.getWidth() < 350); generalBookImages.removeIf(dto -> dto.getWidth() >= dto.getHeight()); Set siteUrls = siteFilteredImages.stream().map(CoverImage::getUrl).collect(Collectors.toSet()); @@ -97,7 +121,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider { return allImages; } - private List fetchImagesFromApi(String query, String searchToken) { + private List fetchImagesFromApi(String query, String searchToken, Map cookies, String referrerUrl) { List priority = new ArrayList<>(); List others = new ArrayList<>(); try { @@ -108,9 +132,11 @@ public class DuckDuckGoCoverService implements BookCoverProvider { Connection.Response resp = Jsoup.connect(url) .ignoreContentType(true) - .referrer(REFERRER) + .referrer(referrerUrl) .followRedirects(true) - .headers(DEFAULT_HEADERS) + .headers(JSON_HEADERS) + .header("x-vqd-4", searchToken) + .cookies(cookies) .method(Connection.Method.GET) .execute(); @@ -137,17 +163,25 @@ public class DuckDuckGoCoverService implements BookCoverProvider { return all; } - private Document getDocument(String url) { + private Connection.Response getResponse(String url) { try { - Connection.Response response = Jsoup.connect(url) + return Jsoup.connect(url) .referrer(REFERRER) .followRedirects(true) - .headers(DEFAULT_HEADERS) + .headers(HTML_HEADERS) .method(Connection.Method.GET) .execute(); + } catch (IOException e) { + log.error("Error fetching url: {}", url, e); + throw new RuntimeException(e); + } + } + + private Document parseResponse(Connection.Response response) { + try { return response.parse(); } catch (IOException e) { - log.error("Error parsing url: {}", url, e); + log.error("Error parsing response", e); throw new RuntimeException(e); } }