Fix failing book cover search due to HTTP 403

This commit is contained in:
acx10
2026-01-22 00:45:36 -07:00
parent 61ac306735
commit c27cc7205d

View File

@@ -32,9 +32,29 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
private static final String REFERRER = "https://duckduckgo.com/";
private static final Map<String, String> DEFAULT_HEADERS = Map.ofEntries(
Map.entry("accept", "text/html, application/json"),
Map.entry("content-type", "application/json"),
private static final Map<String, String> HTML_HEADERS = Map.ofEntries(
Map.entry("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
Map.entry("accept-language", "en-US,en;q=0.9"),
Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""),
Map.entry("sec-ch-ua-mobile", "?0"),
Map.entry("sec-ch-ua-platform", "\"macOS\""),
Map.entry("sec-fetch-dest", "document"),
Map.entry("sec-fetch-mode", "navigate"),
Map.entry("sec-fetch-site", "same-origin"),
Map.entry("sec-fetch-user", "?1"),
Map.entry("upgrade-insecure-requests", "1"),
Map.entry("user-agent", USER_AGENT)
);
private static final Map<String, String> JSON_HEADERS = Map.ofEntries(
Map.entry("accept", "application/json, text/javascript, */*; q=0.01"),
Map.entry("accept-language", "en-US,en;q=0.9"),
Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""),
Map.entry("sec-ch-ua-mobile", "?0"),
Map.entry("sec-ch-ua-platform", "\"macOS\""),
Map.entry("sec-fetch-dest", "empty"),
Map.entry("sec-fetch-mode", "cors"),
Map.entry("sec-fetch-site", "same-origin"),
Map.entry("x-requested-with", "XMLHttpRequest"),
Map.entry("user-agent", USER_AGENT)
);
@@ -49,7 +69,9 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
String encodedSiteQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8);
String siteUrl = SEARCH_BASE_URL + encodedSiteQuery + SITE_FILTER + SEARCH_PARAMS;
Document siteDoc = getDocument(siteUrl);
Connection.Response siteResponse = getResponse(siteUrl);
Document siteDoc = parseResponse(siteResponse);
Map<String, String> cookies = siteResponse.cookies();
Pattern tokenPattern = Pattern.compile("vqd=\"(\\d+-\\d+)\"");
Matcher siteMatcher = tokenPattern.matcher(siteDoc.html());
if (!siteMatcher.find()) {
@@ -57,7 +79,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
return Collections.emptyList();
}
String siteSearchToken = siteMatcher.group(1);
List<CoverImage> siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken);
List<CoverImage> siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken, cookies, siteUrl);
siteFilteredImages.removeIf(dto -> dto.getWidth() < 350);
siteFilteredImages.removeIf(dto -> dto.getWidth() >= dto.getHeight());
if (siteFilteredImages.size() > 7) {
@@ -66,12 +88,14 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
String encodedGeneralQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8);
String generalUrl = SEARCH_BASE_URL + encodedGeneralQuery + SEARCH_PARAMS;
Document generalDoc = getDocument(generalUrl);
Connection.Response generalResponse = getResponse(generalUrl);
Document generalDoc = parseResponse(generalResponse);
Map<String, String> generalCookies = generalResponse.cookies();
Matcher generalMatcher = tokenPattern.matcher(generalDoc.html());
List<CoverImage> generalBookImages = new ArrayList<>();
if (generalMatcher.find()) {
String generalSearchToken = generalMatcher.group(1);
generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken);
generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken, generalCookies, generalUrl);
generalBookImages.removeIf(dto -> dto.getWidth() < 350);
generalBookImages.removeIf(dto -> dto.getWidth() >= dto.getHeight());
Set<String> siteUrls = siteFilteredImages.stream().map(CoverImage::getUrl).collect(Collectors.toSet());
@@ -97,7 +121,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
return allImages;
}
private List<CoverImage> fetchImagesFromApi(String query, String searchToken) {
private List<CoverImage> fetchImagesFromApi(String query, String searchToken, Map<String, String> cookies, String referrerUrl) {
List<CoverImage> priority = new ArrayList<>();
List<CoverImage> others = new ArrayList<>();
try {
@@ -108,9 +132,11 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
Connection.Response resp = Jsoup.connect(url)
.ignoreContentType(true)
.referrer(REFERRER)
.referrer(referrerUrl)
.followRedirects(true)
.headers(DEFAULT_HEADERS)
.headers(JSON_HEADERS)
.header("x-vqd-4", searchToken)
.cookies(cookies)
.method(Connection.Method.GET)
.execute();
@@ -137,17 +163,25 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
return all;
}
private Document getDocument(String url) {
private Connection.Response getResponse(String url) {
try {
Connection.Response response = Jsoup.connect(url)
return Jsoup.connect(url)
.referrer(REFERRER)
.followRedirects(true)
.headers(DEFAULT_HEADERS)
.headers(HTML_HEADERS)
.method(Connection.Method.GET)
.execute();
} catch (IOException e) {
log.error("Error fetching url: {}", url, e);
throw new RuntimeException(e);
}
}
private Document parseResponse(Connection.Response response) {
try {
return response.parse();
} catch (IOException e) {
log.error("Error parsing url: {}", url, e);
log.error("Error parsing response", e);
throw new RuntimeException(e);
}
}