mirror of
https://github.com/booklore-app/booklore.git
synced 2026-02-18 00:17:53 +01:00
Fix failing book cover search due to HTTP 403
This commit is contained in:
@@ -32,9 +32,29 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
||||
private static final String REFERRER = "https://duckduckgo.com/";
|
||||
private static final Map<String, String> DEFAULT_HEADERS = Map.ofEntries(
|
||||
Map.entry("accept", "text/html, application/json"),
|
||||
Map.entry("content-type", "application/json"),
|
||||
private static final Map<String, String> HTML_HEADERS = Map.ofEntries(
|
||||
Map.entry("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
|
||||
Map.entry("accept-language", "en-US,en;q=0.9"),
|
||||
Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""),
|
||||
Map.entry("sec-ch-ua-mobile", "?0"),
|
||||
Map.entry("sec-ch-ua-platform", "\"macOS\""),
|
||||
Map.entry("sec-fetch-dest", "document"),
|
||||
Map.entry("sec-fetch-mode", "navigate"),
|
||||
Map.entry("sec-fetch-site", "same-origin"),
|
||||
Map.entry("sec-fetch-user", "?1"),
|
||||
Map.entry("upgrade-insecure-requests", "1"),
|
||||
Map.entry("user-agent", USER_AGENT)
|
||||
);
|
||||
private static final Map<String, String> JSON_HEADERS = Map.ofEntries(
|
||||
Map.entry("accept", "application/json, text/javascript, */*; q=0.01"),
|
||||
Map.entry("accept-language", "en-US,en;q=0.9"),
|
||||
Map.entry("sec-ch-ua", "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\""),
|
||||
Map.entry("sec-ch-ua-mobile", "?0"),
|
||||
Map.entry("sec-ch-ua-platform", "\"macOS\""),
|
||||
Map.entry("sec-fetch-dest", "empty"),
|
||||
Map.entry("sec-fetch-mode", "cors"),
|
||||
Map.entry("sec-fetch-site", "same-origin"),
|
||||
Map.entry("x-requested-with", "XMLHttpRequest"),
|
||||
Map.entry("user-agent", USER_AGENT)
|
||||
);
|
||||
|
||||
@@ -49,7 +69,9 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
|
||||
String encodedSiteQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8);
|
||||
String siteUrl = SEARCH_BASE_URL + encodedSiteQuery + SITE_FILTER + SEARCH_PARAMS;
|
||||
Document siteDoc = getDocument(siteUrl);
|
||||
Connection.Response siteResponse = getResponse(siteUrl);
|
||||
Document siteDoc = parseResponse(siteResponse);
|
||||
Map<String, String> cookies = siteResponse.cookies();
|
||||
Pattern tokenPattern = Pattern.compile("vqd=\"(\\d+-\\d+)\"");
|
||||
Matcher siteMatcher = tokenPattern.matcher(siteDoc.html());
|
||||
if (!siteMatcher.find()) {
|
||||
@@ -57,7 +79,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
String siteSearchToken = siteMatcher.group(1);
|
||||
List<CoverImage> siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken);
|
||||
List<CoverImage> siteFilteredImages = fetchImagesFromApi(searchTerm + " (site:amazon.com OR site:goodreads.com)", siteSearchToken, cookies, siteUrl);
|
||||
siteFilteredImages.removeIf(dto -> dto.getWidth() < 350);
|
||||
siteFilteredImages.removeIf(dto -> dto.getWidth() >= dto.getHeight());
|
||||
if (siteFilteredImages.size() > 7) {
|
||||
@@ -66,12 +88,14 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
|
||||
String encodedGeneralQuery = URLEncoder.encode(searchTerm, StandardCharsets.UTF_8);
|
||||
String generalUrl = SEARCH_BASE_URL + encodedGeneralQuery + SEARCH_PARAMS;
|
||||
Document generalDoc = getDocument(generalUrl);
|
||||
Connection.Response generalResponse = getResponse(generalUrl);
|
||||
Document generalDoc = parseResponse(generalResponse);
|
||||
Map<String, String> generalCookies = generalResponse.cookies();
|
||||
Matcher generalMatcher = tokenPattern.matcher(generalDoc.html());
|
||||
List<CoverImage> generalBookImages = new ArrayList<>();
|
||||
if (generalMatcher.find()) {
|
||||
String generalSearchToken = generalMatcher.group(1);
|
||||
generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken);
|
||||
generalBookImages = fetchImagesFromApi(searchTerm, generalSearchToken, generalCookies, generalUrl);
|
||||
generalBookImages.removeIf(dto -> dto.getWidth() < 350);
|
||||
generalBookImages.removeIf(dto -> dto.getWidth() >= dto.getHeight());
|
||||
Set<String> siteUrls = siteFilteredImages.stream().map(CoverImage::getUrl).collect(Collectors.toSet());
|
||||
@@ -97,7 +121,7 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
return allImages;
|
||||
}
|
||||
|
||||
private List<CoverImage> fetchImagesFromApi(String query, String searchToken) {
|
||||
private List<CoverImage> fetchImagesFromApi(String query, String searchToken, Map<String, String> cookies, String referrerUrl) {
|
||||
List<CoverImage> priority = new ArrayList<>();
|
||||
List<CoverImage> others = new ArrayList<>();
|
||||
try {
|
||||
@@ -108,9 +132,11 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
|
||||
Connection.Response resp = Jsoup.connect(url)
|
||||
.ignoreContentType(true)
|
||||
.referrer(REFERRER)
|
||||
.referrer(referrerUrl)
|
||||
.followRedirects(true)
|
||||
.headers(DEFAULT_HEADERS)
|
||||
.headers(JSON_HEADERS)
|
||||
.header("x-vqd-4", searchToken)
|
||||
.cookies(cookies)
|
||||
.method(Connection.Method.GET)
|
||||
.execute();
|
||||
|
||||
@@ -137,17 +163,25 @@ public class DuckDuckGoCoverService implements BookCoverProvider {
|
||||
return all;
|
||||
}
|
||||
|
||||
private Document getDocument(String url) {
|
||||
private Connection.Response getResponse(String url) {
|
||||
try {
|
||||
Connection.Response response = Jsoup.connect(url)
|
||||
return Jsoup.connect(url)
|
||||
.referrer(REFERRER)
|
||||
.followRedirects(true)
|
||||
.headers(DEFAULT_HEADERS)
|
||||
.headers(HTML_HEADERS)
|
||||
.method(Connection.Method.GET)
|
||||
.execute();
|
||||
} catch (IOException e) {
|
||||
log.error("Error fetching url: {}", url, e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Document parseResponse(Connection.Response response) {
|
||||
try {
|
||||
return response.parse();
|
||||
} catch (IOException e) {
|
||||
log.error("Error parsing url: {}", url, e);
|
||||
log.error("Error parsing response", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user