mirror of
https://github.com/booklore-app/booklore.git
synced 2026-02-18 00:17:53 +01:00
feat(metadata): return full metadata from fetchMetadata instead of previews (#2689)
Amazon, Audible, and GoodReads parsers now fetch full detailed metadata (up to 4 results each for Amazon and Audible, up to 3 for GoodReads) instead of returning lightweight previews. Adds randomized 500-1500ms delays between requests. GoodReads retries with title-only search (2 results) when title+author yields no matches.

Co-authored-by: acx10 <acx10@users.noreply.github.com>
This commit is contained in:
@@ -24,6 +24,7 @@ import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -44,7 +45,7 @@ public class AmazonBookParser implements BookParser, DetailedMetadataProvider {
|
||||
}
|
||||
}
|
||||
|
||||
private static final int COUNT_DETAILED_METADATA_TO_GET = 3;
|
||||
private static final int COUNT_DETAILED_METADATA_TO_GET = 4;
|
||||
private static final String BASE_BOOK_URL_SUFFIX = "/dp/";
|
||||
private static final Pattern NON_DIGIT_PATTERN = Pattern.compile("[^\\d]");
|
||||
private static final Pattern SERIES_FORMAT_PATTERN = Pattern.compile("Book (\\d+(?:\\.\\d+)?) of (\\d+)");
|
||||
@@ -104,21 +105,28 @@ public class AmazonBookParser implements BookParser, DetailedMetadataProvider {
|
||||
|
||||
@Override
|
||||
public List<BookMetadata> fetchMetadata(Book book, FetchMetadataRequest fetchMetadataRequest) {
|
||||
String queryUrl = buildQueryUrl(fetchMetadataRequest, book);
|
||||
if (queryUrl == null) {
|
||||
log.error("Query URL is null, cannot proceed.");
|
||||
LinkedList<String> amazonBookIds = getAmazonBookIds(book, fetchMetadataRequest);
|
||||
if (amazonBookIds == null || amazonBookIds.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
try {
|
||||
Document doc = fetchDocument(queryUrl);
|
||||
return extractSearchPreviews(doc);
|
||||
} catch (AmazonAntiScrapingException e) {
|
||||
log.debug("Aborting Amazon search due to anti-scraping (503).");
|
||||
return Collections.emptyList();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to fetch Amazon search results: {}", e.getMessage(), e);
|
||||
return Collections.emptyList();
|
||||
List<BookMetadata> results = new ArrayList<>();
|
||||
for (int i = 0; i < amazonBookIds.size() && results.size() < COUNT_DETAILED_METADATA_TO_GET; i++) {
|
||||
try {
|
||||
if (i > 0) {
|
||||
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
|
||||
}
|
||||
BookMetadata metadata = getBookMetadata(amazonBookIds.get(i));
|
||||
if (metadata != null) {
|
||||
results.add(metadata);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
log.error("Error fetching metadata for ASIN: {}", amazonBookIds.get(i), e);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private List<BookMetadata> extractSearchPreviews(Document doc) {
|
||||
|
||||
@@ -25,6 +25,7 @@ import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@@ -35,6 +36,7 @@ import java.util.stream.Collectors;
|
||||
@AllArgsConstructor
|
||||
public class AudibleParser implements BookParser, DetailedMetadataProvider {
|
||||
|
||||
private static final int COUNT_DETAILED_METADATA_TO_GET = 4;
|
||||
private static final long MIN_REQUEST_INTERVAL_MS = 1500;
|
||||
private static final String DEFAULT_DOMAIN = "com";
|
||||
|
||||
@@ -74,19 +76,28 @@ public class AudibleParser implements BookParser, DetailedMetadataProvider {
|
||||
|
||||
@Override
|
||||
public List<BookMetadata> fetchMetadata(Book book, FetchMetadataRequest fetchMetadataRequest) {
|
||||
String queryUrl = buildQueryUrl(fetchMetadataRequest, book);
|
||||
if (queryUrl == null) {
|
||||
log.error("Query URL is null, cannot proceed with Audible search.");
|
||||
List<String> audibleIds = getAudibleIds(book, fetchMetadataRequest);
|
||||
if (audibleIds == null || audibleIds.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
try {
|
||||
enforceRateLimit();
|
||||
Document doc = fetchDocument(queryUrl);
|
||||
return extractSearchPreviews(doc);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to fetch Audible search results: {}", e.getMessage(), e);
|
||||
return Collections.emptyList();
|
||||
List<BookMetadata> results = new ArrayList<>();
|
||||
for (int i = 0; i < audibleIds.size() && results.size() < COUNT_DETAILED_METADATA_TO_GET; i++) {
|
||||
try {
|
||||
if (i > 0) {
|
||||
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
|
||||
}
|
||||
BookMetadata metadata = getBookMetadata(audibleIds.get(i));
|
||||
if (metadata != null) {
|
||||
results.add(metadata);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
log.error("Error fetching metadata for Audible ID: {}", audibleIds.get(i), e);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private List<BookMetadata> extractSearchPreviews(Document doc) {
|
||||
|
||||
@@ -27,6 +27,7 @@ import java.time.Instant;
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@@ -40,6 +41,7 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
|
||||
private static final String BASE_BOOK_URL = "https://www.goodreads.com/book/show/";
|
||||
private static final String BASE_ISBN_URL = "https://www.goodreads.com/book/isbn/";
|
||||
private static final int COUNT_DETAILED_METADATA_TO_GET = 3;
|
||||
private static final int COUNT_DETAILED_METADATA_TO_GET_RETRY = 2;
|
||||
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
|
||||
private static final Pattern BOOK_SHOW_ID_PATTERN = Pattern.compile("/book/show/(\\d+)");
|
||||
|
||||
@@ -111,9 +113,29 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
|
||||
}
|
||||
}
|
||||
|
||||
return fetchMetadataPreviews(book, fetchMetadataRequest).stream()
|
||||
List<BookMetadata> previews = fetchMetadataPreviews(book, fetchMetadataRequest).stream()
|
||||
.limit(COUNT_DETAILED_METADATA_TO_GET)
|
||||
.toList();
|
||||
List<BookMetadata> results = fetchMetadataUsingPreviews(previews);
|
||||
|
||||
if (results.isEmpty()
|
||||
&& fetchMetadataRequest.getTitle() != null && !fetchMetadataRequest.getTitle().isBlank()
|
||||
&& fetchMetadataRequest.getAuthor() != null && !fetchMetadataRequest.getAuthor().isBlank()) {
|
||||
log.info("GoodReads: No results with title+author, retrying with title only.");
|
||||
FetchMetadataRequest titleOnlyRequest = FetchMetadataRequest.builder()
|
||||
.bookId(fetchMetadataRequest.getBookId())
|
||||
.providers(fetchMetadataRequest.getProviders())
|
||||
.isbn(fetchMetadataRequest.getIsbn())
|
||||
.title(fetchMetadataRequest.getTitle())
|
||||
.asin(fetchMetadataRequest.getAsin())
|
||||
.build();
|
||||
previews = fetchMetadataPreviews(book, titleOnlyRequest).stream()
|
||||
.limit(COUNT_DETAILED_METADATA_TO_GET_RETRY)
|
||||
.toList();
|
||||
results = fetchMetadataUsingPreviews(previews);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private List<BookMetadata> fetchMetadataUsingPreviews(List<BookMetadata> previews) {
|
||||
@@ -126,7 +148,7 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
|
||||
if (detailedMetadata != null) {
|
||||
fetchedMetadata.add(detailedMetadata);
|
||||
}
|
||||
Thread.sleep(Duration.ofSeconds(2));
|
||||
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
|
||||
} catch (Exception e) {
|
||||
log.error("Error fetching metadata for book: {}", preview.getGoodreadsId(), e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user