feat(metadata): return full metadata from fetchMetadata instead of previews (#2689)

Amazon, Audible, and GoodReads parsers now fetch full detailed metadata
instead of returning lightweight previews: up to 4 results each for Amazon
and Audible, and up to 3 for GoodReads. Adds randomized 500-1500ms delays
between detail requests. GoodReads retries with a title-only search (up to
2 results) when the title+author search yields no matches.

Co-authored-by: acx10 <acx10@users.noreply.github.com>
This commit is contained in:
ACX
2026-02-10 15:49:11 -07:00
committed by GitHub
parent fe950d97b1
commit ff11ec57b8
3 changed files with 66 additions and 25 deletions

View File

@@ -24,6 +24,7 @@ import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@@ -44,7 +45,7 @@ public class AmazonBookParser implements BookParser, DetailedMetadataProvider {
}
}
private static final int COUNT_DETAILED_METADATA_TO_GET = 3;
private static final int COUNT_DETAILED_METADATA_TO_GET = 4;
private static final String BASE_BOOK_URL_SUFFIX = "/dp/";
private static final Pattern NON_DIGIT_PATTERN = Pattern.compile("[^\\d]");
private static final Pattern SERIES_FORMAT_PATTERN = Pattern.compile("Book (\\d+(?:\\.\\d+)?) of (\\d+)");
@@ -104,21 +105,28 @@ public class AmazonBookParser implements BookParser, DetailedMetadataProvider {
@Override
public List<BookMetadata> fetchMetadata(Book book, FetchMetadataRequest fetchMetadataRequest) {
String queryUrl = buildQueryUrl(fetchMetadataRequest, book);
if (queryUrl == null) {
log.error("Query URL is null, cannot proceed.");
LinkedList<String> amazonBookIds = getAmazonBookIds(book, fetchMetadataRequest);
if (amazonBookIds == null || amazonBookIds.isEmpty()) {
return Collections.emptyList();
}
try {
Document doc = fetchDocument(queryUrl);
return extractSearchPreviews(doc);
} catch (AmazonAntiScrapingException e) {
log.debug("Aborting Amazon search due to anti-scraping (503).");
return Collections.emptyList();
} catch (Exception e) {
log.error("Failed to fetch Amazon search results: {}", e.getMessage(), e);
return Collections.emptyList();
List<BookMetadata> results = new ArrayList<>();
for (int i = 0; i < amazonBookIds.size() && results.size() < COUNT_DETAILED_METADATA_TO_GET; i++) {
try {
if (i > 0) {
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
}
BookMetadata metadata = getBookMetadata(amazonBookIds.get(i));
if (metadata != null) {
results.add(metadata);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
} catch (Exception e) {
log.error("Error fetching metadata for ASIN: {}", amazonBookIds.get(i), e);
}
}
return results;
}
private List<BookMetadata> extractSearchPreviews(Document doc) {

View File

@@ -25,6 +25,7 @@ import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -35,6 +36,7 @@ import java.util.stream.Collectors;
@AllArgsConstructor
public class AudibleParser implements BookParser, DetailedMetadataProvider {
private static final int COUNT_DETAILED_METADATA_TO_GET = 4;
private static final long MIN_REQUEST_INTERVAL_MS = 1500;
private static final String DEFAULT_DOMAIN = "com";
@@ -74,19 +76,28 @@ public class AudibleParser implements BookParser, DetailedMetadataProvider {
@Override
public List<BookMetadata> fetchMetadata(Book book, FetchMetadataRequest fetchMetadataRequest) {
String queryUrl = buildQueryUrl(fetchMetadataRequest, book);
if (queryUrl == null) {
log.error("Query URL is null, cannot proceed with Audible search.");
List<String> audibleIds = getAudibleIds(book, fetchMetadataRequest);
if (audibleIds == null || audibleIds.isEmpty()) {
return Collections.emptyList();
}
try {
enforceRateLimit();
Document doc = fetchDocument(queryUrl);
return extractSearchPreviews(doc);
} catch (Exception e) {
log.error("Failed to fetch Audible search results: {}", e.getMessage(), e);
return Collections.emptyList();
List<BookMetadata> results = new ArrayList<>();
for (int i = 0; i < audibleIds.size() && results.size() < COUNT_DETAILED_METADATA_TO_GET; i++) {
try {
if (i > 0) {
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
}
BookMetadata metadata = getBookMetadata(audibleIds.get(i));
if (metadata != null) {
results.add(metadata);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
} catch (Exception e) {
log.error("Error fetching metadata for Audible ID: {}", audibleIds.get(i), e);
}
}
return results;
}
private List<BookMetadata> extractSearchPreviews(Document doc) {

View File

@@ -27,6 +27,7 @@ import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -40,6 +41,7 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
private static final String BASE_BOOK_URL = "https://www.goodreads.com/book/show/";
private static final String BASE_ISBN_URL = "https://www.goodreads.com/book/isbn/";
private static final int COUNT_DETAILED_METADATA_TO_GET = 3;
private static final int COUNT_DETAILED_METADATA_TO_GET_RETRY = 2;
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
private static final Pattern BOOK_SHOW_ID_PATTERN = Pattern.compile("/book/show/(\\d+)");
@@ -111,9 +113,29 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
}
}
return fetchMetadataPreviews(book, fetchMetadataRequest).stream()
List<BookMetadata> previews = fetchMetadataPreviews(book, fetchMetadataRequest).stream()
.limit(COUNT_DETAILED_METADATA_TO_GET)
.toList();
List<BookMetadata> results = fetchMetadataUsingPreviews(previews);
if (results.isEmpty()
&& fetchMetadataRequest.getTitle() != null && !fetchMetadataRequest.getTitle().isBlank()
&& fetchMetadataRequest.getAuthor() != null && !fetchMetadataRequest.getAuthor().isBlank()) {
log.info("GoodReads: No results with title+author, retrying with title only.");
FetchMetadataRequest titleOnlyRequest = FetchMetadataRequest.builder()
.bookId(fetchMetadataRequest.getBookId())
.providers(fetchMetadataRequest.getProviders())
.isbn(fetchMetadataRequest.getIsbn())
.title(fetchMetadataRequest.getTitle())
.asin(fetchMetadataRequest.getAsin())
.build();
previews = fetchMetadataPreviews(book, titleOnlyRequest).stream()
.limit(COUNT_DETAILED_METADATA_TO_GET_RETRY)
.toList();
results = fetchMetadataUsingPreviews(previews);
}
return results;
}
private List<BookMetadata> fetchMetadataUsingPreviews(List<BookMetadata> previews) {
@@ -126,7 +148,7 @@ public class GoodReadsParser implements BookParser, DetailedMetadataProvider {
if (detailedMetadata != null) {
fetchedMetadata.add(detailedMetadata);
}
Thread.sleep(Duration.ofSeconds(2));
Thread.sleep(ThreadLocalRandom.current().nextLong(500, 1501));
} catch (Exception e) {
log.error("Error fetching metadata for book: {}", preview.getGoodreadsId(), e);
}