WIP: Book parsing

This commit is contained in:
aditya.chandel
2025-01-01 15:28:55 -07:00
parent ea2ac58f03
commit df001a0020
20 changed files with 364 additions and 352 deletions

View File

@@ -3,15 +3,14 @@ package com.adityachandel.booklore.controller;
import com.adityachandel.booklore.model.dto.BookDTO;
import com.adityachandel.booklore.model.dto.BookMetadataDTO;
import com.adityachandel.booklore.model.dto.BookViewerSettingDTO;
import com.adityachandel.booklore.model.dto.request.SetMetadataRequest;
import com.adityachandel.booklore.model.dto.request.ShelvesAssignmentRequest;
import com.adityachandel.booklore.model.dto.response.GoogleBooksMetadata;
import com.adityachandel.booklore.service.BooksService;
import com.adityachandel.booklore.service.metadata.parser.AmazonParser;
import com.adityachandel.booklore.service.metadata.parser.model.QueryData;
import com.adityachandel.booklore.service.metadata.BookMetadataService;
import com.adityachandel.booklore.service.metadata.model.BookFetchQuery;
import com.adityachandel.booklore.service.metadata.model.BookMetadataSource;
import com.adityachandel.booklore.service.metadata.model.FetchedBookMetadata;
import jakarta.validation.Valid;
import lombok.AllArgsConstructor;
import lombok.extern.java.Log;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
@@ -25,16 +24,16 @@ import java.util.List;
public class BookController {
private BooksService booksService;
private AmazonParser amazonParser;
private BookMetadataService bookMetadataService;
@GetMapping("/{bookId}")
public ResponseEntity<BookDTO> getBook(@PathVariable long bookId) {
return ResponseEntity.ok(booksService.getBook(bookId));
public ResponseEntity<BookDTO> getBook(@PathVariable long bookId, @RequestParam(required = false, defaultValue = "false") boolean withDescription) {
return ResponseEntity.ok(booksService.getBook(bookId, withDescription));
}
@GetMapping
public ResponseEntity<List<BookDTO>> getBooks() {
return ResponseEntity.ok(booksService.getBooks());
public ResponseEntity<List<BookDTO>> getBooks(@RequestParam(required = false, defaultValue = "false") boolean withDescription) {
return ResponseEntity.ok(booksService.getBooks(withDescription));
}
@GetMapping("/search")
@@ -69,34 +68,18 @@ public class BookController {
return ResponseEntity.ok(booksService.updateLastReadTime(bookId));
}
@GetMapping("/{bookId}/fetch-metadata")
public ResponseEntity<List<GoogleBooksMetadata>> getBookFetchMetadata(@PathVariable long bookId) {
return ResponseEntity.ok(booksService.fetchProspectiveMetadataListByBookId(bookId));
@PostMapping("/{bookId}/source/{source}/metadata")
public ResponseEntity<FetchedBookMetadata> getBookMetadata(@RequestBody(required = false) BookFetchQuery bookFetchQuery, @PathVariable Long bookId, @PathVariable BookMetadataSource source) {
return ResponseEntity.ok(bookMetadataService.fetchBookMetadata(bookId, source, bookFetchQuery));
}
/**
 * Queries the Amazon parser for metadata describing the given book.
 *
 * @param queryData optional search criteria; the request body may be absent
 * @param bookId    id of the book, taken from the request path
 * @return 200 OK wrapping the value returned by {@code amazonParser.queryForBookMetadata}
 */
@PostMapping("/{bookId}/query-for-books")
public ResponseEntity<BookMetadataDTO> getBookMetadata(@RequestBody(required = false) QueryData queryData, @PathVariable Long bookId) {
    BookMetadataDTO fetched = amazonParser.queryForBookMetadata(bookId, queryData);
    return ResponseEntity.ok(fetched);
}
/**
 * Searches for candidate metadata entries matching a free-text term.
 *
 * @param term search term supplied as a request parameter
 * @return 200 OK wrapping the list produced by the books service
 */
@GetMapping("/fetch-metadata")
public ResponseEntity<List<GoogleBooksMetadata>> fetchMedataByTerm(@RequestParam String term) {
    List<GoogleBooksMetadata> candidates = booksService.fetchProspectiveMetadataListBySearchTerm(term);
    return ResponseEntity.ok(candidates);
}
@PutMapping("/{bookId}/set-metadata")
public ResponseEntity<BookDTO> setBookMetadata(@RequestBody SetMetadataRequest setMetadataRequest, @PathVariable long bookId) {
return ResponseEntity.ok(booksService.setMetadata(setMetadataRequest, bookId));
@PutMapping("/{bookId}/source/{source}/metadata")
public ResponseEntity<BookMetadataDTO> setBookMetadata(@RequestBody FetchedBookMetadata setMetadataRequest, @PathVariable long bookId, @PathVariable BookMetadataSource source) {
return ResponseEntity.ok(booksService.setBookMetadata(bookId, source, setMetadataRequest));
}
/**
 * Assigns and unassigns shelves for a set of books in one request.
 *
 * @param request validated payload carrying the book ids, the shelves to assign and the shelves to unassign
 * @return 200 OK wrapping the list of books returned by the books service
 */
@PostMapping("/assign-shelves")
public ResponseEntity<List<BookDTO>> addBookToShelf(@RequestBody @Valid ShelvesAssignmentRequest request) {
    List<BookDTO> updatedBooks = booksService.assignShelvesToBooks(
            request.getBookIds(),
            request.getShelvesToAssign(),
            request.getShelvesToUnassign());
    return ResponseEntity.ok(updatedBooks);
}
/**
 * Replaces the stored metadata of a book with the values supplied by the client.
 *
 * @param metadataDTO metadata values sent in the request body
 * @param bookId      id of the book, taken from the request path
 * @return 200 OK wrapping the metadata returned by the books service
 * @throws IOException propagated from {@code booksService.setMetadata}
 */
@PutMapping("/{bookId}/metadata")
public ResponseEntity<BookMetadataDTO> updateBookMetadata(@RequestBody BookMetadataDTO metadataDTO, @PathVariable long bookId) throws IOException {
    BookMetadataDTO saved = booksService.setMetadata(bookId, metadataDTO);
    return ResponseEntity.ok(saved);
}
}

View File

@@ -1,20 +1,17 @@
package com.adityachandel.booklore.controller;
import com.adityachandel.booklore.model.dto.BookDTO;
import com.adityachandel.booklore.model.dto.BookMetadataDTO;
import com.adityachandel.booklore.model.dto.BookWithNeighborsDTO;
import com.adityachandel.booklore.model.dto.LibraryDTO;
import com.adityachandel.booklore.model.dto.request.CreateLibraryRequest;
import com.adityachandel.booklore.model.entity.Sort;
import com.adityachandel.booklore.service.BooksService;
import com.adityachandel.booklore.service.LibraryService;
import com.adityachandel.booklore.service.metadata.parser.AmazonParser;
import com.adityachandel.booklore.service.metadata.parser.model.QueryData;
import com.adityachandel.booklore.service.metadata.parser.AmazonBookParser;
import lombok.AllArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.io.IOException;
import java.util.List;
@@ -25,7 +22,7 @@ public class LibraryController {
private LibraryService libraryService;
private BooksService booksService;
private AmazonParser amazonParser;
private AmazonBookParser amazonBookParser;
@GetMapping("/{libraryId}")
public ResponseEntity<LibraryDTO> getLibrary(@PathVariable long libraryId) {

View File

@@ -19,7 +19,8 @@ public enum ApiError {
FILE_ALREADY_EXISTS(HttpStatus.CONFLICT, "File already exists"),
INVALID_QUERY_PARAMETERS(HttpStatus.BAD_REQUEST, "Query parameters are required for the search."),
SHELF_ALREADY_EXISTS(HttpStatus.CONFLICT, "Shelf already exists: %s"),
SHELF_NOT_FOUND(HttpStatus.NOT_FOUND, "Shelf not found with ID: %d");
SHELF_NOT_FOUND(HttpStatus.NOT_FOUND, "Shelf not found with ID: %d"),
METADATA_SOURCE_NOT_IMPLEMENT_OR_DOES_NOT_EXIST(HttpStatus.BAD_REQUEST, "Metadata source not implement or does not exist" ),;
private final HttpStatus status;
private final String message;

View File

@@ -3,6 +3,7 @@ package com.adityachandel.booklore.model.dto;
import lombok.Builder;
import lombok.Data;
import java.time.LocalDate;
import java.util.List;
@Data
@@ -14,16 +15,15 @@ public class BookMetadataDTO {
private String title;
private String subtitle;
private String publisher;
private String publishedDate;
private LocalDate publishedDate;
private String description;
private String isbn13;
private String isbn10;
private Integer pageCount;
private String thumbnail;
private String language;
private Float rating;
private Integer reviewCount;
private List<AuthorDTO> authors;
private List<CategoryDTO> categories;
private String rating;
private String reviewCount;
private String printLength;
}

View File

@@ -4,6 +4,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDate;
import java.util.List;
@Entity
@@ -29,7 +30,7 @@ public class BookMetadata {
private String publisher;
@Column(name = "published_date")
private String publishedDate;
private LocalDate publishedDate;
@Column(name = "description", columnDefinition = "TEXT")
private String description;
@@ -49,6 +50,12 @@ public class BookMetadata {
@Column(name = "language", length = 10)
private String language;
@Column(name = "rating")
private Float rating;
@Column(name = "review_count")
private Integer reviewCount;
@OneToOne(fetch = FetchType.LAZY)
@MapsId
@JoinColumn(name = "book_id")

View File

@@ -1,16 +1,22 @@
package com.adityachandel.booklore.service;
import com.adityachandel.booklore.exception.ApiError;
import com.adityachandel.booklore.model.dto.*;
import com.adityachandel.booklore.model.dto.request.SetMetadataRequest;
import com.adityachandel.booklore.model.dto.BookDTO;
import com.adityachandel.booklore.model.dto.BookMetadataDTO;
import com.adityachandel.booklore.model.dto.BookViewerSettingDTO;
import com.adityachandel.booklore.model.dto.BookWithNeighborsDTO;
import com.adityachandel.booklore.model.dto.response.GoogleBooksMetadata;
import com.adityachandel.booklore.model.entity.*;
import com.adityachandel.booklore.model.entity.Author;
import com.adityachandel.booklore.model.entity.Book;
import com.adityachandel.booklore.model.entity.BookViewerSetting;
import com.adityachandel.booklore.model.entity.Shelf;
import com.adityachandel.booklore.repository.*;
import com.adityachandel.booklore.transformer.BookMetadataTransformer;
import com.adityachandel.booklore.service.metadata.BookMetadataService;
import com.adityachandel.booklore.service.metadata.model.BookMetadataSource;
import com.adityachandel.booklore.service.metadata.model.FetchedBookMetadata;
import com.adityachandel.booklore.transformer.BookSettingTransformer;
import com.adityachandel.booklore.transformer.BookTransformer;
import com.adityachandel.booklore.util.BookUtils;
import com.adityachandel.booklore.util.DateUtils;
import com.adityachandel.booklore.util.FileService;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -24,10 +30,6 @@ import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.time.Instant;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
@@ -40,23 +42,29 @@ public class BooksService {
private final BookRepository bookRepository;
private final BookViewerSettingRepository bookViewerSettingRepository;
private final GoogleBookMetadataService googleBookMetadataService;
private final BookMetadataRepository metadataRepository;
private final AuthorRepository authorRepository;
private final CategoryRepository categoryRepository;
private final LibraryRepository libraryRepository;
private final NotificationService notificationService;
private final ShelfRepository shelfRepository;
private final FileService fileService;
private final BookMetadataService bookMetadataService;
public BookDTO getBook(long bookId) {
public BookDTO getBook(long bookId, boolean withDescription) {
Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
return BookTransformer.convertToBookDTO(book);
BookDTO bookDTO = BookTransformer.convertToBookDTO(book);
if (!withDescription) {
bookDTO.getMetadata().setDescription(null);
}
return bookDTO;
}
public List<BookDTO> getBooks() {
public List<BookDTO> getBooks(boolean withDescription) {
return bookRepository.findAll().stream()
.map(BookTransformer::convertToBookDTO)
.peek(bookDTO -> {
if (!withDescription) {
bookDTO.getMetadata().setDescription(null);
}
})
.collect(Collectors.toList());
}
@@ -117,118 +125,6 @@ public class BooksService {
return googleBookMetadataService.queryByTerm(searchTerm);
}
/**
 * Overwrites the metadata of an existing book with the values carried by {@code newMetadata}.
 *
 * Scalar fields are always copied, including nulls. Authors, categories and the
 * thumbnail are only replaced when the incoming value is non-null and non-empty.
 *
 * @param bookId      id of the book whose metadata is updated
 * @param newMetadata values to apply
 * @return DTO view of the saved metadata
 * @throws IOException declared for {@code fileService.createThumbnail}
 */
public BookMetadataDTO setMetadata(long bookId, BookMetadataDTO newMetadata) throws IOException {
// Fails with BOOK_NOT_FOUND when the id is unknown.
Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
BookMetadata metadata = book.getMetadata();
metadata.setTitle(newMetadata.getTitle());
metadata.setSubtitle(newMetadata.getSubtitle());
metadata.setPublisher(newMetadata.getPublisher());
metadata.setPublishedDate(DateUtils.parseDateToInstant(newMetadata.getPublishedDate()));
metadata.setLanguage(newMetadata.getLanguage());
metadata.setIsbn10(newMetadata.getIsbn10());
metadata.setIsbn13(newMetadata.getIsbn13());
metadata.setDescription(newMetadata.getDescription());
metadata.setPageCount(newMetadata.getPageCount());
if (newMetadata.getAuthors() != null && !newMetadata.getAuthors().isEmpty()) {
// Reuse an author with the same name when one exists, otherwise create and save it.
List<Author> authors = newMetadata.getAuthors().stream()
.map(authorDTO -> authorRepository.findByName(authorDTO.getName())
.orElseGet(() -> authorRepository.save(Author.builder().name(authorDTO.getName()).build())))
.collect(Collectors.toList());
metadata.setAuthors(authors);
}
if (newMetadata.getCategories() != null && !newMetadata.getCategories().isEmpty()) {
// Category names are collected into a set first so duplicates are looked up only once.
List<Category> categories = newMetadata
.getCategories()
.stream()
.map(CategoryDTO::getName)
.collect(Collectors.toSet())
.stream()
.map(categoryName -> categoryRepository.findByName(categoryName)
.orElseGet(() -> categoryRepository.save(Category.builder().name(categoryName).build())))
.collect(Collectors.toList());
metadata.setCategories(categories);
}
if(newMetadata.getThumbnail() != null && !newMetadata.getThumbnail().isEmpty()) {
// The value returned by createThumbnail is what gets stored on the entity.
String thumbnailPath = fileService.createThumbnail(bookId, newMetadata.getThumbnail(), "amz");
metadata.setThumbnail(thumbnailPath);
}
// Persist related entities before the metadata row itself.
authorRepository.saveAll(metadata.getAuthors());
categoryRepository.saveAll(metadata.getCategories());
metadataRepository.save(metadata);
return BookMetadataTransformer.convertToBookDTO(metadata);
}
/**
 * Applies metadata looked up from Google Books to an existing book.
 *
 * The Google volume is resolved from the id carried by {@code setMetadataRequest};
 * its fields are copied onto the book's metadata entity, which is then saved.
 *
 * @param setMetadataRequest request carrying the Google Books id to load
 * @param bookId             id of the book whose metadata is updated
 * @return DTO view of the book after the update
 */
public BookDTO setMetadata(SetMetadataRequest setMetadataRequest, long bookId) {
// Fails with BOOK_NOT_FOUND when the id is unknown.
Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
GoogleBooksMetadata gMetadata = googleBookMetadataService.getByGoogleBookId(setMetadataRequest.getGoogleBookId());
BookMetadata metadata = book.getMetadata();
metadata.setDescription(gMetadata.getDescription());
metadata.setTitle(gMetadata.getTitle());
metadata.setLanguage(gMetadata.getLanguage());
metadata.setPublisher(gMetadata.getPublisher());
String publishedDate = gMetadata.getPublishedDate();
// The published date is only touched when Google supplied one; normalizeDate throws on unsupported formats.
if (publishedDate != null && !publishedDate.isEmpty()) {
String normalizeDate = normalizeDate(publishedDate);
metadata.setPublishedDate(normalizeDate);
}
metadata.setSubtitle(gMetadata.getSubtitle());
metadata.setPageCount(gMetadata.getPageCount());
metadata.setThumbnail(gMetadata.getThumbnail());
if (gMetadata.getAuthors() != null && !gMetadata.getAuthors().isEmpty()) {
// Reuse an author with the same name when one exists, otherwise create and save it.
List<Author> authors = gMetadata.getAuthors().stream()
.map(authorName -> authorRepository.findByName(authorName)
.orElseGet(() -> authorRepository.save(Author.builder().name(authorName).build())))
.collect(Collectors.toList());
metadata.setAuthors(authors);
}
if (gMetadata.getCategories() != null && !gMetadata.getCategories().isEmpty()) {
// Each category string is split on "/", trimmed, and stripped of blanks and "General"
// (case-insensitive); the resulting names are de-duplicated before lookup.
List<Category> categories = gMetadata
.getCategories()
.stream()
.map(c -> Arrays.stream(c.split("/"))
.map(String::trim)
.filter(s -> !s.isEmpty() && !s.equalsIgnoreCase("General"))
.toList())
.flatMap(List::stream)
.collect(Collectors.toSet())
.stream()
.map(categoryName -> categoryRepository.findByName(categoryName)
.orElseGet(() -> categoryRepository.save(Category.builder().name(categoryName).build())))
.collect(Collectors.toList());
metadata.setCategories(categories);
}
metadata.setIsbn10(gMetadata.getIsbn10());
metadata.setIsbn13(gMetadata.getIsbn13());
// Persist related entities before the metadata row itself.
authorRepository.saveAll(metadata.getAuthors());
categoryRepository.saveAll(metadata.getCategories());
metadataRepository.save(metadata);
return BookTransformer.convertToBookDTO(book);
}
/**
 * Normalizes a partial or full date string to {@code yyyy-MM-dd}.
 *
 * <ul>
 *   <li>{@code yyyy} becomes {@code yyyy-01-01}</li>
 *   <li>{@code yyyy-MM} becomes {@code yyyy-MM-01} (validated as a real date)</li>
 *   <li>{@code yyyy-MM-dd} is validated and returned unchanged</li>
 * </ul>
 *
 * @param input date string to normalize; must not be null
 * @return the date in {@code yyyy-MM-dd} form
 * @throws IllegalArgumentException if the input is in none of the supported formats;
 *                                  the parse failure is attached as the cause
 */
public String normalizeDate(String input) {
    // Year-only values are expanded without further validation, as before.
    if (input.matches("\\d{4}")) {
        return input + "-01-01";
    }
    // Year-month values (presumably also sent by the metadata provider -- verify) are
    // expanded to the first day of the month and validated below.
    String candidate = input.matches("\\d{4}-\\d{2}") ? input + "-01" : input;
    DateTimeFormatter fullDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    try {
        LocalDate.parse(candidate, fullDateFormatter);
        return candidate;
    } catch (DateTimeParseException e) {
        // Keep the original parse failure so the root cause is not lost.
        throw new IllegalArgumentException("Invalid date format: " + input, e);
    }
}
/**
 * Strips the extension (everything from the last {@code '.'} onward) from a file name.
 *
 * @param fileName file name to shorten
 * @return the part before the last dot, or the whole name when it contains no dot
 */
public String getFileNameWithoutExtension(String fileName) {
    final int lastDot = fileName.lastIndexOf('.');
    return lastDot < 0 ? fileName : fileName.substring(0, lastDot);
}
public BookWithNeighborsDTO getBookWithNeighbours(long libraryId, long bookId) {
libraryRepository.findById(libraryId).orElseThrow(() -> ApiError.LIBRARY_NOT_FOUND.createException(libraryId));
@@ -268,4 +164,8 @@ public class BooksService {
Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
return fileService.getBookCover(book.getMetadata().getThumbnail());
}
/**
 * Delegates a metadata update to {@code bookMetadataService}.
 *
 * Note that the delegate takes the metadata before the source, so the last two
 * arguments are passed in swapped order relative to this method's signature.
 *
 * @param bookId             id of the book to update
 * @param source             metadata source the values came from
 * @param setMetadataRequest metadata values to apply
 * @return the metadata DTO returned by the metadata service
 */
public BookMetadataDTO setBookMetadata(long bookId, BookMetadataSource source, FetchedBookMetadata setMetadataRequest) {
    BookMetadataDTO updated = bookMetadataService.setBookMetadata(bookId, setMetadataRequest, source);
    return updated;
}
}

View File

@@ -0,0 +1,92 @@
package com.adityachandel.booklore.service.metadata;
import com.adityachandel.booklore.exception.ApiError;
import com.adityachandel.booklore.model.dto.BookMetadataDTO;
import com.adityachandel.booklore.model.dto.CategoryDTO;
import com.adityachandel.booklore.model.entity.Author;
import com.adityachandel.booklore.model.entity.Book;
import com.adityachandel.booklore.model.entity.BookMetadata;
import com.adityachandel.booklore.model.entity.Category;
import com.adityachandel.booklore.repository.AuthorRepository;
import com.adityachandel.booklore.repository.BookMetadataRepository;
import com.adityachandel.booklore.repository.BookRepository;
import com.adityachandel.booklore.repository.CategoryRepository;
import com.adityachandel.booklore.service.metadata.model.BookFetchQuery;
import com.adityachandel.booklore.service.metadata.model.BookMetadataSource;
import com.adityachandel.booklore.service.metadata.model.FetchedBookMetadata;
import com.adityachandel.booklore.service.metadata.parser.AmazonBookParser;
import com.adityachandel.booklore.transformer.BookMetadataTransformer;
import com.adityachandel.booklore.util.FileService;
import lombok.AllArgsConstructor;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Fetches book metadata from an external source and applies fetched metadata
 * to the metadata entity of a stored book.
 */
@Service
@AllArgsConstructor
public class BookMetadataService {

    private final AmazonBookParser amazonBookParser;
    private final BookRepository bookRepository;
    private final AuthorRepository authorRepository;
    private final BookMetadataRepository bookMetadataRepository;
    private final CategoryRepository categoryRepository;
    private final FileService fileService;

    /**
     * Fetches metadata for a book from the requested source.
     *
     * @param bookId         id of the book to look up
     * @param source         source to query; only {@code AMAZON} is handled here
     * @param bookFetchQuery search criteria passed through to the parser
     * @return the metadata returned by the parser for that source
     * @throws RuntimeException the exception created by
     *         {@code ApiError.METADATA_SOURCE_NOT_IMPLEMENT_OR_DOES_NOT_EXIST} for any other source
     */
    public FetchedBookMetadata fetchBookMetadata(long bookId, BookMetadataSource source, BookFetchQuery bookFetchQuery) {
        if (source == BookMetadataSource.AMAZON) {
            return amazonBookParser.fetchMetadata(bookId, bookFetchQuery);
        }
        throw ApiError.METADATA_SOURCE_NOT_IMPLEMENT_OR_DOES_NOT_EXIST.createException();
    }

    /**
     * Overwrites the stored metadata of a book with fetched values.
     *
     * Scalar fields (including rating and review count) are always copied, nulls included.
     * Authors, categories and the thumbnail are only replaced when the incoming value is
     * non-null and non-empty.
     *
     * @param bookId      id of the book whose metadata is updated
     * @param newMetadata fetched values to apply
     * @param source      source the values came from; its name is passed to the thumbnail creation
     * @return DTO view of the saved metadata
     */
    public BookMetadataDTO setBookMetadata(long bookId, FetchedBookMetadata newMetadata, BookMetadataSource source) {
        Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
        BookMetadata metadata = book.getMetadata();

        metadata.setTitle(newMetadata.getTitle());
        metadata.setSubtitle(newMetadata.getSubtitle());
        metadata.setPublisher(newMetadata.getPublisher());
        metadata.setPublishedDate(newMetadata.getPublishedDate());
        metadata.setLanguage(newMetadata.getLanguage());
        metadata.setIsbn10(newMetadata.getIsbn10());
        metadata.setIsbn13(newMetadata.getIsbn13());
        metadata.setDescription(newMetadata.getDescription());
        metadata.setPageCount(newMetadata.getPageCount());
        // The entity and the fetched model both carry these two values; persist them as well.
        metadata.setRating(newMetadata.getRating());
        metadata.setReviewCount(newMetadata.getReviewCount());

        if (newMetadata.getAuthors() != null && !newMetadata.getAuthors().isEmpty()) {
            // Reuse an author with the same name when one exists, otherwise create and save it.
            List<Author> authors = newMetadata.getAuthors().stream()
                    .map(authorName -> authorRepository.findByName(authorName)
                            .orElseGet(() -> authorRepository.save(Author.builder().name(authorName).build())))
                    .collect(Collectors.toList());
            metadata.setAuthors(authors);
        }

        if (newMetadata.getCategories() != null && !newMetadata.getCategories().isEmpty()) {
            // De-duplicate names first so each category is looked up (or created) once.
            List<Category> categories = new HashSet<>(newMetadata.getCategories())
                    .stream()
                    .map(categoryName -> categoryRepository.findByName(categoryName)
                            .orElseGet(() -> categoryRepository.save(Category.builder().name(categoryName).build())))
                    .collect(Collectors.toList());
            metadata.setCategories(categories);
        }

        if (newMetadata.getThumbnailUrl() != null && !newMetadata.getThumbnailUrl().isEmpty()) {
            try {
                String thumbnailPath = fileService.createThumbnail(bookId, newMetadata.getThumbnailUrl(), source.name());
                metadata.setThumbnail(thumbnailPath);
            } catch (IOException e) {
                // Same unchecked exception type as before, now with context and the cause attached.
                throw new RuntimeException("Failed to create thumbnail for book " + bookId, e);
            }
        }

        // Persist related entities before the metadata row itself.
        authorRepository.saveAll(metadata.getAuthors());
        categoryRepository.saveAll(metadata.getCategories());
        bookMetadataRepository.save(metadata);
        return BookMetadataTransformer.convertToBookDTO(metadata);
    }
}

View File

@@ -1,11 +1,11 @@
package com.adityachandel.booklore.service.metadata.parser.model;
package com.adityachandel.booklore.service.metadata.model;
import lombok.Builder;
import lombok.Data;
@Builder
@Data
public class QueryData {
public class BookFetchQuery {
private String isbn;
private String bookTitle;
private String author;

View File

@@ -0,0 +1,5 @@
package com.adityachandel.booklore.service.metadata.model;
/**
 * External providers a book's metadata can be requested from.
 *
 * The constant name is also used as a path variable value and as a label when
 * storing thumbnails, so names and order are kept stable.
 */
public enum BookMetadataSource {
    /** Amazon product pages. */
    AMAZON,
    /** Google Books. */
    GOOGLE,
    /** Goodreads. */
    GOOD_READS
}

View File

@@ -0,0 +1,29 @@
package com.adityachandel.booklore.service.metadata.model;
import lombok.Builder;
import lombok.Data;
import java.time.LocalDate;
import java.util.List;
/**
 * Metadata for a single book as obtained from an external source, before it is
 * applied to the stored book.
 *
 * NOTE(review): this type is also bound from a JSON request body; with only
 * {@code @Data} and {@code @Builder} there is no public no-args constructor --
 * confirm that JSON deserialization works, or add the constructors explicitly.
 */
@Data
@Builder
public class FetchedBookMetadata {
// Id of the local book this metadata relates to.
private Long bookId;
// Identifier of the volume at Google Books, when known.
private String googleBookId;
// Identifier (ASIN) of the product at Amazon, when known.
private String amazonBookId;
private String title;
private String subtitle;
private String publisher;
private LocalDate publishedDate;
private String description;
private String isbn13;
private String isbn10;
private Integer pageCount;
// Remote location of the cover image; a thumbnail is created from it when the metadata is applied.
private String thumbnailUrl;
private String language;
// Average customer rating as parsed from the source page.
private Float rating;
// Number of customer reviews as parsed from the source page.
private Integer reviewCount;
// Author names as plain strings.
private List<String> authors;
// Category names as plain strings.
private List<String> categories;
}

View File

@@ -1,14 +1,10 @@
package com.adityachandel.booklore.service.metadata.parser;
import com.adityachandel.booklore.exception.ApiError;
import com.adityachandel.booklore.model.dto.AuthorDTO;
import com.adityachandel.booklore.model.dto.BookMetadataDTO;
import com.adityachandel.booklore.model.dto.CategoryDTO;
import com.adityachandel.booklore.model.entity.Book;
import com.adityachandel.booklore.model.entity.Library;
import com.adityachandel.booklore.repository.BookRepository;
import com.adityachandel.booklore.repository.LibraryRepository;
import com.adityachandel.booklore.service.metadata.parser.model.QueryData;
import com.adityachandel.booklore.service.metadata.model.FetchedBookMetadata;
import com.adityachandel.booklore.service.metadata.model.BookFetchQuery;
import com.adityachandel.booklore.util.BookUtils;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -20,6 +16,10 @@ import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -27,59 +27,99 @@ import java.util.stream.Collectors;
@Slf4j
@Service
@AllArgsConstructor
public class AmazonParser {
public class AmazonBookParser implements BookParser {
private BookRepository bookRepository;
public BookMetadataDTO queryForBookMetadata(Long bookId, QueryData queryData) {
public FetchedBookMetadata fetchMetadata(Long bookId, BookFetchQuery bookFetchQuery) {
Book book = bookRepository.findById(bookId).orElseThrow(() -> ApiError.BOOK_NOT_FOUND.createException(bookId));
if (queryData == null || (queryData.getBookTitle() == null && queryData.getAuthor() == null && queryData.getIsbn() == null)) {
if (bookFetchQuery == null || (bookFetchQuery.getBookTitle() == null && bookFetchQuery.getAuthor() == null && bookFetchQuery.getIsbn() == null)) {
String title = book.getMetadata().getTitle();
if (title == null || title.isEmpty()) {
String cleanFileName = BookUtils.cleanFileName(book.getFileName());
queryData = QueryData.builder().bookTitle(cleanFileName).build();
bookFetchQuery = BookFetchQuery.builder().bookTitle(cleanFileName).build();
} else {
queryData = QueryData.builder().bookTitle(title).build();
bookFetchQuery = BookFetchQuery.builder().bookTitle(title).build();
}
}
String amazonBookId = getAmazonBookId(queryData);
String amazonBookId = getAmazonBookId(bookFetchQuery);
if (amazonBookId == null) {
return null;
}
return getBookMetadata(amazonBookId);
}
public String getAmazonBookId(QueryData queryData) {
String queryUrl = buildQueryUrl(queryData);
private String getAmazonBookId(BookFetchQuery bookFetchQuery) {
String queryUrl = buildQueryUrl(bookFetchQuery);
if (queryUrl == null) {
log.error("Query URL is null, cannot proceed.");
return null;
}
try {
Document doc = fetchDoc(queryUrl);
Element searchResults = doc.select("span[data-component-type=s-search-results]").first();
if (searchResults == null) {
log.error("No search results found for query: {}", queryUrl);
return null;
}
Element item = searchResults.select("div[role=listitem][data-index=2]").first();
return item.attr("data-asin");
if (item == null) {
log.error("No item found in the search results.");
return null;
}
String bookLink = null;
// Try to get 'Paperback' and 'Hardcover' links
for (String type : new String[]{"Paperback", "Hardcover"}) {
Element link = item.select("a:containsOwn(" + type + ")").first();
if (link != null) {
bookLink = link.attr("href");
log.info("{} link found: {}", type, bookLink);
break; // Take the first found link, whether Paperback or Hardcover
} else {
log.info("No link containing '{}' found.", type);
}
}
if (bookLink != null) {
String asin = extractAsinFromUrl(bookLink);
log.info("Book ASIN extracted: {}", asin);
return asin;
} else {
String asin = item.attr("data-asin");
log.info("No book link found, returning ASIN: {}", asin);
return asin;
}
} catch (Exception e) {
log.error("Failed to get asin: {}", e.getMessage());
log.error("Failed to get asin: {}", e.getMessage(), e);
return null;
}
}
public BookMetadataDTO getBookMetadata(String amazonBookId) {
/**
 * Extracts the ASIN (book id) from an Amazon product URL.
 *
 * The ASIN is the path segment that follows the first {@code /dp/}. It ends at the next
 * {@code '/'}, or at a query string ({@code '?'}) or fragment ({@code '#'}) when the
 * link has no further path segment.
 *
 * @param url absolute or relative product link
 * @return the ASIN, or {@code null} when the URL is null, has no {@code /dp/} segment,
 *         or has nothing after it
 */
private String extractAsinFromUrl(String url) {
    if (url == null) {
        return null;
    }
    final String marker = "/dp/";
    int markerIndex = url.indexOf(marker);
    if (markerIndex < 0) {
        return null;
    }
    String afterMarker = url.substring(markerIndex + marker.length());
    // Cut at the first path separator, query or fragment so "/dp/ASIN?ref=x" yields "ASIN".
    String asin = afterMarker.split("[/?#]", 2)[0];
    return asin.isEmpty() ? null : asin;
}
private FetchedBookMetadata getBookMetadata(String amazonBookId) {
log.info("Fetching book metadata for amazon book {}", amazonBookId);
Document doc = fetchDoc("https://www.amazon.com/dp/" + amazonBookId);
return BookMetadataDTO.builder()
return FetchedBookMetadata.builder()
.amazonBookId(amazonBookId)
.title(getTitle(doc))
.subtitle(getSubtitle(doc))
.authors(getAuthors(doc).stream()
.map(name -> AuthorDTO.builder().name(name).build())
.collect(Collectors.toList()))
.categories(getBestSellerCategories(doc).stream()
.map(category -> CategoryDTO.builder().name(category).build())
.collect(Collectors.toList()))
.authors(getAuthors(doc).stream().toList())
.categories(getBestSellerCategories(doc).stream().toList())
.description(getDescription(doc))
.isbn13(getIsbn13(doc))
.isbn10(getIsbn10(doc))
@@ -87,32 +127,32 @@ public class AmazonParser {
.publishedDate(getPublicationDate(doc))
.language(getLanguage(doc))
.pageCount(getPageCount(doc))
.thumbnail(getThumbnail(doc))
.thumbnailUrl(getThumbnail(doc))
.rating(getRating(doc))
.reviewCount(getReviewCount(doc))
.printLength(getPrintLength(doc))
.build();
}
private String buildQueryUrl(QueryData queryData) {
private String buildQueryUrl(BookFetchQuery bookFetchQuery) {
StringBuilder queryBuilder = new StringBuilder("https://www.amazon.com/s/?search-alias=stripbooks&unfiltered=1&sort=relevanceexprank");
if (queryData.getIsbn() != null && !queryData.getIsbn().isEmpty()) {
queryBuilder.append("&field-isbn=").append(queryData.getIsbn());
if (bookFetchQuery.getIsbn() != null && !bookFetchQuery.getIsbn().isEmpty()) {
queryBuilder.append("&field-isbn=").append(bookFetchQuery.getIsbn());
}
if (queryData.getBookTitle() != null && !queryData.getBookTitle().isEmpty()) {
queryBuilder.append("&field-title=").append(queryData.getBookTitle().replace(" ", "%20"));
if (bookFetchQuery.getBookTitle() != null && !bookFetchQuery.getBookTitle().isEmpty()) {
queryBuilder.append("&field-title=").append(bookFetchQuery.getBookTitle().replace(" ", "%20"));
}
if (queryData.getAuthor() != null && !queryData.getAuthor().isEmpty()) {
queryBuilder.append("&field-author=").append(queryData.getAuthor().replace(" ", "%20"));
if (bookFetchQuery.getAuthor() != null && !bookFetchQuery.getAuthor().isEmpty()) {
queryBuilder.append("&field-author=").append(bookFetchQuery.getAuthor().replace(" ", "%20"));
}
if (queryData.getIsbn() == null && queryData.getBookTitle() == null && queryData.getAuthor() != null) {
if (bookFetchQuery.getIsbn() == null && bookFetchQuery.getBookTitle() == null && bookFetchQuery.getAuthor() != null) {
return null;
}
log.info("Query URL: {}", queryBuilder.toString());
return queryBuilder.toString();
}
@@ -205,11 +245,11 @@ public class AmazonParser {
return null;
}
private String getPublicationDate(Document doc) {
private LocalDate getPublicationDate(Document doc) {
try {
Element publicationDateElement = doc.select("#rpi-attribute-book_details-publication_date .rpi-attribute-value span").first();
if (publicationDateElement != null) {
return publicationDateElement.text();
return parseAmazonDate(publicationDateElement.text());
}
log.error("Error fetching publication date: Element not found.");
} catch (Exception e) {
@@ -231,19 +271,6 @@ public class AmazonParser {
return null;
}
/**
 * Reads the "print length" attribute from an Amazon product page.
 *
 * @param doc parsed product page
 * @return the text of the print-length element, or {@code null} when the element is
 *         missing or reading it fails (both cases are logged)
 */
private String getPrintLength(Document doc) {
    try {
        Element lengthSpan = doc.select("#rpi-attribute-book_details-fiona_pages .rpi-attribute-value span").first();
        if (lengthSpan == null) {
            log.error("Error fetching print length: Element not found.");
            return null;
        }
        return lengthSpan.text();
    } catch (Exception ex) {
        log.error("Error fetching print length: {}", ex.getMessage());
        return null;
    }
}
private Set<String> getBestSellerCategories(Document doc) {
try {
Element bestSellerCategoriesElement = doc.select("#detailBullets_feature_div").first();
@@ -262,13 +289,16 @@ public class AmazonParser {
return Set.of();
}
private String getRating(Document doc) {
private Float getRating(Document doc) {
try {
Element reviewDiv = doc.select("div#averageCustomerReviews_feature_div").first();
if (reviewDiv != null) {
Elements ratingElements = reviewDiv.select("span#acrPopover span.a-size-base.a-color-base");
if (!ratingElements.isEmpty()) {
return Objects.requireNonNull(ratingElements.first()).text();
String text = Objects.requireNonNull(ratingElements.first()).text();
if (!text.isEmpty()) {
return Float.parseFloat(text);
}
}
}
} catch (Exception e) {
@@ -277,13 +307,16 @@ public class AmazonParser {
return null;
}
private String getReviewCount(Document doc) {
private Integer getReviewCount(Document doc) {
try {
Element reviewDiv = doc.select("div#averageCustomerReviews_feature_div").first();
if (reviewDiv != null) {
Element reviewCountElement = reviewDiv.getElementById("acrCustomerReviewText");
if (reviewCountElement != null) {
return Objects.requireNonNull(reviewCountElement).text().split(" ")[0];
String reviewCount = Objects.requireNonNull(reviewCountElement).text().split(" ")[0];
if (!reviewCount.isEmpty()) {
return Integer.parseInt(reviewCount);
}
}
}
} catch (Exception e) {
@@ -355,4 +388,9 @@ public class AmazonParser {
throw new RuntimeException(e);
}
}
/**
 * Formatter for dates such as "January 5, 2021". The locale is pinned to English so
 * month names parse regardless of the JVM's default locale; the formatter is
 * immutable and thread-safe, so one shared instance is enough.
 */
private static final DateTimeFormatter AMAZON_DATE_FORMATTER =
        DateTimeFormatter.ofPattern("MMMM d, yyyy", java.util.Locale.ENGLISH);

/**
 * Parses a publication date in the {@code MMMM d, yyyy} form (full English month name).
 *
 * @param dateString date text, e.g. "January 5, 2021"
 * @return the parsed date
 * @throws java.time.format.DateTimeParseException if the text does not match the pattern
 */
private LocalDate parseAmazonDate(String dateString) {
    return LocalDate.parse(dateString, AMAZON_DATE_FORMATTER);
}
}

View File

@@ -0,0 +1,8 @@
package com.adityachandel.booklore.service.metadata.parser;
import com.adityachandel.booklore.service.metadata.model.FetchedBookMetadata;
import com.adityachandel.booklore.service.metadata.model.BookFetchQuery;
/**
 * A source-specific parser that can look up metadata for a stored book.
 */
public interface BookParser {
/**
 * Fetches metadata for the given book from this parser's source.
 *
 * @param bookId         id of the stored book the lookup is for
 * @param bookFetchQuery search criteria (ISBN, title, author); implementations may
 *                       accept {@code null} and derive a query from the book itself -- verify per implementation
 * @return the fetched metadata; NOTE(review): the Amazon implementation returns
 *         {@code null} when no matching product is found, so callers should expect that
 */
FetchedBookMetadata fetchMetadata(Long bookId, BookFetchQuery bookFetchQuery);
}

View File

@@ -19,10 +19,10 @@ public class BookMetadataTransformer {
return BookMetadataDTO.builder()
.bookId(bookMetadata.getBookId())
.title(bookMetadata.getTitle())
.description(bookMetadata.getDescription())
.isbn10(bookMetadata.getIsbn10())
.isbn13(bookMetadata.getIsbn13())
.publisher(bookMetadata.getPublisher())
.description(bookMetadata.getDescription())
.subtitle(bookMetadata.getSubtitle())
.language(bookMetadata.getLanguage())
.pageCount(bookMetadata.getPageCount())

View File

@@ -34,6 +34,8 @@ CREATE TABLE IF NOT EXISTS book_metadata
page_count INT,
thumbnail VARCHAR(1000),
language VARCHAR(10),
rating FLOAT,
review_count INT,
CONSTRAINT fk_book_metadata FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
);