diff --git a/booklore-api/build.gradle b/booklore-api/build.gradle index d45283a8f..a54edd655 100644 --- a/booklore-api/build.gradle +++ b/booklore-api/build.gradle @@ -52,6 +52,7 @@ dependencies { // --- Book & Image Processing --- implementation 'org.apache.pdfbox:pdfbox:3.0.6' + implementation 'org.apache.pdfbox:pdfbox-io:3.0.6' implementation 'org.apache.pdfbox:xmpbox:3.0.6' implementation 'org.apache.pdfbox:jbig2-imageio:3.0.4' implementation 'com.github.jai-imageio:jai-imageio-core:1.4.0' diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/PdfProcessor.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/PdfProcessor.java index 6a6eed9d0..dfd6f09fb 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/PdfProcessor.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/PdfProcessor.java @@ -16,6 +16,7 @@ import com.adityachandel.booklore.util.FileUtils; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; @@ -58,7 +59,9 @@ public class PdfProcessor extends AbstractFileProcessor implements BookFileProce @Override public boolean generateCover(BookEntity bookEntity) { - try (PDDocument pdf = Loader.loadPDF(new File(FileUtils.getBookFullPath(bookEntity)))) { + File pdfFile = new File(FileUtils.getBookFullPath(bookEntity)); + try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(pdfFile); + PDDocument pdf = Loader.loadPDF(randomAccessRead)) { return generateCoverImageAndSave(bookEntity.getId(), pdf); } catch (OutOfMemoryError e) { // Note: Catching OOM is generally discouraged, but for batch processing diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/PdfMetadataExtractor.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/PdfMetadataExtractor.java index 8ff56d9db..c88929689 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/PdfMetadataExtractor.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/PdfMetadataExtractor.java @@ -8,6 +8,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.Loader; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.common.PDMetadata; @@ -45,7 +46,8 @@ public class PdfMetadataExtractor implements FileMetadataExtractor { @Override public byte[] extractCover(File file) { BufferedImage coverImage = null; - try (PDDocument pdf = Loader.loadPDF(file)) { + try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file); + PDDocument pdf = Loader.loadPDF(randomAccessRead)) { coverImage = new PDFRenderer(pdf).renderImageWithDPI(0, 300, ImageType.RGB); try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { ImageIO.write(coverImage, "jpg", baos); @@ -70,7 +72,8 @@ public class PdfMetadataExtractor implements FileMetadataExtractor { BookMetadata.BookMetadataBuilder metadataBuilder = BookMetadata.builder(); - try (PDDocument pdf = Loader.loadPDF(file)) { + try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file); + PDDocument pdf = Loader.loadPDF(randomAccessRead)) { PDDocumentInformation info = pdf.getDocumentInformation(); if (info != null) { diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/writer/PdfMetadataWriter.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/writer/PdfMetadataWriter.java index fc01e2218..0e9a3be69 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/writer/PdfMetadataWriter.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/writer/PdfMetadataWriter.java @@ -6,6 +6,8 @@ import com.adityachandel.booklore.model.enums.BookFileType; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.common.PDMetadata; @@ -63,10 +65,12 @@ public class PdfMetadataWriter implements MetadataWriter { log.warn("Could not create PDF temp backup for {}: {}", file.getName(), e.getMessage()); } - try (PDDocument pdf = Loader.loadPDF(file)) { + try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file); + PDDocument pdf = Loader.loadPDF(randomAccessRead, IOUtils.createMemoryOnlyStreamCache())) { pdf.setAllSecurityToBeRemoved(true); applyMetadataToDocument(pdf, metadataEntity, clear); tempFile = File.createTempFile("pdfmeta-", ".pdf"); + // PDFBox 3.x saves in compressed mode by default pdf.save(tempFile); Files.move(tempFile.toPath(), filePath, StandardCopyOption.REPLACE_EXISTING); log.info("Successfully embedded metadata into PDF: {}", file.getName()); diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/PdfReaderService.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/PdfReaderService.java index 245cd2aea..ccf2a9702 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/PdfReaderService.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/PdfReaderService.java @@ -9,6 +9,7 @@ import com.adityachandel.booklore.util.FileUtils; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; @@ -91,7 +92,8 @@ public class PdfReaderService { if (!Files.isReadable(pdfPath)) { throw new FileNotFoundException("PDF file is not readable: " + pdfPath); } - try (PDDocument document = Loader.loadPDF(new File(pdfPath.toFile().toURI()))) { + try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(pdfPath.toFile()); + PDDocument document = Loader.loadPDF(randomAccessRead)) { PDFRenderer renderer = new PDFRenderer(document); for (int i = 0; i < document.getNumberOfPages(); i++) { BufferedImage image = null;