diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/CbxProcessor.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/CbxProcessor.java index 90e857875..18ed419a0 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/CbxProcessor.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/fileprocessor/CbxProcessor.java @@ -114,24 +114,45 @@ public class CbxProcessor extends AbstractFileProcessor implements BookFileProce } private Optional extractFirstImageFromZip(File file) { - try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) { - return Collections.list(zipFile.getEntries()).stream() - .filter(e -> !e.isDirectory() && IMAGE_EXTENSION_CASE_INSENSITIVE_PATTERN.matcher(e.getName()).matches()) - .min(Comparator.comparing(ZipArchiveEntry::getName)) - .map(entry -> { - try (InputStream is = zipFile.getInputStream(entry)) { - return ImageIO.read(is); - } catch (Exception e) { - log.warn("Failed to read image from ZIP entry {}: {}", entry.getName(), e.getMessage()); - return null; - } - }); + // Fast path: Try reading from Central Directory + try (ZipFile zipFile = ZipFile.builder() + .setFile(file) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(true) + .get()) { + Optional image = findAndReadFirstImage(zipFile); + if (image.isPresent()) return image; + } catch (Exception e) { + log.debug("Fast path failed for ZIP extraction: {}", e.getMessage()); + } + + // Slow path: Fallback to scanning local file headers + try (ZipFile zipFile = ZipFile.builder() + .setFile(file) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(false) + .get()) { + return findAndReadFirstImage(zipFile); } catch (Exception e) { log.error("Error extracting ZIP: {}", e.getMessage()); return Optional.empty(); } } + private Optional findAndReadFirstImage(ZipFile zipFile) { + return Collections.list(zipFile.getEntries()).stream() + .filter(e -> !e.isDirectory() && IMAGE_EXTENSION_CASE_INSENSITIVE_PATTERN.matcher(e.getName()).matches()) + .min(Comparator.comparing(ZipArchiveEntry::getName)) + .map(entry -> { + try (InputStream is = zipFile.getInputStream(entry)) { + return ImageIO.read(is); + } catch (Exception e) { + log.warn("Failed to read image from ZIP entry {}: {}", entry.getName(), e.getMessage()); + return null; + } + }); + } + private Optional extractFirstImageFrom7z(File file) { try (SevenZFile sevenZFile = SevenZFile.builder().setFile(file).get()) { List imageEntries = new ArrayList<>(); diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/kobo/CbxConversionService.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/kobo/CbxConversionService.java index 31a3ad517..150d39922 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/kobo/CbxConversionService.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/kobo/CbxConversionService.java @@ -196,23 +196,44 @@ public class CbxConversionService { } private List extractImagesFromZip(File cbzFile, Path extractedImagesDir) throws IOException { + // Fast path: Try reading from Central Directory + try (ZipFile zipFile = ZipFile.builder() + .setFile(cbzFile) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(true) + .get()) { + List paths = extractImagesFromZipFile(zipFile, extractedImagesDir); + if (!paths.isEmpty()) return paths; + } catch (Exception e) { + log.debug("Fast path extraction failed for {}: {}", cbzFile.getName(), e.getMessage()); + } + + // Slow path: Fallback to scanning local file headers + try (ZipFile zipFile = ZipFile.builder() + .setFile(cbzFile) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(false) + .get()) { + return extractImagesFromZipFile(zipFile, extractedImagesDir); + } + } + + private List extractImagesFromZipFile(ZipFile zipFile, Path extractedImagesDir) { List imagePaths = new ArrayList<>(); - - try (ZipFile zipFile = ZipFile.builder().setFile(cbzFile).get()) { - for (ZipArchiveEntry entry : Collections.list(zipFile.getEntries())) { - if (entry.isDirectory() || !isImageFile(entry.getName())) { - continue; - } - + for (ZipArchiveEntry entry : Collections.list(zipFile.getEntries())) { + if (entry.isDirectory() || !isImageFile(entry.getName())) { + continue; + } + + try { validateImageSize(entry.getName(), entry.getSize()); - + Path outputPath = extractedImagesDir.resolve(extractFileName(entry.getName())); try (InputStream inputStream = zipFile.getInputStream(entry)) { - Path outputPath = extractedImagesDir.resolve(extractFileName(entry.getName())); Files.copy(inputStream, outputPath); imagePaths.add(outputPath); - } catch (Exception e) { - log.warn("Error extracting image {}: {}", entry.getName(), e.getMessage()); } + } catch (Exception e) { + log.warn("Error extracting image {}: {}", entry.getName(), e.getMessage()); } } diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/CbxReaderService.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/CbxReaderService.java index ba5224fc2..c40ef6492 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/CbxReaderService.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/reader/CbxReaderService.java @@ -137,25 +137,36 @@ public class CbxReaderService { String[] encodingsToTry = {"UTF-8", "Shift_JIS", "ISO-8859-1", "CP437", "MS932"}; for (String encoding : encodingsToTry) { + Charset charset = Charset.forName(encoding); try { - extractZipWithEncoding(cbzPath, targetDir, Charset.forName(encoding)); - return; - } catch (IllegalArgumentException | java.util.zip.ZipException e) { - log.debug("Failed to extract with encoding {}: {}", encoding, e.getMessage()); + // Fast path: Try reading from Central Directory only + if (extractZipWithEncoding(cbzPath, targetDir, charset, true)) return; + } catch (Exception e) { + log.debug("Fast path failed for encoding {}: {}", encoding, e.getMessage()); + } + + try { + // Slow path: Fallback to scanning local file headers + if (extractZipWithEncoding(cbzPath, targetDir, charset, false)) return; + } catch (Exception e) { + log.debug("Slow path failed for encoding {}: {}", encoding, e.getMessage()); } } throw new IOException("Unable to extract ZIP archive with any supported encoding"); } - private void extractZipWithEncoding(Path cbzPath, Path targetDir, Charset charset) throws IOException { + private boolean extractZipWithEncoding(Path cbzPath, Path targetDir, Charset charset, boolean useFastPath) throws IOException { try (org.apache.commons.compress.archivers.zip.ZipFile zipFile = org.apache.commons.compress.archivers.zip.ZipFile.builder() .setPath(cbzPath) .setCharset(charset) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(useFastPath) .get()) { var entries = zipFile.getEntries(); + boolean foundImages = false; while (entries.hasMoreElements()) { ZipArchiveEntry entry = entries.nextElement(); if (!entry.isDirectory() && isImageFile(entry.getName())) { @@ -163,9 +174,11 @@ public class CbxReaderService { Path target = targetDir.resolve(fileName); try (InputStream in = zipFile.getInputStream(entry)) { Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING); + foundImages = true; } } } + return foundImages; } } @@ -333,10 +346,19 @@ public class CbxReaderService { String[] encodingsToTry = {"UTF-8", "Shift_JIS", "ISO-8859-1", "CP437", "MS932"}; for (String encoding : encodingsToTry) { + Charset charset = Charset.forName(encoding); try { - return estimateCbzWithEncoding(cbxPath, Charset.forName(encoding)); - } catch (IllegalArgumentException | java.util.zip.ZipException e) { - log.debug("Failed to estimate with encoding {}: {}", encoding, e.getMessage()); + long size = estimateCbzWithEncoding(cbxPath, charset, true); + if (size > 0) return size; + } catch (Exception e) { + log.debug("Fast path estimation failed for encoding {}: {}", encoding, e.getMessage()); + } + + try { + long size = estimateCbzWithEncoding(cbxPath, charset, false); + if (size > 0) return size; + } catch (Exception e) { + log.debug("Slow path estimation failed for encoding {}: {}", encoding, e.getMessage()); } } @@ -344,11 +366,13 @@ public class CbxReaderService { return Long.MAX_VALUE; } - private long estimateCbzWithEncoding(Path cbxPath, Charset charset) throws IOException { + private long estimateCbzWithEncoding(Path cbxPath, Charset charset, boolean useFastPath) throws IOException { try (org.apache.commons.compress.archivers.zip.ZipFile zipFile = org.apache.commons.compress.archivers.zip.ZipFile.builder() .setPath(cbxPath) .setCharset(charset) + .setUseUnicodeExtraFields(true) + .setIgnoreLocalFileHeader(useFastPath) .get()) { long total = 0;