mirror of
https://github.com/adityachandelgit/BookLore.git
synced 2026-02-18 03:07:40 +01:00
perf(cbz): optimize CBZ reading (#1980)
* perf(cbz): improve ZIP file compatibility by enabling Unicode extra fields and ignoring local file headers

  Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>

* feat(perf): implement robust ZipFile optimization with fallback

  Updated the ZipFile reading logic in CbxReaderService, CbxProcessor, and CbxConversionService.

  - Implemented a "fast path" (Central Directory only) for speed (20x faster on large archives).
  - Implemented a "slow path" (local header scanning) fallback that is used if the fast path fails or finds no images.
  - Ensures compatibility with older or malformed archives where Unicode names are only present in local headers.
  - Refactored extraction logic into helper methods where appropriate to support the retry mechanism.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
@@ -114,24 +114,45 @@ public class CbxProcessor extends AbstractFileProcessor implements BookFileProce
|
||||
}
|
||||
|
||||
private Optional<BufferedImage> extractFirstImageFromZip(File file) {
|
||||
try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
|
||||
return Collections.list(zipFile.getEntries()).stream()
|
||||
.filter(e -> !e.isDirectory() && IMAGE_EXTENSION_CASE_INSENSITIVE_PATTERN.matcher(e.getName()).matches())
|
||||
.min(Comparator.comparing(ZipArchiveEntry::getName))
|
||||
.map(entry -> {
|
||||
try (InputStream is = zipFile.getInputStream(entry)) {
|
||||
return ImageIO.read(is);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to read image from ZIP entry {}: {}", entry.getName(), e.getMessage());
|
||||
return null;
|
||||
}
|
||||
});
|
||||
// Fast path: Try reading from Central Directory
|
||||
try (ZipFile zipFile = ZipFile.builder()
|
||||
.setFile(file)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(true)
|
||||
.get()) {
|
||||
Optional<BufferedImage> image = findAndReadFirstImage(zipFile);
|
||||
if (image.isPresent()) return image;
|
||||
} catch (Exception e) {
|
||||
log.debug("Fast path failed for ZIP extraction: {}", e.getMessage());
|
||||
}
|
||||
|
||||
// Slow path: Fallback to scanning local file headers
|
||||
try (ZipFile zipFile = ZipFile.builder()
|
||||
.setFile(file)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(false)
|
||||
.get()) {
|
||||
return findAndReadFirstImage(zipFile);
|
||||
} catch (Exception e) {
|
||||
log.error("Error extracting ZIP: {}", e.getMessage());
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
private Optional<BufferedImage> findAndReadFirstImage(ZipFile zipFile) {
|
||||
return Collections.list(zipFile.getEntries()).stream()
|
||||
.filter(e -> !e.isDirectory() && IMAGE_EXTENSION_CASE_INSENSITIVE_PATTERN.matcher(e.getName()).matches())
|
||||
.min(Comparator.comparing(ZipArchiveEntry::getName))
|
||||
.map(entry -> {
|
||||
try (InputStream is = zipFile.getInputStream(entry)) {
|
||||
return ImageIO.read(is);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to read image from ZIP entry {}: {}", entry.getName(), e.getMessage());
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private Optional<BufferedImage> extractFirstImageFrom7z(File file) {
|
||||
try (SevenZFile sevenZFile = SevenZFile.builder().setFile(file).get()) {
|
||||
List<SevenZArchiveEntry> imageEntries = new ArrayList<>();
|
||||
|
||||
@@ -196,23 +196,44 @@ public class CbxConversionService {
|
||||
}
|
||||
|
||||
private List<Path> extractImagesFromZip(File cbzFile, Path extractedImagesDir) throws IOException {
|
||||
// Fast path: Try reading from Central Directory
|
||||
try (ZipFile zipFile = ZipFile.builder()
|
||||
.setFile(cbzFile)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(true)
|
||||
.get()) {
|
||||
List<Path> paths = extractImagesFromZipFile(zipFile, extractedImagesDir);
|
||||
if (!paths.isEmpty()) return paths;
|
||||
} catch (Exception e) {
|
||||
log.debug("Fast path extraction failed for {}: {}", cbzFile.getName(), e.getMessage());
|
||||
}
|
||||
|
||||
// Slow path: Fallback to scanning local file headers
|
||||
try (ZipFile zipFile = ZipFile.builder()
|
||||
.setFile(cbzFile)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(false)
|
||||
.get()) {
|
||||
return extractImagesFromZipFile(zipFile, extractedImagesDir);
|
||||
}
|
||||
}
|
||||
|
||||
private List<Path> extractImagesFromZipFile(ZipFile zipFile, Path extractedImagesDir) {
|
||||
List<Path> imagePaths = new ArrayList<>();
|
||||
|
||||
try (ZipFile zipFile = ZipFile.builder().setFile(cbzFile).get()) {
|
||||
for (ZipArchiveEntry entry : Collections.list(zipFile.getEntries())) {
|
||||
if (entry.isDirectory() || !isImageFile(entry.getName())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (ZipArchiveEntry entry : Collections.list(zipFile.getEntries())) {
|
||||
if (entry.isDirectory() || !isImageFile(entry.getName())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
validateImageSize(entry.getName(), entry.getSize());
|
||||
|
||||
Path outputPath = extractedImagesDir.resolve(extractFileName(entry.getName()));
|
||||
try (InputStream inputStream = zipFile.getInputStream(entry)) {
|
||||
Path outputPath = extractedImagesDir.resolve(extractFileName(entry.getName()));
|
||||
Files.copy(inputStream, outputPath);
|
||||
imagePaths.add(outputPath);
|
||||
} catch (Exception e) {
|
||||
log.warn("Error extracting image {}: {}", entry.getName(), e.getMessage());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Error extracting image {}: {}", entry.getName(), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -137,25 +137,36 @@ public class CbxReaderService {
|
||||
String[] encodingsToTry = {"UTF-8", "Shift_JIS", "ISO-8859-1", "CP437", "MS932"};
|
||||
|
||||
for (String encoding : encodingsToTry) {
|
||||
Charset charset = Charset.forName(encoding);
|
||||
try {
|
||||
extractZipWithEncoding(cbzPath, targetDir, Charset.forName(encoding));
|
||||
return;
|
||||
} catch (IllegalArgumentException | java.util.zip.ZipException e) {
|
||||
log.debug("Failed to extract with encoding {}: {}", encoding, e.getMessage());
|
||||
// Fast path: Try reading from Central Directory only
|
||||
if (extractZipWithEncoding(cbzPath, targetDir, charset, true)) return;
|
||||
} catch (Exception e) {
|
||||
log.debug("Fast path failed for encoding {}: {}", encoding, e.getMessage());
|
||||
}
|
||||
|
||||
try {
|
||||
// Slow path: Fallback to scanning local file headers
|
||||
if (extractZipWithEncoding(cbzPath, targetDir, charset, false)) return;
|
||||
} catch (Exception e) {
|
||||
log.debug("Slow path failed for encoding {}: {}", encoding, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
throw new IOException("Unable to extract ZIP archive with any supported encoding");
|
||||
}
|
||||
|
||||
private void extractZipWithEncoding(Path cbzPath, Path targetDir, Charset charset) throws IOException {
|
||||
private boolean extractZipWithEncoding(Path cbzPath, Path targetDir, Charset charset, boolean useFastPath) throws IOException {
|
||||
try (org.apache.commons.compress.archivers.zip.ZipFile zipFile =
|
||||
org.apache.commons.compress.archivers.zip.ZipFile.builder()
|
||||
.setPath(cbzPath)
|
||||
.setCharset(charset)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(useFastPath)
|
||||
.get()) {
|
||||
|
||||
var entries = zipFile.getEntries();
|
||||
boolean foundImages = false;
|
||||
while (entries.hasMoreElements()) {
|
||||
ZipArchiveEntry entry = entries.nextElement();
|
||||
if (!entry.isDirectory() && isImageFile(entry.getName())) {
|
||||
@@ -163,9 +174,11 @@ public class CbxReaderService {
|
||||
Path target = targetDir.resolve(fileName);
|
||||
try (InputStream in = zipFile.getInputStream(entry)) {
|
||||
Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
|
||||
foundImages = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return foundImages;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,10 +346,19 @@ public class CbxReaderService {
|
||||
String[] encodingsToTry = {"UTF-8", "Shift_JIS", "ISO-8859-1", "CP437", "MS932"};
|
||||
|
||||
for (String encoding : encodingsToTry) {
|
||||
Charset charset = Charset.forName(encoding);
|
||||
try {
|
||||
return estimateCbzWithEncoding(cbxPath, Charset.forName(encoding));
|
||||
} catch (IllegalArgumentException | java.util.zip.ZipException e) {
|
||||
log.debug("Failed to estimate with encoding {}: {}", encoding, e.getMessage());
|
||||
long size = estimateCbzWithEncoding(cbxPath, charset, true);
|
||||
if (size > 0) return size;
|
||||
} catch (Exception e) {
|
||||
log.debug("Fast path estimation failed for encoding {}: {}", encoding, e.getMessage());
|
||||
}
|
||||
|
||||
try {
|
||||
long size = estimateCbzWithEncoding(cbxPath, charset, false);
|
||||
if (size > 0) return size;
|
||||
} catch (Exception e) {
|
||||
log.debug("Slow path estimation failed for encoding {}: {}", encoding, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,11 +366,13 @@ public class CbxReaderService {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
private long estimateCbzWithEncoding(Path cbxPath, Charset charset) throws IOException {
|
||||
private long estimateCbzWithEncoding(Path cbxPath, Charset charset, boolean useFastPath) throws IOException {
|
||||
try (org.apache.commons.compress.archivers.zip.ZipFile zipFile =
|
||||
org.apache.commons.compress.archivers.zip.ZipFile.builder()
|
||||
.setPath(cbxPath)
|
||||
.setCharset(charset)
|
||||
.setUseUnicodeExtraFields(true)
|
||||
.setIgnoreLocalFileHeader(useFastPath)
|
||||
.get()) {
|
||||
|
||||
long total = 0;
|
||||
|
||||
Reference in New Issue
Block a user