feat(cbx-reader): enhance ZIP archive handling with Unicode support and fallback mechanisms (#2593)

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs
2026-02-10 17:30:31 +01:00
committed by GitHub
parent 697de9052f
commit 9444a544f9
2 changed files with 142 additions and 66 deletions

View File

@@ -1,12 +1,5 @@
package org.booklore.service.reader;
import org.booklore.exception.ApiError;
import org.booklore.model.dto.response.CbxPageInfo;
import org.booklore.model.entity.BookEntity;
import org.booklore.model.entity.BookFileEntity;
import org.booklore.model.enums.BookFileType;
import org.booklore.repository.BookRepository;
import org.booklore.util.FileUtils;
import com.github.junrar.Archive;
import com.github.junrar.rarfile.FileHeader;
import lombok.RequiredArgsConstructor;
@@ -15,6 +8,13 @@ import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.pdfbox.io.IOUtils;
import org.booklore.exception.ApiError;
import org.booklore.model.dto.response.CbxPageInfo;
import org.booklore.model.entity.BookEntity;
import org.booklore.model.entity.BookFileEntity;
import org.booklore.model.enums.BookFileType;
import org.booklore.repository.BookRepository;
import org.booklore.util.FileUtils;
import org.springframework.stereotype.Service;
import java.io.FileNotFoundException;
@@ -61,12 +61,14 @@ public class CbxReaderService {
final List<String> imageEntries;
final long lastModified;
final Charset successfulEncoding;
final boolean useUnicodeExtraFields;
volatile long lastAccessed;
CachedArchiveMetadata(List<String> imageEntries, long lastModified, Charset successfulEncoding) {
CachedArchiveMetadata(List<String> imageEntries, long lastModified, Charset successfulEncoding, boolean useUnicodeExtraFields) {
this.imageEntries = List.copyOf(imageEntries);
this.lastModified = lastModified;
this.successfulEncoding = successfulEncoding;
this.useUnicodeExtraFields = useUnicodeExtraFields;
this.lastAccessed = System.currentTimeMillis();
}
}
@@ -130,7 +132,7 @@ public class CbxReaderService {
CachedArchiveMetadata metadata = getCachedMetadata(cbxPath);
validatePageRequest(bookId, page, metadata.imageEntries);
String entryName = metadata.imageEntries.get(page - 1);
streamEntryFromArchive(cbxPath, entryName, outputStream, metadata.successfulEncoding);
streamEntryFromArchive(cbxPath, entryName, outputStream, metadata);
}
private Path getBookPath(Long bookId, String bookType) {
@@ -198,23 +200,10 @@ public class CbxReaderService {
return scanZipMetadata(cbxPath, lastModified);
} else if (filename.endsWith(CB7_EXTENSION)) {
List<String> entries = getImageEntriesFrom7z(cbxPath);
return new CachedArchiveMetadata(entries, lastModified, null);
return new CachedArchiveMetadata(entries, lastModified, null, false);
} else if (filename.endsWith(CBR_EXTENSION)) {
List<String> entries = getImageEntriesFromRar(cbxPath);
return new CachedArchiveMetadata(entries, lastModified, null);
} else {
throw new IOException("Unsupported archive format: " + cbxPath.getFileName());
}
}
private void streamEntryFromArchive(Path cbxPath, String entryName, OutputStream outputStream, Charset cachedEncoding) throws IOException {
String filename = cbxPath.getFileName().toString().toLowerCase();
if (filename.endsWith(CBZ_EXTENSION)) {
streamEntryFromZip(cbxPath, entryName, outputStream, cachedEncoding);
} else if (filename.endsWith(CB7_EXTENSION)) {
streamEntryFrom7z(cbxPath, entryName, outputStream);
} else if (filename.endsWith(CBR_EXTENSION)) {
streamEntryFromRar(cbxPath, entryName, outputStream);
return new CachedArchiveMetadata(entries, lastModified, null, false);
} else {
throw new IOException("Unsupported archive format: " + cbxPath.getFileName());
}
@@ -225,35 +214,57 @@ public class CbxReaderService {
CachedArchiveMetadata oldCache = archiveCache.get(cacheKey);
if (oldCache != null && oldCache.successfulEncoding != null) {
try {
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, oldCache.successfulEncoding, true);
return new CachedArchiveMetadata(entries, lastModified, oldCache.successfulEncoding);
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, oldCache.successfulEncoding, true, oldCache.useUnicodeExtraFields);
return new CachedArchiveMetadata(entries, lastModified, oldCache.successfulEncoding, oldCache.useUnicodeExtraFields);
} catch (Exception e) {
log.debug("Cached encoding {} failed, trying others", oldCache.successfulEncoding);
log.debug("Cached encoding {} with useUnicode={} failed, trying others", oldCache.successfulEncoding, oldCache.useUnicodeExtraFields);
}
}
// Try combinations per encoding
for (Charset encoding : ENCODINGS_TO_TRY) {
// Priority 1: Fast path, Unicode Enabled
try {
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, true);
return new CachedArchiveMetadata(entries, lastModified, encoding);
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, true, true);
return new CachedArchiveMetadata(entries, lastModified, encoding, true);
} catch (Exception e) {
log.debug("ZIP fast path failed for encoding {}: {}", encoding, e.getMessage());
log.trace("ZIP strategy failed (Fast, Unicode, {}): {}", encoding, e.getMessage());
}
// Priority 2: Slow path, Unicode Enabled
try {
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, false);
return new CachedArchiveMetadata(entries, lastModified, encoding);
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, false, true);
return new CachedArchiveMetadata(entries, lastModified, encoding, true);
} catch (Exception e) {
log.debug("ZIP slow path failed for encoding {}: {}", encoding, e.getMessage());
log.trace("ZIP strategy failed (Slow, Unicode, {}): {}", encoding, e.getMessage());
}
// Priority 3: Fast path, Unicode Disabled (Fallback)
try {
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, true, false);
return new CachedArchiveMetadata(entries, lastModified, encoding, false);
} catch (Exception e) {
log.trace("ZIP strategy failed (Fast, No-Unicode, {}): {}", encoding, e.getMessage());
}
// Priority 4: Slow path, Unicode Disabled (Fallback)
try {
List<String> entries = getImageEntriesFromZipWithEncoding(cbxPath, encoding, false, false);
return new CachedArchiveMetadata(entries, lastModified, encoding, false);
} catch (Exception e) {
log.trace("ZIP strategy failed (Slow, No-Unicode, {}): {}", encoding, e.getMessage());
}
}
throw new IOException("Unable to read ZIP archive with any supported encoding");
}
private List<String> getImageEntriesFromZipWithEncoding(Path cbxPath, Charset charset, boolean useFastPath) throws IOException {
private List<String> getImageEntriesFromZipWithEncoding(Path cbxPath, Charset charset, boolean useFastPath, boolean useUnicodeExtraFields) throws IOException {
try (org.apache.commons.compress.archivers.zip.ZipFile zipFile =
org.apache.commons.compress.archivers.zip.ZipFile.builder()
.setPath(cbxPath)
.setCharset(charset)
.setUseUnicodeExtraFields(true)
.setUseUnicodeExtraFields(useUnicodeExtraFields)
.setIgnoreLocalFileHeader(useFastPath)
.get()) {
List<String> entries = new ArrayList<>();
@@ -269,41 +280,59 @@ public class CbxReaderService {
}
}
private void streamEntryFromZip(Path cbxPath, String entryName, OutputStream outputStream, Charset cachedEncoding) throws IOException {
if (cachedEncoding != null) {
if (streamEntryFromZipWithEncoding(cbxPath, entryName, outputStream, cachedEncoding, true)) {
return;
}
if (streamEntryFromZipWithEncoding(cbxPath, entryName, outputStream, cachedEncoding, false)) {
return;
}
}
for (Charset encoding : ENCODINGS_TO_TRY) {
if (encoding.equals(cachedEncoding)) continue;
try {
if (streamEntryFromZipWithEncoding(cbxPath, entryName, outputStream, encoding, true)) {
return;
}
} catch (Exception e) {
log.debug("ZIP stream fast path failed for encoding {}: {}", encoding, e.getMessage());
}
try {
if (streamEntryFromZipWithEncoding(cbxPath, entryName, outputStream, encoding, false)) {
return;
}
} catch (Exception e) {
log.debug("ZIP stream slow path failed for encoding {}: {}", encoding, e.getMessage());
}
private void streamEntryFromZip(Path cbxPath, String entryName, OutputStream outputStream, CachedArchiveMetadata metadata) throws IOException {
Charset encoding = metadata != null ? metadata.successfulEncoding : null;
boolean useUnicode = metadata != null && metadata.useUnicodeExtraFields;
if (encoding != null) {
if (tryStreamEntry(cbxPath, entryName, outputStream, encoding, true, useUnicode)) return;
if (tryStreamEntry(cbxPath, entryName, outputStream, encoding, false, useUnicode)) return;
}
for (Charset charset : ENCODINGS_TO_TRY) {
if (charset.equals(encoding)) continue; // Skip failed cached encoding if we want, or retry it with different flags?
if (tryStreamEntry(cbxPath, entryName, outputStream, charset, true, true)) return;
if (tryStreamEntry(cbxPath, entryName, outputStream, charset, false, true)) return;
if (tryStreamEntry(cbxPath, entryName, outputStream, charset, true, false)) return;
if (tryStreamEntry(cbxPath, entryName, outputStream, charset, false, false)) return;
}
throw new IOException("Unable to find entry in ZIP archive: " + entryName);
}
private boolean tryStreamEntry(Path cbxPath, String entryName, OutputStream outputStream, Charset charset, boolean useFastPath, boolean useUnicode) {
try {
if (streamEntryFromZipWithEncoding(cbxPath, entryName, outputStream, charset, useFastPath, useUnicode)) {
return true;
}
} catch (Exception e) {
log.trace("Stream strategy failed ({}, Fast={}, Unicode={}): {}", charset, useFastPath, useUnicode, e.getMessage());
}
return false;
}
private boolean streamEntryFromZipWithEncoding(Path cbxPath, String entryName, OutputStream outputStream, Charset charset, boolean useFastPath) throws IOException {
private void streamEntryFromArchive(Path cbxPath, String entryName, OutputStream outputStream, CachedArchiveMetadata metadata) throws IOException {
String filename = cbxPath.getFileName().toString().toLowerCase();
if (filename.endsWith(CBZ_EXTENSION)) {
streamEntryFromZip(cbxPath, entryName, outputStream, metadata);
} else if (filename.endsWith(CB7_EXTENSION)) {
streamEntryFrom7z(cbxPath, entryName, outputStream);
} else if (filename.endsWith(CBR_EXTENSION)) {
streamEntryFromRar(cbxPath, entryName, outputStream);
} else {
throw new IOException("Unsupported archive format: " + cbxPath.getFileName());
}
}
private boolean streamEntryFromZipWithEncoding(Path cbxPath, String entryName, OutputStream outputStream, Charset charset, boolean useFastPath, boolean useUnicodeExtraFields) throws IOException {
try (org.apache.commons.compress.archivers.zip.ZipFile zipFile =
org.apache.commons.compress.archivers.zip.ZipFile.builder()
.setPath(cbxPath)
.setCharset(charset)
.setUseUnicodeExtraFields(true)
.setUseUnicodeExtraFields(useUnicodeExtraFields)
.setIgnoreLocalFileHeader(useFastPath)
.get()) {
ZipArchiveEntry entry = zipFile.getEntry(entryName);

View File

@@ -1,15 +1,15 @@
package org.booklore.service.reader;
import org.booklore.exception.ApiError;
import org.booklore.model.entity.BookEntity;
import org.booklore.repository.BookRepository;
import org.booklore.util.FileUtils;
import com.github.junrar.Archive;
import com.github.junrar.rarfile.FileHeader;
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.booklore.exception.ApiError;
import org.booklore.model.entity.BookEntity;
import org.booklore.repository.BookRepository;
import org.booklore.util.FileUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -89,6 +89,53 @@ class CbxReaderServiceTest {
}
}
@Test
void testGetAvailablePages_CBZ_Fallback_Success() throws Exception {
when(bookRepository.findById(1L)).thenReturn(Optional.of(bookEntity));
try (MockedStatic<FileUtils> fileUtilsStatic = mockStatic(FileUtils.class)) {
fileUtilsStatic.when(() -> FileUtils.getBookFullPath(bookEntity)).thenReturn(cbzPath.toString());
// Unicode enabled -> throws Exception
ZipFile zipFileFail = mock(ZipFile.class);
when(zipFileFail.getEntries()).thenThrow(new IllegalArgumentException("Corrupt extra fields"));
// Unicode disabled -> returns valid entries
ZipArchiveEntry entry1 = new ZipArchiveEntry("1.jpg");
Enumeration<ZipArchiveEntry> entries = Collections.enumeration(List.of(entry1));
ZipFile zipFileSuccess = mock(ZipFile.class);
when(zipFileSuccess.getEntries()).thenReturn(entries);
ZipFile.Builder builder = mock(ZipFile.Builder.class, RETURNS_DEEP_STUBS);
when(builder.setPath(any(Path.class))).thenReturn(builder);
when(builder.setCharset(any(Charset.class))).thenReturn(builder);
when(builder.setIgnoreLocalFileHeader(anyBoolean())).thenReturn(builder);
// Mock builder behavior based on UseUnicodeExtraFields call
when(builder.setUseUnicodeExtraFields(anyBoolean())).thenAnswer(invocation -> {
return builder;
});
when(builder.get())
.thenReturn(zipFileFail) // 1. Fast, Unicode=True
.thenReturn(zipFileFail) // 2. Slow, Unicode=True
.thenReturn(zipFileSuccess); // 3. Fast, Unicode=False
try (MockedStatic<ZipFile> zipFileStatic = mockStatic(ZipFile.class)) {
zipFileStatic.when(ZipFile::builder).thenReturn(builder);
Files.createFile(cbzPath);
Files.setLastModifiedTime(cbzPath, FileTime.fromMillis(System.currentTimeMillis()));
List<Integer> pages = cbxReaderService.getAvailablePages(1L);
assertEquals(List.of(1), pages);
// Verify that we eventually called with useUnicode=false
verify(builder).setUseUnicodeExtraFields(eq(false));
}
}
}
@Test
void testStreamPageImage_CBZ_Success() throws Exception {
when(bookRepository.findById(1L)).thenReturn(Optional.of(bookEntity));