diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/util/PathPatternResolver.java b/booklore-api/src/main/java/com/adityachandel/booklore/util/PathPatternResolver.java
index 006cc1982..ccbab2416 100644
--- a/booklore-api/src/main/java/com/adityachandel/booklore/util/PathPatternResolver.java
+++ b/booklore-api/src/main/java/com/adityachandel/booklore/util/PathPatternResolver.java
@@ -5,20 +5,35 @@ import com.adityachandel.booklore.model.entity.AuthorEntity;
 import com.adityachandel.booklore.model.entity.BookEntity;
 import com.adityachandel.booklore.model.entity.BookMetadataEntity;
 import lombok.experimental.UtilityClass;
+import lombok.extern.slf4j.Slf4j;
 
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.StandardCharsets;
 import java.time.LocalDate;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 @UtilityClass
+@Slf4j
 public class PathPatternResolver {
 
+    private final int MAX_COMPONENT_BYTES = 200;
+    private final int MAX_FILESYSTEM_COMPONENT_BYTES = 245; // Left 10 bytes buffer
+    private final int MAX_AUTHOR_BYTES = 180;
+
+    private final String TRUNCATION_SUFFIX = " et al.";
+    private final int SUFFIX_BYTES = TRUNCATION_SUFFIX.getBytes(StandardCharsets.UTF_8).length;
+
     private final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
     private final Pattern FILE_EXTENSION_PATTERN = Pattern.compile(".*\\.[a-zA-Z0-9]+$");
-    private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("[\\p{Cntrl}]");
+    private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("\\p{Cntrl}");
     private final Pattern INVALID_CHARS_PATTERN = Pattern.compile("[\\\\/:*?\"<>|]");
     private final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{(.*?)}");
+    private final Pattern COMMA_SPACE_PATTERN = Pattern.compile(", ");
+    private final Pattern SLASH_PATTERN = Pattern.compile("/");
 
     public String resolvePattern(BookEntity book, String pattern) {
         String currentFilename = book.getFileName() != null ? book.getFileName().trim() : "";
@@ -48,7 +63,7 @@ public class PathPatternResolver {
 
         String authors = sanitize(
                 metadata != null
-                        ? String.join(", ", metadata.getAuthors())
+                        ? truncateAuthorsForFilesystem(String.join(", ", metadata.getAuthors()))
                         : ""
         );
         String year = sanitize(
@@ -79,13 +94,13 @@ public class PathPatternResolver {
 
         Map values = new LinkedHashMap<>();
         values.put("authors", authors);
-        values.put("title", title);
-        values.put("subtitle", subtitle);
+        values.put("title", truncatePathComponent(title, MAX_COMPONENT_BYTES));
+        values.put("subtitle", truncatePathComponent(subtitle, MAX_COMPONENT_BYTES));
         values.put("year", year);
-        values.put("series", series);
+        values.put("series", truncatePathComponent(series, MAX_COMPONENT_BYTES));
         values.put("seriesIndex", seriesIndex);
         values.put("language", language);
-        values.put("publisher", publisher);
+        values.put("publisher", truncatePathComponent(publisher, MAX_COMPONENT_BYTES));
         values.put("isbn", isbn);
         values.put("currentFilename", filename);
 
@@ -164,7 +179,7 @@ public class PathPatternResolver {
             result += "." + extension;
         }
 
-        return result;
+        return validateFinalPath(result);
     }
 
     private String sanitize(String input) {
@@ -173,6 +188,158 @@ public class PathPatternResolver {
                 .trim();
     }
 
+    /**
+     * Shortens a ", "-joined author list to at most {@code MAX_AUTHOR_BYTES} UTF-8 bytes.
+     * Whole authors are kept while they fit; the rest are replaced by {@code TRUNCATION_SUFFIX}.
+     *
+     * @param authors authors joined with ", "; may be {@code null} or empty
+     * @return {@code authors} unchanged when it already fits, otherwise the shortened list
+     */
+    private String truncateAuthorsForFilesystem(String authors) {
+        if (authors == null || authors.isEmpty()) {
+            return authors;
+        }
+
+        byte[] originalBytes = authors.getBytes(StandardCharsets.UTF_8);
+        if (originalBytes.length <= MAX_AUTHOR_BYTES) {
+            return authors;
+        }
+
+        String[] authorArray = COMMA_SPACE_PATTERN.split(authors);
+        StringBuilder result = new StringBuilder();
+        int currentBytes = 0;
+        int truncationLimit = MAX_AUTHOR_BYTES - SUFFIX_BYTES;
+
+        for (int i = 0; i < authorArray.length; i++) {
+            String author = authorArray[i];
+
+            int separatorBytes = (i > 0) ? 2 : 0;
+            int authorBytes = author.getBytes(StandardCharsets.UTF_8).length;
+
+            // The suffix is appended whenever the list is cut, so compare against the limit that
+            // reserves room for it; otherwise the result could exceed MAX_AUTHOR_BYTES.
+            if (currentBytes + separatorBytes + authorBytes > truncationLimit) {
+                if (result.isEmpty()) {
+                    return truncatePathComponent(author, truncationLimit) + TRUNCATION_SUFFIX;
+                }
+                return result + TRUNCATION_SUFFIX;
+            }
+
+            if (i > 0) {
+                result.append(", ");
+                currentBytes += 2;
+            }
+            result.append(author);
+            currentBytes += authorBytes;
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Cuts {@code component} down to at most {@code maxBytes} UTF-8 bytes. The encoder stops
+     * when the buffer is full, so the cut falls between encoded characters rather than inside one.
+     *
+     * @param component text to shorten; may be {@code null} or empty
+     * @param maxBytes  maximum encoded length in bytes
+     * @return {@code component} unchanged when it already fits, otherwise a prefix that fits
+     */
+    private String truncatePathComponent(String component, int maxBytes) {
+        if (component == null || component.isEmpty()) {
+            return component;
+        }
+
+        byte[] bytes = component.getBytes(StandardCharsets.UTF_8);
+        if (bytes.length <= maxBytes) {
+            return component;
+        }
+
+        CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
+        ByteBuffer buffer = ByteBuffer.allocate(maxBytes);
+        CharBuffer charBuffer = CharBuffer.wrap(component);
+
+        encoder.encode(charBuffer, buffer, true);
+
+        String truncated = component.substring(0, charBuffer.position());
+        if (!truncated.equals(component)) {
+            log.debug("Truncated path component from {} to {} bytes for filesystem safety",
+                bytes.length, truncated.getBytes(StandardCharsets.UTF_8).length);
+        }
+        return truncated;
+    }
+
+
+    /**
+     * Applies the {@code MAX_FILESYSTEM_COMPONENT_BYTES} limit to every "/"-separated component
+     * of {@code path}. A final component containing a dot is shortened via
+     * {@code truncateFilenameWithExtension}; all others are truncated directly.
+     *
+     * @param path resolved relative path using "/" as separator
+     * @return the path with each over-long component shortened
+     */
+    private String validateFinalPath(String path) {
+        String[] components = SLASH_PATTERN.split(path);
+        StringBuilder result = new StringBuilder();
+
+        for (int i = 0; i < components.length; i++) {
+            String component = components[i];
+            boolean isLastComponent = (i == components.length - 1);
+
+            if (isLastComponent && component.contains(".")) {
+                component = truncateFilenameWithExtension(component);
+            } else {
+                if (component.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
+                    component = truncatePathComponent(component, MAX_FILESYSTEM_COMPONENT_BYTES);
+                }
+            }
+
+            if (i > 0) result.append("/");
+            result.append(component);
+        }
+        return result.toString();
+    }
+
+    /**
+     * Shortens a filename to {@code MAX_FILESYSTEM_COMPONENT_BYTES} while keeping its extension
+     * (the text from the last dot). Names with no dot, whose last dot is the first character, or
+     * whose extension is longer than 50 bytes are truncated as a plain component instead.
+     *
+     * @param filename final path component
+     * @return the filename, shortened if it was too long
+     */
+    private String truncateFilenameWithExtension(String filename) {
+        int lastDotIndex = filename.lastIndexOf('.');
+        if (lastDotIndex == -1 || lastDotIndex == 0) {
+            // No extension or dot is at start (hidden file), treat as normal component
+            if (filename.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
+                return truncatePathComponent(filename, MAX_FILESYSTEM_COMPONENT_BYTES);
+            }
+            return filename;
+        }
+
+        String extension = filename.substring(lastDotIndex); // includes dot
+        String name = filename.substring(0, lastDotIndex);
+
+        int extBytes = extension.getBytes(StandardCharsets.UTF_8).length;
+
+        if (extBytes > 50) {
+            log.warn("Unusually long extension detected: {}", extension);
+            if (filename.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
+                return truncatePathComponent(filename, MAX_FILESYSTEM_COMPONENT_BYTES);
+            }
+            return filename;
+        }
+
+        int maxNameBytes = MAX_FILESYSTEM_COMPONENT_BYTES - extBytes;
+
+        if (name.getBytes(StandardCharsets.UTF_8).length > maxNameBytes) {
+            String truncatedName = truncatePathComponent(name, maxNameBytes);
+            return truncatedName + extension;
+        }
+
+        return filename;
+    }
+
     private interface MetadataProvider {
         String getTitle();
 
diff --git a/booklore-api/src/test/java/com/adityachandel/booklore/service/upload/FileUploadServiceTest.java b/booklore-api/src/test/java/com/adityachandel/booklore/service/upload/FileUploadServiceTest.java
index 8855e28c4..d992b0a86 100644
--- a/booklore-api/src/test/java/com/adityachandel/booklore/service/upload/FileUploadServiceTest.java
+++ b/booklore-api/src/test/java/com/adityachandel/booklore/service/upload/FileUploadServiceTest.java
@@ -17,21 +17,29 @@ import com.adityachandel.booklore.repository.LibraryRepository;
 import com.adityachandel.booklore.service.file.FileFingerprint;
 import com.adityachandel.booklore.service.appsettings.AppSettingService;
 import com.adityachandel.booklore.service.file.FileMovingHelper;
+import com.adityachandel.booklore.model.dto.BookMetadata;
+import com.adityachandel.booklore.model.enums.BookFileExtension;
 import com.adityachandel.booklore.service.metadata.extractor.MetadataExtractorFactory;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import org.mockito.Mock;
 import org.mockito.MockedStatic;
 import org.mockito.MockitoAnnotations;
 import org.springframework.mock.web.MockMultipartFile;
 import org.springframework.web.multipart.MultipartFile;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
@@ -40,6 +48,13 @@ import static org.mockito.Mockito.*;
 
 class FileUploadServiceTest {
 
+    public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
+            "梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
+            "胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
+            "北京大学政策法规研究室", "(美)艾恺(Guy S. Alitto)", "顾毓琇", "陈从周",
+            "(加拿大)伊莎白(Isabel Crook)(美)柯临清(Christina Gilmartin)", "傅莹"
+    ));
+
     @TempDir
     Path tempDir;
 
@@ -263,4 +278,30 @@ class FileUploadServiceTest {
                     .isThrownBy(() -> service.uploadAdditionalFile(bookId, file, AdditionalFileType.ALTERNATIVE_FORMAT, null));
         }
     }
+
+    @Test
+    @DisplayName("Should upload files with long authors without filesystem errors")
+    void uploadFile_withLongAuthors_doesNotThrowFilesystemError() {
+        byte[] data = "content".getBytes();
+        MockMultipartFile file = new MockMultipartFile("file", "long-authors.epub", "application/epub+zip", data);
+
+        LibraryEntity lib = new LibraryEntity();
+        lib.setId(10L);
+        String defaultPattern = "{authors}/<{series}/><{seriesIndex}. >{title}< - {authors}>< ({year})>";
+        lib.setFileNamingPattern(defaultPattern);
+        LibraryPathEntity path = new LibraryPathEntity();
+        path.setId(3L);
+        path.setPath(tempDir.toString());
+        lib.setLibraryPaths(List.of(path));
+        when(libraryRepository.findById(10L)).thenReturn(Optional.of(lib));
+
+        BookMetadata metadata = BookMetadata.builder()
+                .title("中国文化合集")
+                .authors(LONG_AUTHOR_LIST)
+                .build();
+
+        when(metadataExtractorFactory.extractMetadata(any(BookFileExtension.class), any(File.class))).thenReturn(metadata);
+
+        assertDoesNotThrow(() -> service.uploadFile(file, 10L, 3L));
+    }
 }
diff --git a/booklore-api/src/test/java/com/adityachandel/booklore/util/PathPatternResolverTest.java b/booklore-api/src/test/java/com/adityachandel/booklore/util/PathPatternResolverTest.java
index fd5fd46e0..83ae27833 100644
--- a/booklore-api/src/test/java/com/adityachandel/booklore/util/PathPatternResolverTest.java
+++ b/booklore-api/src/test/java/com/adityachandel/booklore/util/PathPatternResolverTest.java
@@ -3,15 +3,26 @@ package com.adityachandel.booklore.util;
 import com.adityachandel.booklore.model.dto.BookMetadata;
 import com.adityachandel.booklore.model.entity.BookEntity;
 import com.adityachandel.booklore.model.entity.BookMetadataEntity;
+import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Test;
 
+import java.nio.charset.StandardCharsets;
 import java.time.LocalDate;
+import java.util.LinkedHashSet;
+import java.util.List;
 import java.util.Set;
 
 import static org.junit.jupiter.api.Assertions.*;
 
 class PathPatternResolverTest {
 
+    public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
+            "梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
+            "胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
+            "北京大学政策法规研究室", "(美)艾恺(Guy S. Alitto)", "顾毓琇", "陈从周",
+            "(加拿大)伊莎白(Isabel Crook)(美)柯临清(Christina Gilmartin)", "傅莹"
+    ));
+
     @Test
     void testResolvePattern_nullPattern() {
         BookMetadata metadata = BookMetadata.builder()
@@ -311,4 +322,149 @@ class PathPatternResolverTest {
 
         assertTrue(result.equals("John Doe, Jane Smith - The Great Book [Awesome Series #3] (2023).pdf") || result.equals("Jane Smith, John Doe - The Great Book [Awesome Series #3] (2023).pdf"));
     }
+
+    @Test
+    @DisplayName("Should truncate long author lists to prevent filesystem errors")
+    void testResolvePattern_truncatesLongAuthorList() {
+        BookMetadata metadata = BookMetadata.builder()
+                .title("中国文化合集")
+                .authors(LONG_AUTHOR_LIST)
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{authors}/{title}", "original.epub");
+
+        assertTrue(result.contains("中国文化合集"), "Should contain the title");
+        assertTrue(result.endsWith(".epub"), "Should end with file extension");
+
+        String[] pathComponents = result.split("/");
+        for (String component : pathComponents) {
+            int byteLength = component.getBytes(StandardCharsets.UTF_8).length;
+            assertTrue(byteLength <= 245,
+                "Component '" + component + "' byte length should not exceed filesystem limits: " + byteLength);
+        }
+
+        // Verify the authors part is properly truncated by bytes
+        String authorsPart = pathComponents[0];
+        int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
+        assertTrue(authorsBytes <= 180, "Authors part should be truncated to <= 180 bytes: " + authorsBytes);
+    }
+
+    @Test
+    void testResolvePattern_authorsWithinLimit() {
+        Set<String> authors = Set.of("John Doe", "Jane Smith", "Bob Wilson");
+
+        BookMetadata metadata = BookMetadata.builder()
+                .title("Test Book")
+                .authors(authors)
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "original.pdf");
+
+        assertTrue(result.contains("John Doe") && result.contains("Jane Smith") && result.contains("Bob Wilson"));
+        assertTrue(result.endsWith(".pdf"));
+    }
+
+    @Test
+    @DisplayName("Should apply author truncation in various pattern contexts")
+    void testResolvePattern_appliesAuthorTruncation() {
+        Set<String> shortAuthorList = new LinkedHashSet<>(List.of("John Doe", "Jane Smith"));
+
+        BookMetadata metadata = BookMetadata.builder()
+                .title("Test")
+                .authors(shortAuthorList)
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
+
+        assertTrue(result.endsWith(".epub"));
+        String authorsPart = result.replace(".epub", "");
+        int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
+        assertTrue(authorsBytes <= 180, "Authors should be <= 180 bytes: " + authorsBytes);
+
+        BookMetadata longMetadata = BookMetadata.builder()
+                .title("Test")
+                .authors(LONG_AUTHOR_LIST)
+                .build();
+
+        String longResult = PathPatternResolver.resolvePattern(longMetadata, "{authors}", "test.epub");
+
+        String longAuthorsPart = longResult.replace(".epub", "");
+        int longAuthorsBytes = longAuthorsPart.getBytes(StandardCharsets.UTF_8).length;
+        assertTrue(longAuthorsBytes <= 180, "Long authors should be truncated to <= 180 bytes, got: " + longAuthorsBytes);
+
+        assertTrue(longAuthorsBytes < LONG_AUTHOR_LIST.toString().getBytes(StandardCharsets.UTF_8).length,
+            "Truncated result should be shorter than original long author list");
+    }
+
+    @Test
+    @DisplayName("Should handle single author that exceeds byte limits")
+    void testResolvePattern_truncatesSingleVeryLongAuthor() {
+        String veryLongAuthor = "某某某某某某某某某某".repeat(10); // ~300 bytes in UTF-8
+
+        BookMetadata metadata = BookMetadata.builder()
+                .title("Test")
+                .authors(Set.of(veryLongAuthor))
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
+
+        String authorsPart = result.replace(".epub", "");
+        int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
+
+        assertTrue(authorsBytes <= 180,
+            "Single long author should be truncated to <= 180 bytes: " + authorsBytes);
+        assertFalse(authorsPart.isEmpty(), "Should not be empty after truncation");
+        assertTrue(authorsBytes < veryLongAuthor.getBytes(StandardCharsets.UTF_8).length,
+            "Truncated result should be shorter than original single long author");
+    }
+
+    @Test
+    @DisplayName("Should add 'et al.' when authors are truncated")
+    void testResolvePattern_addsEtAlWhenTruncated() {
+        BookMetadata metadata = BookMetadata.builder()
+                .title("Test")
+                .authors(LONG_AUTHOR_LIST)
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
+
+        assertTrue(result.contains("et al."), "Should contain truncation indicator when authors are truncated");
+    }
+
+    @Test
+    @DisplayName("Should truncate combined long components in final validation")
+    void testResolvePattern_validatesFinalPathWithCombinedLongComponents() {
+        String longTitle = "某".repeat(70); // ~210 bytes
+
+        BookMetadata metadata = BookMetadata.builder()
+                .title(longTitle)
+                .authors(LONG_AUTHOR_LIST)
+                .build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{title} - {authors}", "test.epub");
+
+        String[] components = result.split("/");
+        // Check every component, including the final filename: with this pattern the filename is
+        // the only component, so skipping names that contain '.' would leave nothing verified.
+        for (String component : components) {
+            int byteLength = component.getBytes(StandardCharsets.UTF_8).length;
+            assertTrue(byteLength <= 245, "Path component should be <= 245 bytes: " + byteLength + " for component: " + component);
+        }
+    }
+
+    @Test
+    @DisplayName("Should preserve file extension when truncating very long filenames")
+    void testResolvePattern_preservesExtensionOnTruncation() {
+        String longTitle = "A".repeat(300); // 300 bytes
+
+        BookMetadata metadata = BookMetadata.builder().title(longTitle).build();
+
+        String result = PathPatternResolver.resolvePattern(metadata, "{title}", "original.pdf");
+
+        assertTrue(result.endsWith(".pdf"), "Extension must be preserved");
+        assertTrue(result.length() < 300, "Filename must be truncated");
+
+        int byteLen = result.getBytes(StandardCharsets.UTF_8).length;
+        assertTrue(byteLen <= 245, "Total filename bytes " + byteLen + " should be <= 245");
+    }
 }