mirror of
https://github.com/booklore-app/booklore.git
synced 2026-02-18 00:17:53 +01:00
fix(file): truncate long path components and author lists to prevent filesystem errors (#1655)
This commit is contained in:
@@ -5,20 +5,35 @@ import com.adityachandel.booklore.model.entity.AuthorEntity;
|
||||
import com.adityachandel.booklore.model.entity.BookEntity;
|
||||
import com.adityachandel.booklore.model.entity.BookMetadataEntity;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDate;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@UtilityClass
|
||||
@Slf4j
|
||||
public class PathPatternResolver {
|
||||
|
||||
private final int MAX_COMPONENT_BYTES = 200;
|
||||
private final int MAX_FILESYSTEM_COMPONENT_BYTES = 245; // Left 10 bytes buffer
|
||||
private final int MAX_AUTHOR_BYTES = 180;
|
||||
|
||||
private final String TRUNCATION_SUFFIX = " et al.";
|
||||
private final int SUFFIX_BYTES = TRUNCATION_SUFFIX.getBytes(StandardCharsets.UTF_8).length;
|
||||
|
||||
private final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
|
||||
private final Pattern FILE_EXTENSION_PATTERN = Pattern.compile(".*\\.[a-zA-Z0-9]+$");
|
||||
private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("[\\p{Cntrl}]");
|
||||
private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("\\p{Cntrl}");
|
||||
private final Pattern INVALID_CHARS_PATTERN = Pattern.compile("[\\\\/:*?\"<>|]");
|
||||
private final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{(.*?)}");
|
||||
private final Pattern COMMA_SPACE_PATTERN = Pattern.compile(", ");
|
||||
private final Pattern SLASH_PATTERN = Pattern.compile("/");
|
||||
|
||||
public String resolvePattern(BookEntity book, String pattern) {
|
||||
String currentFilename = book.getFileName() != null ? book.getFileName().trim() : "";
|
||||
@@ -48,7 +63,7 @@ public class PathPatternResolver {
|
||||
|
||||
String authors = sanitize(
|
||||
metadata != null
|
||||
? String.join(", ", metadata.getAuthors())
|
||||
? truncateAuthorsForFilesystem(String.join(", ", metadata.getAuthors()))
|
||||
: ""
|
||||
);
|
||||
String year = sanitize(
|
||||
@@ -79,13 +94,13 @@ public class PathPatternResolver {
|
||||
|
||||
Map<String, String> values = new LinkedHashMap<>();
|
||||
values.put("authors", authors);
|
||||
values.put("title", title);
|
||||
values.put("subtitle", subtitle);
|
||||
values.put("title", truncatePathComponent(title, MAX_COMPONENT_BYTES));
|
||||
values.put("subtitle", truncatePathComponent(subtitle, MAX_COMPONENT_BYTES));
|
||||
values.put("year", year);
|
||||
values.put("series", series);
|
||||
values.put("series", truncatePathComponent(series, MAX_COMPONENT_BYTES));
|
||||
values.put("seriesIndex", seriesIndex);
|
||||
values.put("language", language);
|
||||
values.put("publisher", publisher);
|
||||
values.put("publisher", truncatePathComponent(publisher, MAX_COMPONENT_BYTES));
|
||||
values.put("isbn", isbn);
|
||||
values.put("currentFilename", filename);
|
||||
|
||||
@@ -164,7 +179,7 @@ public class PathPatternResolver {
|
||||
result += "." + extension;
|
||||
}
|
||||
|
||||
return result;
|
||||
return validateFinalPath(result);
|
||||
}
|
||||
|
||||
private String sanitize(String input) {
|
||||
@@ -173,6 +188,125 @@ public class PathPatternResolver {
|
||||
.trim();
|
||||
}
|
||||
|
||||
private String truncateAuthorsForFilesystem(String authors) {
|
||||
if (authors == null || authors.isEmpty()) {
|
||||
return authors;
|
||||
}
|
||||
|
||||
byte[] originalBytes = authors.getBytes(StandardCharsets.UTF_8);
|
||||
if (originalBytes.length <= MAX_AUTHOR_BYTES) {
|
||||
return authors;
|
||||
}
|
||||
|
||||
String[] authorArray = COMMA_SPACE_PATTERN.split(authors);
|
||||
StringBuilder result = new StringBuilder();
|
||||
int currentBytes = 0;
|
||||
int truncationLimit = MAX_AUTHOR_BYTES - SUFFIX_BYTES;
|
||||
|
||||
for (int i = 0; i < authorArray.length; i++) {
|
||||
String author = authorArray[i];
|
||||
|
||||
int separatorBytes = (i > 0) ? 2 : 0;
|
||||
int authorBytes = author.getBytes(StandardCharsets.UTF_8).length;
|
||||
|
||||
if (currentBytes + separatorBytes + authorBytes > MAX_AUTHOR_BYTES) {
|
||||
if (result.isEmpty()) {
|
||||
return truncatePathComponent(author, truncationLimit) + TRUNCATION_SUFFIX;
|
||||
}
|
||||
return result + TRUNCATION_SUFFIX;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
result.append(", ");
|
||||
currentBytes += 2;
|
||||
}
|
||||
result.append(author);
|
||||
currentBytes += authorBytes;
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private String truncatePathComponent(String component, int maxBytes) {
|
||||
if (component == null || component.isEmpty()) {
|
||||
return component;
|
||||
}
|
||||
|
||||
byte[] bytes = component.getBytes(StandardCharsets.UTF_8);
|
||||
if (bytes.length <= maxBytes) {
|
||||
return component;
|
||||
}
|
||||
|
||||
CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
|
||||
ByteBuffer buffer = ByteBuffer.allocate(maxBytes);
|
||||
CharBuffer charBuffer = CharBuffer.wrap(component);
|
||||
|
||||
encoder.encode(charBuffer, buffer, true);
|
||||
|
||||
String truncated = component.substring(0, charBuffer.position());
|
||||
if (!truncated.equals(component)) {
|
||||
log.debug("Truncated path component from {} to {} bytes for filesystem safety",
|
||||
bytes.length, truncated.getBytes(StandardCharsets.UTF_8).length);
|
||||
}
|
||||
return truncated;
|
||||
}
|
||||
|
||||
|
||||
private String validateFinalPath(String path) {
|
||||
String[] components = SLASH_PATTERN.split(path);
|
||||
StringBuilder result = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < components.length; i++) {
|
||||
String component = components[i];
|
||||
boolean isLastComponent = (i == components.length - 1);
|
||||
|
||||
if (isLastComponent && component.contains(".")) {
|
||||
component = truncateFilenameWithExtension(component);
|
||||
} else {
|
||||
if (component.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
|
||||
component = truncatePathComponent(component, MAX_FILESYSTEM_COMPONENT_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
if (i > 0) result.append("/");
|
||||
result.append(component);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private String truncateFilenameWithExtension(String filename) {
|
||||
int lastDotIndex = filename.lastIndexOf('.');
|
||||
if (lastDotIndex == -1 || lastDotIndex == 0) {
|
||||
// No extension or dot is at start (hidden file), treat as normal component
|
||||
if (filename.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
|
||||
return truncatePathComponent(filename, MAX_FILESYSTEM_COMPONENT_BYTES);
|
||||
}
|
||||
return filename;
|
||||
}
|
||||
|
||||
String extension = filename.substring(lastDotIndex); // includes dot
|
||||
String name = filename.substring(0, lastDotIndex);
|
||||
|
||||
int extBytes = extension.getBytes(StandardCharsets.UTF_8).length;
|
||||
|
||||
if (extBytes > 50) {
|
||||
log.warn("Unusually long extension detected: {}", extension);
|
||||
if (filename.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
|
||||
return truncatePathComponent(filename, MAX_FILESYSTEM_COMPONENT_BYTES);
|
||||
}
|
||||
return filename;
|
||||
}
|
||||
|
||||
int maxNameBytes = MAX_FILESYSTEM_COMPONENT_BYTES - extBytes;
|
||||
|
||||
if (name.getBytes(StandardCharsets.UTF_8).length > maxNameBytes) {
|
||||
String truncatedName = truncatePathComponent(name, maxNameBytes);
|
||||
return truncatedName + extension;
|
||||
}
|
||||
|
||||
return filename;
|
||||
}
|
||||
|
||||
private interface MetadataProvider {
|
||||
String getTitle();
|
||||
|
||||
|
||||
@@ -17,21 +17,29 @@ import com.adityachandel.booklore.repository.LibraryRepository;
|
||||
import com.adityachandel.booklore.service.file.FileFingerprint;
|
||||
import com.adityachandel.booklore.service.appsettings.AppSettingService;
|
||||
import com.adityachandel.booklore.service.file.FileMovingHelper;
|
||||
import com.adityachandel.booklore.model.dto.BookMetadata;
|
||||
import com.adityachandel.booklore.model.enums.BookFileExtension;
|
||||
import com.adityachandel.booklore.service.metadata.extractor.MetadataExtractorFactory;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.DisplayName;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockedStatic;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
|
||||
@@ -40,6 +48,13 @@ import static org.mockito.Mockito.*;
|
||||
|
||||
class FileUploadServiceTest {
|
||||
|
||||
/**
 * Insertion-ordered set of author names (mostly CJK, some with Latin transliterations)
 * used as the "long author list" input for the upload test in this class. Joined with
 * {@code ", "} it encodes to well over 180 bytes of UTF-8.
 */
public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
|
||||
"梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
|
||||
"胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
|
||||
"北京大学政策法规研究室", "(美)艾恺(Guy S. Alitto)", "顾毓琇", "陈从周",
|
||||
"(加拿大)伊莎白(Isabel Crook)(美)柯临清(Christina Gilmartin)", "傅莹"
|
||||
));
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
@@ -263,4 +278,30 @@ class FileUploadServiceTest {
|
||||
.isThrownBy(() -> service.uploadAdditionalFile(bookId, file, AdditionalFileType.ALTERNATIVE_FORMAT, null));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should upload files with long authors without filesystem errors")
|
||||
void uploadFile_withLongAuthors_doesNotThrowFilesystemError() {
|
||||
byte[] data = "content".getBytes();
|
||||
MockMultipartFile file = new MockMultipartFile("file", "long-authors.epub", "application/epub+zip", data);
|
||||
|
||||
LibraryEntity lib = new LibraryEntity();
|
||||
lib.setId(10L);
|
||||
String defaultPattern = "{authors}/<{series}/><{seriesIndex}. >{title}< - {authors}>< ({year})>";
|
||||
lib.setFileNamingPattern(defaultPattern);
|
||||
LibraryPathEntity path = new LibraryPathEntity();
|
||||
path.setId(3L);
|
||||
path.setPath(tempDir.toString());
|
||||
lib.setLibraryPaths(List.of(path));
|
||||
when(libraryRepository.findById(10L)).thenReturn(Optional.of(lib));
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("中国文化合集")
|
||||
.authors(LONG_AUTHOR_LIST)
|
||||
.build();
|
||||
|
||||
when(metadataExtractorFactory.extractMetadata(any(BookFileExtension.class), any(File.class))).thenReturn(metadata);
|
||||
|
||||
assertDoesNotThrow(() -> service.uploadFile(file, 10L, 3L));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,15 +3,26 @@ package com.adityachandel.booklore.util;
|
||||
import com.adityachandel.booklore.model.dto.BookMetadata;
|
||||
import com.adityachandel.booklore.model.entity.BookEntity;
|
||||
import com.adityachandel.booklore.model.entity.BookMetadataEntity;
|
||||
import org.junit.jupiter.api.DisplayName;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDate;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class PathPatternResolverTest {
|
||||
|
||||
/**
 * Insertion-ordered set of author names (mostly CJK, some with Latin transliterations)
 * shared by the truncation tests in this class. Joined with {@code ", "} it encodes to
 * well over 180 bytes of UTF-8.
 */
public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
|
||||
"梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
|
||||
"胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
|
||||
"北京大学政策法规研究室", "(美)艾恺(Guy S. Alitto)", "顾毓琇", "陈从周",
|
||||
"(加拿大)伊莎白(Isabel Crook)(美)柯临清(Christina Gilmartin)", "傅莹"
|
||||
));
|
||||
|
||||
@Test
|
||||
void testResolvePattern_nullPattern() {
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
@@ -311,4 +322,149 @@ class PathPatternResolverTest {
|
||||
assertTrue(result.equals("John Doe, Jane Smith - The Great Book [Awesome Series #3] (2023).pdf") ||
|
||||
result.equals("Jane Smith, John Doe - The Great Book [Awesome Series #3] (2023).pdf"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should truncate long author lists to prevent filesystem errors")
|
||||
void testResolvePattern_truncatesLongAuthorList() {
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("中国文化合集")
|
||||
.authors(LONG_AUTHOR_LIST)
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{authors}/{title}", "original.epub");
|
||||
|
||||
assertTrue(result.contains("中国文化合集"), "Should contain the title");
|
||||
assertTrue(result.endsWith(".epub"), "Should end with file extension");
|
||||
|
||||
String[] pathComponents = result.split("/");
|
||||
for (String component : pathComponents) {
|
||||
int byteLength = component.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(byteLength <= 245,
|
||||
"Component '" + component + "' byte length should not exceed filesystem limits: " + byteLength);
|
||||
}
|
||||
|
||||
// Verify the authors part is properly truncated by bytes
|
||||
String authorsPart = pathComponents[0];
|
||||
int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(authorsBytes <= 180, "Authors part should be truncated to <= 180 bytes: " + authorsBytes);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testResolvePattern_authorsWithinLimit() {
|
||||
Set<String> authors = Set.of("John Doe", "Jane Smith", "Bob Wilson");
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("Test Book")
|
||||
.authors(authors)
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "original.pdf");
|
||||
|
||||
assertTrue(result.contains("John Doe") && result.contains("Jane Smith") && result.contains("Bob Wilson"));
|
||||
assertTrue(result.endsWith(".pdf"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should apply author truncation in various pattern contexts")
|
||||
void testResolvePattern_appliesAuthorTruncation() {
|
||||
Set<String> shortAuthorList = new LinkedHashSet<>(List.of("John Doe", "Jane Smith"));
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("Test")
|
||||
.authors(shortAuthorList)
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
|
||||
|
||||
assertTrue(result.endsWith(".epub"));
|
||||
String authorsPart = result.replace(".epub", "");
|
||||
int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(authorsBytes <= 180, "Authors should be <= 180 bytes: " + authorsBytes);
|
||||
|
||||
BookMetadata longMetadata = BookMetadata.builder()
|
||||
.title("Test")
|
||||
.authors(LONG_AUTHOR_LIST)
|
||||
.build();
|
||||
|
||||
String longResult = PathPatternResolver.resolvePattern(longMetadata, "{authors}", "test.epub");
|
||||
|
||||
String longAuthorsPart = longResult.replace(".epub", "");
|
||||
int longAuthorsBytes = longAuthorsPart.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(longAuthorsBytes <= 180, "Long authors should be truncated to <= 180 bytes, got: " + longAuthorsBytes);
|
||||
|
||||
assertTrue(longAuthorsBytes < LONG_AUTHOR_LIST.toString().getBytes(StandardCharsets.UTF_8).length,
|
||||
"Truncated result should be shorter than original long author list");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should handle single author that exceeds byte limits")
|
||||
void testResolvePattern_truncatesSingleVeryLongAuthor() {
|
||||
String veryLongAuthor = "某某某某某某某某某某".repeat(10); // ~300 bytes in UTF-8
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("Test")
|
||||
.authors(Set.of(veryLongAuthor))
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
|
||||
|
||||
String authorsPart = result.replace(".epub", "");
|
||||
int authorsBytes = authorsPart.getBytes(StandardCharsets.UTF_8).length;
|
||||
|
||||
assertTrue(authorsBytes <= 180,
|
||||
"Single long author should be truncated to <= 180 bytes: " + authorsBytes);
|
||||
assertFalse(authorsPart.isEmpty(), "Should not be empty after truncation");
|
||||
assertTrue(authorsBytes < veryLongAuthor.getBytes(StandardCharsets.UTF_8).length,
|
||||
"Truncated result should be shorter than original single long author");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should add 'et al.' when authors are truncated")
|
||||
void testResolvePattern_addsEtAlWhenTruncated() {
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title("Test")
|
||||
.authors(LONG_AUTHOR_LIST)
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{authors}", "test.epub");
|
||||
|
||||
assertTrue(result.contains("et al."), "Should contain truncation indicator when authors are truncated");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should truncate combined long components in final validation")
|
||||
void testResolvePattern_validatesFinalPathWithCombinedLongComponents() {
|
||||
String longTitle = "某".repeat(70); // ~210 bytes
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder()
|
||||
.title(longTitle)
|
||||
.authors(LONG_AUTHOR_LIST)
|
||||
.build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{title} - {authors}", "test.epub");
|
||||
|
||||
String[] components = result.split("/");
|
||||
for (String component : components) {
|
||||
if (!component.contains(".")) { // Skip filename with extension
|
||||
int byteLength = component.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(byteLength <= 245, "Path component should be <= 245 bytes: " + byteLength + " for component: " + component);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should preserve file extension when truncating very long filenames")
|
||||
void testResolvePattern_preservesExtensionOnTruncation() {
|
||||
String longTitle = "A".repeat(300); // 300 bytes
|
||||
|
||||
BookMetadata metadata = BookMetadata.builder().title(longTitle).build();
|
||||
|
||||
String result = PathPatternResolver.resolvePattern(metadata, "{title}", "original.pdf");
|
||||
|
||||
assertTrue(result.endsWith(".pdf"), "Extension must be preserved");
|
||||
assertTrue(result.length() < 300, "Filename must be truncated");
|
||||
|
||||
int byteLen = result.getBytes(StandardCharsets.UTF_8).length;
|
||||
assertTrue(byteLen <= 245, "Total filename bytes " + byteLen + " should be <= 245");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user