fix(file): truncate long path components and author lists to prevent filesystem errors (#1655)

This commit is contained in:
Balázs Szücs
2025-11-28 17:34:02 +01:00
committed by GitHub
parent 721952a679
commit 900cd0a884
3 changed files with 338 additions and 7 deletions

View File

@@ -5,20 +5,35 @@ import com.adityachandel.booklore.model.entity.AuthorEntity;
import com.adityachandel.booklore.model.entity.BookEntity;
import com.adityachandel.booklore.model.entity.BookMetadataEntity;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@UtilityClass
@Slf4j
public class PathPatternResolver {
// UTF-8 byte budget applied to individual placeholder values (title, subtitle, series, publisher)
// before they are substituted into the pattern.
private final int MAX_COMPONENT_BYTES = 200;
// UTF-8 byte cap enforced on every '/'-separated component of the final resolved path.
// NOTE(review): presumably chosen relative to the common 255-byte filename limit — confirm.
private final int MAX_FILESYSTEM_COMPONENT_BYTES = 245; // Left 10 bytes buffer
// UTF-8 byte budget for the comma-joined author list.
private final int MAX_AUTHOR_BYTES = 180;
// Marker appended to an author list that had to be cut short.
private final String TRUNCATION_SUFFIX = " et al.";
// Size of the marker in UTF-8 bytes, so room can be reserved for it inside the author budget.
private final int SUFFIX_BYTES = TRUNCATION_SUFFIX.getBytes(StandardCharsets.UTF_8).length;
// One or more consecutive whitespace characters.
private final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
// Any string that ends in a dot followed by one or more ASCII letters or digits.
private final Pattern FILE_EXTENSION_PATTERN = Pattern.compile(".*\\.[a-zA-Z0-9]+$");
// A single control character.
// NOTE(review): CONTROL_CHARACTER_PATTERN is declared twice below; this looks like the before/after
// pair of a diff view — confirm that only one declaration exists in the real file.
private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("[\\p{Cntrl}]");
private final Pattern CONTROL_CHARACTER_PATTERN = Pattern.compile("\\p{Cntrl}");
// One of the characters \ / : * ? " < > |
private final Pattern INVALID_CHARS_PATTERN = Pattern.compile("[\\\\/:*?\"<>|]");
// A "{name}" placeholder; group 1 captures the text between the braces (non-greedy).
private final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{(.*?)}");
// Separator used to split a joined author list back into individual authors.
private final Pattern COMMA_SPACE_PATTERN = Pattern.compile(", ");
// Separator used to split a resolved path into its components.
private final Pattern SLASH_PATTERN = Pattern.compile("/");
public String resolvePattern(BookEntity book, String pattern) {
String currentFilename = book.getFileName() != null ? book.getFileName().trim() : "";
@@ -48,7 +63,7 @@ public class PathPatternResolver {
String authors = sanitize(
metadata != null
? String.join(", ", metadata.getAuthors())
? truncateAuthorsForFilesystem(String.join(", ", metadata.getAuthors()))
: ""
);
String year = sanitize(
@@ -79,13 +94,13 @@ public class PathPatternResolver {
Map<String, String> values = new LinkedHashMap<>();
values.put("authors", authors);
values.put("title", title);
values.put("subtitle", subtitle);
values.put("title", truncatePathComponent(title, MAX_COMPONENT_BYTES));
values.put("subtitle", truncatePathComponent(subtitle, MAX_COMPONENT_BYTES));
values.put("year", year);
values.put("series", series);
values.put("series", truncatePathComponent(series, MAX_COMPONENT_BYTES));
values.put("seriesIndex", seriesIndex);
values.put("language", language);
values.put("publisher", publisher);
values.put("publisher", truncatePathComponent(publisher, MAX_COMPONENT_BYTES));
values.put("isbn", isbn);
values.put("currentFilename", filename);
@@ -164,7 +179,7 @@ public class PathPatternResolver {
result += "." + extension;
}
return result;
return validateFinalPath(result);
}
private String sanitize(String input) {
@@ -173,6 +188,125 @@ public class PathPatternResolver {
.trim();
}
/**
 * Shortens a {@code ", "}-joined author list so that it fits into {@code MAX_AUTHOR_BYTES}
 * bytes of UTF-8.
 *
 * <p>Lists that already fit are returned unchanged. Otherwise whole authors are kept, in order,
 * for as long as they fit, and {@code TRUNCATION_SUFFIX} is appended. If not even the first
 * author fits, that author is cut at a character boundary and the suffix is appended.
 *
 * @param authors author names joined with {@code ", "}; may be {@code null} or empty
 * @return the input itself when it is {@code null}, empty or within budget; otherwise a shortened
 *         list whose UTF-8 length, suffix included, does not exceed {@code MAX_AUTHOR_BYTES}
 */
private String truncateAuthorsForFilesystem(String authors) {
    if (authors == null || authors.isEmpty()) {
        return authors;
    }
    if (authors.getBytes(StandardCharsets.UTF_8).length <= MAX_AUTHOR_BYTES) {
        return authors;
    }

    // From here on the list is over budget, so the suffix will be appended. Reserve its bytes up
    // front: comparing against MAX_AUTHOR_BYTES instead would let the final string grow to
    // MAX_AUTHOR_BYTES + SUFFIX_BYTES.
    int truncationLimit = MAX_AUTHOR_BYTES - SUFFIX_BYTES;

    String[] authorArray = COMMA_SPACE_PATTERN.split(authors);
    StringBuilder result = new StringBuilder();
    int currentBytes = 0;

    for (int i = 0; i < authorArray.length; i++) {
        String author = authorArray[i];
        int separatorBytes = (i > 0) ? 2 : 0; // ", " is two bytes in UTF-8
        int authorBytes = author.getBytes(StandardCharsets.UTF_8).length;

        if (currentBytes + separatorBytes + authorBytes > truncationLimit) {
            if (result.isEmpty()) {
                // Nothing kept so far: cut this author itself rather than returning only the suffix.
                return truncatePathComponent(author, truncationLimit) + TRUNCATION_SUFFIX;
            }
            return result + TRUNCATION_SUFFIX;
        }

        if (i > 0) {
            result.append(", ");
            currentBytes += separatorBytes;
        }
        result.append(author);
        currentBytes += authorBytes;
    }
    return result.toString();
}
/**
 * Cuts {@code component} down to at most {@code maxBytes} bytes of UTF-8 without splitting a
 * character.
 *
 * @param component the text to shorten; {@code null} and empty values are returned as-is
 * @param maxBytes  the largest allowed UTF-8 length of the result
 * @return {@code component} itself when it already fits, otherwise its longest prefix whose
 *         UTF-8 encoding fits into {@code maxBytes}
 */
private String truncatePathComponent(String component, int maxBytes) {
    if (component == null || component.isEmpty()) {
        return component;
    }
    int originalByteCount = component.getBytes(StandardCharsets.UTF_8).length;
    if (originalByteCount <= maxBytes) {
        return component;
    }

    // Encode into a buffer of exactly maxBytes. The encoder stops once the next character no
    // longer fits, leaving the source position at the number of chars that were encoded.
    CharBuffer source = CharBuffer.wrap(component);
    ByteBuffer sink = ByteBuffer.allocate(maxBytes);
    StandardCharsets.UTF_8.newEncoder().encode(source, sink, true);

    String shortened = component.substring(0, source.position());
    if (!shortened.equals(component)) {
        log.debug("Truncated path component from {} to {} bytes for filesystem safety",
                originalByteCount, shortened.getBytes(StandardCharsets.UTF_8).length);
    }
    return shortened;
}
/**
 * Enforces the per-component byte cap on an already resolved path.
 *
 * <p>The path is split on {@code '/'}. The last component, when it contains a dot, is shortened
 * via {@code truncateFilenameWithExtension}; every other component longer than
 * {@code MAX_FILESYSTEM_COMPONENT_BYTES} UTF-8 bytes is cut to that size. The components are
 * then re-joined with {@code '/'}. Because {@link java.util.regex.Pattern#split(CharSequence)}
 * discards trailing empty strings, a trailing {@code '/'} in the input is not reproduced.
 *
 * @param path the resolved relative path; must not be {@code null}
 * @return the path with every component within the byte cap
 */
private String validateFinalPath(String path) {
    String[] segments = SLASH_PATTERN.split(path);
    int lastIndex = segments.length - 1;

    for (int idx = 0; idx <= lastIndex; idx++) {
        String segment = segments[idx];
        if (idx == lastIndex && segment.contains(".")) {
            segments[idx] = truncateFilenameWithExtension(segment);
        } else if (segment.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES) {
            segments[idx] = truncatePathComponent(segment, MAX_FILESYSTEM_COMPONENT_BYTES);
        }
    }
    return String.join("/", segments);
}
/**
 * Shortens a file name to {@code MAX_FILESYSTEM_COMPONENT_BYTES} UTF-8 bytes while keeping its
 * extension intact where possible.
 *
 * <p>The extension is everything from the last dot onwards. When there is no dot, the only dot
 * is the first character, or the extension is longer than 50 bytes (which is logged as a
 * warning), the name is treated as an ordinary component and simply cut at the cap. Otherwise
 * only the part before the extension is cut and the extension is re-attached.
 *
 * @param filename the last path component; must not be {@code null}
 * @return the file name, shortened if it exceeded the cap
 */
private String truncateFilenameWithExtension(String filename) {
    int dotAt = filename.lastIndexOf('.');

    if (dotAt > 0) {
        String suffix = filename.substring(dotAt); // keeps the leading dot
        int suffixBytes = suffix.getBytes(StandardCharsets.UTF_8).length;

        if (suffixBytes <= 50) {
            String stem = filename.substring(0, dotAt);
            int stemBudget = MAX_FILESYSTEM_COMPONENT_BYTES - suffixBytes;
            return stem.getBytes(StandardCharsets.UTF_8).length > stemBudget
                    ? truncatePathComponent(stem, stemBudget) + suffix
                    : filename;
        }
        log.warn("Unusually long extension detected: {}", suffix);
    }

    // No usable extension (no dot, hidden-file style leading dot, or an implausibly long one):
    // cap the whole name like any other component.
    return filename.getBytes(StandardCharsets.UTF_8).length > MAX_FILESYSTEM_COMPONENT_BYTES
            ? truncatePathComponent(filename, MAX_FILESYSTEM_COMPONENT_BYTES)
            : filename;
}
private interface MetadataProvider {
String getTitle();

View File

@@ -17,21 +17,29 @@ import com.adityachandel.booklore.repository.LibraryRepository;
import com.adityachandel.booklore.service.file.FileFingerprint;
import com.adityachandel.booklore.service.appsettings.AppSettingService;
import com.adityachandel.booklore.service.file.FileMovingHelper;
import com.adityachandel.booklore.model.dto.BookMetadata;
import com.adityachandel.booklore.model.enums.BookFileExtension;
import com.adityachandel.booklore.service.metadata.extractor.MetadataExtractorFactory;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import org.mockito.Mock;
import org.mockito.MockedStatic;
import org.mockito.MockitoAnnotations;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
@@ -40,6 +48,13 @@ import static org.mockito.Mockito.*;
class FileUploadServiceTest {
/**
 * Fixture of 23 author names, most of them CJK and therefore multi-byte in UTF-8, used by the
 * long-author upload test. A {@link LinkedHashSet} keeps the names in the order declared here.
 */
public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
"梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
"胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
"北京大学政策法规研究室", "艾恺Guy S. Alitto", "顾毓琇", "陈从周",
"加拿大伊莎白Isabel Crook柯临清Christina Gilmartin", "傅莹"
));
@TempDir
Path tempDir;
@@ -263,4 +278,30 @@ class FileUploadServiceTest {
.isThrownBy(() -> service.uploadAdditionalFile(bookId, file, AdditionalFileType.ALTERNATIVE_FORMAT, null));
}
}
/**
 * Uploads a small EPUB into a library whose naming pattern uses {authors} both as a directory
 * and inside the file name, with extracted metadata carrying {@code LONG_AUTHOR_LIST}, and
 * expects the upload to complete without throwing.
 */
@Test
@DisplayName("Should upload files with long authors without filesystem errors")
void uploadFile_withLongAuthors_doesNotThrowFilesystemError() {
    // Library 10 with a single path (id 3) rooted in the JUnit temp directory.
    LibraryPathEntity libraryPath = new LibraryPathEntity();
    libraryPath.setId(3L);
    libraryPath.setPath(tempDir.toString());

    LibraryEntity library = new LibraryEntity();
    library.setId(10L);
    library.setFileNamingPattern("{authors}/<{series}/><{seriesIndex}. >{title}< - {authors}>< ({year})>");
    library.setLibraryPaths(List.of(libraryPath));
    when(libraryRepository.findById(10L)).thenReturn(Optional.of(library));

    // Whatever file is inspected, report a title plus the oversized author set.
    BookMetadata extracted = BookMetadata.builder()
            .title("中国文化合集")
            .authors(LONG_AUTHOR_LIST)
            .build();
    when(metadataExtractorFactory.extractMetadata(any(BookFileExtension.class), any(File.class)))
            .thenReturn(extracted);

    MockMultipartFile upload = new MockMultipartFile(
            "file", "long-authors.epub", "application/epub+zip", "content".getBytes());

    assertDoesNotThrow(() -> service.uploadFile(upload, 10L, 3L));
}
}

View File

@@ -3,15 +3,26 @@ package com.adityachandel.booklore.util;
import com.adityachandel.booklore.model.dto.BookMetadata;
import com.adityachandel.booklore.model.entity.BookEntity;
import com.adityachandel.booklore.model.entity.BookMetadataEntity;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import static org.junit.jupiter.api.Assertions.*;
class PathPatternResolverTest {
/**
 * Fixture of 23 author names, most of them CJK and therefore multi-byte in UTF-8. Joined with
 * ", " the list is longer than the 180-byte authors limit asserted by the tests in this class.
 * A {@link LinkedHashSet} keeps the names in the order declared here.
 */
public static final Set<String> LONG_AUTHOR_LIST = new LinkedHashSet<>(List.of(
"梁思成", "叶嘉莹", "厉以宁", "萧乾", "冯友兰", "费孝通", "李济", "侯仁之", "汤一介", "温源宁",
"胡适", "吴青", "李照国", "蒋梦麟", "汪荣祖", "邢玉瑞", "《中华思想文化术语》编委会",
"北京大学政策法规研究室", "艾恺Guy S. Alitto", "顾毓琇", "陈从周",
"加拿大伊莎白Isabel Crook柯临清Christina Gilmartin", "傅莹"
));
@Test
void testResolvePattern_nullPattern() {
BookMetadata metadata = BookMetadata.builder()
@@ -311,4 +322,149 @@ class PathPatternResolverTest {
assertTrue(result.equals("John Doe, Jane Smith - The Great Book [Awesome Series #3] (2023).pdf") ||
result.equals("Jane Smith, John Doe - The Great Book [Awesome Series #3] (2023).pdf"));
}
/**
 * Resolves "{authors}/{title}" for the oversized author fixture and checks that the title and
 * extension survive, every path component is at most 245 UTF-8 bytes, and the leading authors
 * directory is at most 180 UTF-8 bytes.
 */
@Test
@DisplayName("Should truncate long author lists to prevent filesystem errors")
void testResolvePattern_truncatesLongAuthorList() {
    BookMetadata longAuthorsBook = BookMetadata.builder()
            .title("中国文化合集")
            .authors(LONG_AUTHOR_LIST)
            .build();

    String resolved = PathPatternResolver.resolvePattern(longAuthorsBook, "{authors}/{title}", "original.epub");

    assertTrue(resolved.contains("中国文化合集"), "Should contain the title");
    assertTrue(resolved.endsWith(".epub"), "Should end with file extension");

    String[] segments = resolved.split("/");
    for (int i = 0; i < segments.length; i++) {
        String segment = segments[i];
        int segmentBytes = segment.getBytes(StandardCharsets.UTF_8).length;
        assertTrue(segmentBytes <= 245,
                "Component '" + segment + "' byte length should not exceed filesystem limits: " + segmentBytes);
    }

    // The first component is the authors directory; it has its own, tighter byte budget.
    int authorsDirBytes = segments[0].getBytes(StandardCharsets.UTF_8).length;
    assertTrue(authorsDirBytes <= 180, "Authors part should be truncated to <= 180 bytes: " + authorsDirBytes);
}
/**
 * A short author set must come through complete: every name appears in the resolved path and
 * the original file extension is appended.
 */
@Test
void testResolvePattern_authorsWithinLimit() {
    Set<String> expectedNames = Set.of("John Doe", "Jane Smith", "Bob Wilson");
    BookMetadata book = BookMetadata.builder()
            .title("Test Book")
            .authors(expectedNames)
            .build();

    String resolved = PathPatternResolver.resolvePattern(book, "{authors}", "original.pdf");

    // Set.of has no defined iteration order, so only presence is checked, not position.
    boolean allPresent = true;
    for (String name : expectedNames) {
        allPresent = allPresent && resolved.contains(name);
    }
    assertTrue(allPresent);
    assertTrue(resolved.endsWith(".pdf"));
}
/**
 * Checks the 180-byte authors limit for both a short author set and the oversized fixture,
 * and that the oversized result is shorter than the fixture's own string form.
 */
@Test
@DisplayName("Should apply author truncation in various pattern contexts")
void testResolvePattern_appliesAuthorTruncation() {
    // Case 1: two short names.
    Set<String> twoAuthors = new LinkedHashSet<>(List.of("John Doe", "Jane Smith"));
    BookMetadata shortBook = BookMetadata.builder()
            .title("Test")
            .authors(twoAuthors)
            .build();

    String shortResolved = PathPatternResolver.resolvePattern(shortBook, "{authors}", "test.epub");

    assertTrue(shortResolved.endsWith(".epub"));
    int shortBytes = shortResolved.replace(".epub", "").getBytes(StandardCharsets.UTF_8).length;
    assertTrue(shortBytes <= 180, "Authors should be <= 180 bytes: " + shortBytes);

    // Case 2: the oversized fixture.
    BookMetadata longBook = BookMetadata.builder()
            .title("Test")
            .authors(LONG_AUTHOR_LIST)
            .build();

    String longResolved = PathPatternResolver.resolvePattern(longBook, "{authors}", "test.epub");

    int truncatedBytes = longResolved.replace(".epub", "").getBytes(StandardCharsets.UTF_8).length;
    int fixtureBytes = LONG_AUTHOR_LIST.toString().getBytes(StandardCharsets.UTF_8).length;
    assertTrue(truncatedBytes <= 180, "Long authors should be truncated to <= 180 bytes, got: " + truncatedBytes);
    assertTrue(truncatedBytes < fixtureBytes,
            "Truncated result should be shorter than original long author list");
}
/**
 * A single author name of 100 three-byte characters (300 UTF-8 bytes) must be cut to at most
 * 180 bytes, must not vanish entirely, and must end up shorter than the original name.
 */
@Test
@DisplayName("Should handle single author that exceeds byte limits")
void testResolvePattern_truncatesSingleVeryLongAuthor() {
    String oversizedName = "某某某某某某某某某某".repeat(10); // ~300 bytes in UTF-8
    int oversizedBytes = oversizedName.getBytes(StandardCharsets.UTF_8).length;

    BookMetadata book = BookMetadata.builder()
            .title("Test")
            .authors(Set.of(oversizedName))
            .build();

    String resolved = PathPatternResolver.resolvePattern(book, "{authors}", "test.epub");
    String withoutExtension = resolved.replace(".epub", "");
    int resolvedBytes = withoutExtension.getBytes(StandardCharsets.UTF_8).length;

    assertTrue(resolvedBytes <= 180,
            "Single long author should be truncated to <= 180 bytes: " + resolvedBytes);
    assertFalse(withoutExtension.isEmpty(), "Should not be empty after truncation");
    assertTrue(resolvedBytes < oversizedBytes,
            "Truncated result should be shorter than original single long author");
}
/**
 * When the author list has to be shortened, the resolved name must carry the "et al." marker.
 */
@Test
@DisplayName("Should add 'et al.' when authors are truncated")
void testResolvePattern_addsEtAlWhenTruncated() {
    BookMetadata oversizedAuthorsBook = BookMetadata.builder()
            .title("Test")
            .authors(LONG_AUTHOR_LIST)
            .build();

    String resolved = PathPatternResolver.resolvePattern(oversizedAuthorsBook, "{authors}", "test.epub");

    boolean hasMarker = resolved.contains("et al.");
    assertTrue(hasMarker, "Should contain truncation indicator when authors are truncated");
}
/**
 * A long title combined with a long author list in the same component must still be cut down
 * to the 245-byte per-component cap by the final path validation.
 */
@Test
@DisplayName("Should truncate combined long components in final validation")
void testResolvePattern_validatesFinalPathWithCombinedLongComponents() {
    // 70 three-byte CJK characters = 210 UTF-8 bytes. The previous literal was empty, which made
    // the title empty and left the "combined long components" scenario unexercised.
    String longTitle = "中".repeat(70);
    assertEquals(210, longTitle.getBytes(StandardCharsets.UTF_8).length, "Test fixture should be 210 bytes");

    BookMetadata metadata = BookMetadata.builder()
            .title(longTitle)
            .authors(LONG_AUTHOR_LIST)
            .build();

    String result = PathPatternResolver.resolvePattern(metadata, "{title} - {authors}", "test.epub");

    // Check every component, including the file name: the pattern has no '/', so the file name is
    // the only component, and skipping components that contain '.' would assert nothing at all.
    String[] components = result.split("/");
    for (String component : components) {
        int byteLength = component.getBytes(StandardCharsets.UTF_8).length;
        assertTrue(byteLength <= 245, "Path component should be <= 245 bytes: " + byteLength + " for component: " + component);
    }
}
/**
 * A 300-byte ASCII title must be shortened so that the whole file name is at most 245 UTF-8
 * bytes, while the ".pdf" extension of the original file is kept.
 */
@Test
@DisplayName("Should preserve file extension when truncating very long filenames")
void testResolvePattern_preservesExtensionOnTruncation() {
    BookMetadata book = BookMetadata.builder()
            .title("A".repeat(300)) // 300 bytes
            .build();

    String resolved = PathPatternResolver.resolvePattern(book, "{title}", "original.pdf");
    int resolvedBytes = resolved.getBytes(StandardCharsets.UTF_8).length;

    assertTrue(resolved.endsWith(".pdf"), "Extension must be preserved");
    assertTrue(resolved.length() < 300, "Filename must be truncated");
    assertTrue(resolvedBytes <= 245, "Total filename bytes " + resolvedBytes + " should be <= 245");
}
}