Aggressive removal of file extensions and tags

This commit is contained in:
Georges-Antoine Assi
2023-09-21 09:30:02 -04:00
parent 8a756ac984
commit a9c9d658bb

View File

@@ -95,9 +95,20 @@ def parse_tags(file_name: str) -> tuple:
def get_file_name_with_no_tags(file_name: str) -> str:
    """Return ``file_name`` without its trailing tags and file extension(s).

    The trailing run of bracketed tags (``[rel-1]``) and parenthesised tags
    (``(USA)``), followed by zero or more dot-extensions (``.zip``,
    ``.nkit.iso``), is removed from the end of the name. Surrounding
    whitespace is stripped from the result.

    The extension is optional, so names that carry tags but no extension
    (for example ``"Game (USA)"``) are cleaned as well. Tags that are
    followed by more title text are not part of the trailing run and are
    left in place.

    Args:
        file_name: Raw file (or folder) name to clean.

    Returns:
        The name with trailing tags/extensions removed and whitespace
        stripped.
    """
    # (?:\s*(?:\[[^\]]+\]|\([^)]+\)))*: zero or more tags, each optionally
    #     preceded by whitespace, e.g. " [rel-1]" or " (USA)"
    # \s*: whitespace between the last tag and the extension
    # (?:\.\w+)*$: zero or more file extensions, e.g. .zip or .nkit.iso
    tags_extension_regex = r"(?:\s*(?:\[[^\]]+\]|\([^)]+\)))*\s*(?:\.\w+)*$"

    # The regex is aggressive and may remove some of the title (for example
    # "Game v1.2.zip" becomes "Game v1"), but that's preferred over leaving
    # tags/extensions in the title.
    return re.sub(tags_extension_regex, "", file_name).strip()
def get_file_extension(rom: dict) -> str:
    """Return the extension(s) of a rom's file name, including the leading dot.

    Args:
        rom: Mapping that provides the ``"multi"`` and ``"file_name"`` keys.

    Returns:
        The trailing run of dot-extensions of ``rom["file_name"]`` (for
        example ``".zip"`` or ``".nkit.iso"``). An empty string is returned
        when ``rom["multi"]`` is truthy or when the file name has no
        extension.
    """
    if rom["multi"]:
        return ""

    # (\.\w+)+$: one or more chained extensions at the end of the name
    extension_regex = r"(\.\w+)+$"
    match = re.search(extension_regex, rom["file_name"])

    # re.search returns None for names without an extension; report that as
    # "no extension" instead of failing on ``None.group``.
    return match.group(0) if match else ""