diff --git a/backend/alembic/versions/0025_file_hashes_scan.py b/backend/alembic/versions/0025_roms_hashes.py similarity index 50% rename from backend/alembic/versions/0025_file_hashes_scan.py rename to backend/alembic/versions/0025_roms_hashes.py index 2cd47a738..7e193dc2b 100644 --- a/backend/alembic/versions/0025_file_hashes_scan.py +++ b/backend/alembic/versions/0025_roms_hashes.py @@ -1,8 +1,8 @@ """empty message -Revision ID: 0025_file_hashes_scan +Revision ID: 0025_roms_hashes Revises: 0024_sibling_roms_db_view -Create Date: 2024-08-11 12:00:00.000000 +Create Date: 2024-08-11 21:50:53.301352 """ @@ -14,30 +14,28 @@ from handler.redis_handler import high_prio_queue from handler.scan_handler import ScanType # revision identifiers, used by Alembic. -revision = "0025_file_hashes_scan" +revision = "0025_roms_hashes" down_revision = "0024_sibling_roms_db_view" branch_labels = None depends_on = None def upgrade() -> None: - # Run a hash scan in the background + with op.batch_alter_table("roms", schema=None) as batch_op: + batch_op.add_column(sa.Column("crc_hash", sa.String(length=100), nullable=True)) + batch_op.add_column(sa.Column("md5_hash", sa.String(length=100), nullable=True)) + batch_op.add_column( + sa.Column("sha1_hash", sa.String(length=100), nullable=True) + ) + + # Run a no-scan in the background on startup high_prio_queue.enqueue( scan_platforms, [], ScanType.HASH_SCAN, [], [], job_timeout=SCAN_TIMEOUT ) def downgrade() -> None: - connection = op.get_bind() - - connection.execute( - sa.text( - """ - DROP VIEW sibling_roms; - """ - ), - ) - with op.batch_alter_table("roms", schema=None) as batch_op: - batch_op.drop_index("idx_roms_igdb_id") - batch_op.drop_index("idx_roms_moby_id") + batch_op.drop_column("sha1_hash") + batch_op.drop_column("md5_hash") + batch_op.drop_column("crc_hash") diff --git a/backend/endpoints/responses/rom.py b/backend/endpoints/responses/rom.py index f4d0edf3b..2369d06a6 100644 --- a/backend/endpoints/responses/rom.py +++ b/backend/endpoints/responses/rom.py @@ -109,6 +109,9 @@ class RomSchema(BaseModel): multi: bool files: list[RomFile] + crc_hash: str | None + md5_hash: str | None + sha1_hash: str | None full_path: str created_at: datetime updated_at: datetime diff --git a/backend/handler/filesystem/roms_handler.py b/backend/handler/filesystem/roms_handler.py index 988369dfc..9cd2b7022 100644 --- a/backend/handler/filesystem/roms_handler.py +++ b/backend/handler/filesystem/roms_handler.py @@ -7,7 +7,7 @@ import shutil import tarfile import zipfile from pathlib import Path -from typing import Final, Iterator +from typing import Any, Final, Iterator, Tuple import magic import py7zr @@ -165,15 +165,35 @@ class FSRomsHandler(FSHandler): return [f for f in roms if f not in filtered_files] - def _calculate_rom_hashes(self, file_path: Path) -> dict[str, str]: + def _build_rom_file(self, path: Path) -> RomFile: + return RomFile( + filename=path.name, + size=os.stat(path).st_size, + last_modified=os.path.getmtime(path), + ) + + def get_rom_files(self, rom: str, roms_path: str) -> list[RomFile]: + rom_files: list[RomFile] = [] + + # Check if rom is a multi-part rom + if os.path.isdir(f"{roms_path}/{rom}"): + multi_files = os.listdir(f"{roms_path}/{rom}") + for file in self._exclude_files(multi_files, "multi_parts"): + path = Path(roms_path, rom, file) + rom_files.append(self._build_rom_file(path)) + else: + path = Path(roms_path, rom) + rom_files.append(self._build_rom_file(path)) + + return rom_files + + def _calculate_rom_hashes( + self, file_path: Path, crc_c: int, md5_h: Any, sha1_h: Any + ) -> Tuple[int, Any, Any]: mime = magic.Magic(mime=True) file_type = mime.from_file(file_path) extension = Path(file_path).suffix.lower() - crc_c = 0 - md5_h = hashlib.md5(usedforsecurity=False) - sha1_h = hashlib.sha1(usedforsecurity=False) - def update_hashes(chunk: bytes): md5_h.update(chunk) sha1_h.update(chunk) @@ -206,50 +226,35 @@ class FSRomsHandler(FSHandler): while chunk := f.read(FILE_READ_CHUNK_SIZE): update_hashes(chunk) + return crc_c, md5_h, sha1_h + + def get_rom_hashes(self, rom: str, roms_path: str) -> dict[str, str]: + roms_file_path = f"{LIBRARY_BASE_PATH}/{roms_path}" + + crc_c = 0 + md5_h = hashlib.md5(usedforsecurity=False) + sha1_h = hashlib.sha1(usedforsecurity=False) + + # Check if rom is a multi-part rom + if os.path.isdir(f"{roms_file_path}/{rom}"): + multi_files = os.listdir(f"{roms_file_path}/{rom}") + for file in self._exclude_files(multi_files, "multi_parts"): + path = Path(roms_file_path, rom, file) + crc_c, md5_h, sha1_h = self._calculate_rom_hashes( + path, crc_c, md5_h, sha1_h + ) + else: + path = Path(roms_file_path, rom) + crc_c, md5_h, sha1_h = self._calculate_rom_hashes( + path, crc_c, md5_h, sha1_h + ) + return { "crc_hash": (crc_c & 0xFFFFFFFF).to_bytes(4, byteorder="big").hex(), "md5_hash": md5_h.hexdigest(), "sha1_hash": sha1_h.hexdigest(), } - def _build_rom_file(self, path: Path, with_hashes: bool = False) -> RomFile: - if not with_hashes: - return RomFile( - filename=path.name, - size=os.stat(path).st_size, - last_modified=os.path.getmtime(path), - crc_hash=None, - md5_hash=None, - sha1_hash=None, - ) - - rom_hashes = self._calculate_rom_hashes(path) - return RomFile( - filename=path.name, - size=os.stat(path).st_size, - last_modified=os.path.getmtime(path), - crc_hash=rom_hashes["crc_hash"], - md5_hash=rom_hashes["md5_hash"], - sha1_hash=rom_hashes["sha1_hash"], - ) - - def get_rom_files( - self, rom: str, roms_path: str, with_hashes: bool = False - ) -> list[RomFile]: - rom_files: list[RomFile] = [] - - # Check if rom is a multi-part rom - if os.path.isdir(f"{roms_path}/{rom}"): - multi_files = os.listdir(f"{roms_path}/{rom}") - for file in self._exclude_files(multi_files, "multi_parts"): - path = Path(roms_path, rom, file) - rom_files.append(self._build_rom_file(path, with_hashes)) - else: - path = Path(roms_path, rom) - rom_files.append(self._build_rom_file(path, with_hashes)) - - return rom_files - def get_roms(self, platform_fs_slug: str) -> list[FSRom]: """Gets all filesystem roms for a platform diff --git a/backend/handler/scan_handler.py b/backend/handler/scan_handler.py index ba955ad53..750ec4256 100644 --- a/backend/handler/scan_handler.py +++ b/backend/handler/scan_handler.py @@ -3,7 +3,6 @@ from enum import Enum from typing import Any import emoji -from config import LIBRARY_BASE_PATH from config.config_manager import config_manager as cm from handler.database import db_platform_handler from handler.filesystem import fs_asset_handler, fs_firmware_handler, fs_rom_handler @@ -207,6 +206,7 @@ async def scan_rom( ) # Update properties that don't require metadata + file_size = sum([file["size"] for file in rom_attrs["files"]]) regs, rev, langs, other_tags = fs_rom_handler.parse_tags(rom_attrs["file_name"]) rom_attrs.update( { @@ -221,6 +221,7 @@ async def scan_rom( "file_extension": fs_rom_handler.parse_file_extension( rom_attrs["file_name"] ), + "file_size_bytes": file_size, "multi": rom_attrs["multi"], "regions": regs, "revision": rev, @@ -229,18 +230,8 @@ async def scan_rom( } ) - # Calculate file hashes (expensive) - roms_file_path = f"{LIBRARY_BASE_PATH}/{roms_path}" - rom_files = fs_rom_handler.get_rom_files( - rom_attrs["file_name"], roms_file_path, True - ) - file_size = sum([file["size"] for file in rom_files]) - rom_attrs.update( - { - "files": rom_files, - "file_size_bytes": file_size, - } - ) + rom_hashes = fs_rom_handler.get_rom_hashes(rom_attrs["file_name"], roms_path) + rom_attrs.update(**rom_hashes) # If no metadata scan is required if scan_type == ScanType.HASH_SCAN: diff --git a/backend/handler/tests/test_fastapi.py b/backend/handler/tests/test_fastapi.py index 9a5069ab2..6b17b5d6c 100644 --- a/backend/handler/tests/test_fastapi.py +++ b/backend/handler/tests/test_fastapi.py @@ -35,9 +35,6 @@ async def test_scan_rom(): filename="Paper Mario (USA).z64", size=1024, last_modified=1620000000, - crc_hash="9d0d1c6e", - md5_hash="f1b7f9e4f4d0e0b7b9faa1b1f2f8e4e9", - sha1_hash="c3c7f9f3d1d0e0b7b9faa1b1f2f8e4e9", ) ] diff --git a/backend/models/rom.py b/backend/models/rom.py index fe22a5f55..908700fbf 100644 --- a/backend/models/rom.py +++ b/backend/models/rom.py @@ -29,9 +29,6 @@ class RomFile(TypedDict): filename: str size: int last_modified: float | None - crc_hash: str | None - md5_hash: str | None - sha1_hash: str | None class Rom(BaseModel): @@ -78,6 +75,9 @@ class Rom(BaseModel): multi: Mapped[bool] = mapped_column(default=False) files: Mapped[list[RomFile] | None] = mapped_column(JSON, default=[]) + crc_hash: Mapped[str | None] = mapped_column(String(100)) + md5_hash: Mapped[str | None] = mapped_column(String(100)) + sha1_hash: Mapped[str | None] = mapped_column(String(100)) platform_id: Mapped[int] = mapped_column( ForeignKey("platforms.id", ondelete="CASCADE") diff --git a/frontend/src/__generated__/models/DetailedRomSchema.ts b/frontend/src/__generated__/models/DetailedRomSchema.ts index 3896f7f87..1888f2db9 100644 --- a/frontend/src/__generated__/models/DetailedRomSchema.ts +++ b/frontend/src/__generated__/models/DetailedRomSchema.ts @@ -50,6 +50,9 @@ export type DetailedRomSchema = { tags: Array; multi: boolean; files: Array; + crc_hash: (string | null); + md5_hash: (string | null); + sha1_hash: (string | null); full_path: string; created_at: string; updated_at: string; diff --git a/frontend/src/__generated__/models/RomFile.ts b/frontend/src/__generated__/models/RomFile.ts index 4d60c99fc..3880f3ef8 100644 --- a/frontend/src/__generated__/models/RomFile.ts +++ b/frontend/src/__generated__/models/RomFile.ts @@ -6,8 +6,6 @@ export type RomFile = { filename: string; size: number; - crc_hash: number; - md5_hash: string; - sha1_hash: string; + last_modified: (number | null); }; diff --git a/frontend/src/__generated__/models/RomSchema.ts b/frontend/src/__generated__/models/RomSchema.ts index 9eb5af8fd..4d2f71149 100644 --- a/frontend/src/__generated__/models/RomSchema.ts +++ b/frontend/src/__generated__/models/RomSchema.ts @@ -43,6 +43,9 @@ export type RomSchema = { tags: Array; multi: boolean; files: Array; + crc_hash: (string | null); + md5_hash: (string | null); + sha1_hash: (string | null); full_path: string; created_at: string; updated_at: string; diff --git a/frontend/src/__generated__/models/SimpleRomSchema.ts b/frontend/src/__generated__/models/SimpleRomSchema.ts index 9a4ab4088..a06ea7335 100644 --- a/frontend/src/__generated__/models/SimpleRomSchema.ts +++ b/frontend/src/__generated__/models/SimpleRomSchema.ts @@ -45,6 +45,9 @@ export type SimpleRomSchema = { tags: Array; multi: boolean; files: Array; + crc_hash: (string | null); + md5_hash: (string | null); + sha1_hash: (string | null); full_path: string; created_at: string; updated_at: string; diff --git a/frontend/src/components/Details/Info/FileInfo.vue b/frontend/src/components/Details/Info/FileInfo.vue index 76a0cb768..cd21f1291 100644 --- a/frontend/src/components/Details/Info/FileInfo.vue +++ b/frontend/src/components/Details/Info/FileInfo.vue @@ -136,14 +136,14 @@ watch( Size: {{ formatBytes(rom.file_size_bytes) }} - - SHA-1 Hash: {{ rom.files[0].sha1_hash }} + + SHA-1 Hash: {{ rom.sha1_hash }} - - MD5 Hash: {{ rom.files[0].md5_hash }} + + MD5 Hash: {{ rom.md5_hash }} - - CRC Hash: {{ rom.files[0].crc_hash }} + + CRC Hash: {{ rom.crc_hash }}