Move file hashes from individual ROM files to the ROM model

This commit is contained in:
Georges-Antoine Assi
2024-08-11 22:38:22 -04:00
parent a617b79f55
commit 1ea1b326d3
11 changed files with 89 additions and 88 deletions

View File

@@ -1,8 +1,8 @@
"""empty message
Revision ID: 0025_file_hashes_scan
Revision ID: 0025_roms_hashes
Revises: 0024_sibling_roms_db_view
Create Date: 2024-08-11 12:00:00.000000
Create Date: 2024-08-11 21:50:53.301352
"""
@@ -14,30 +14,28 @@ from handler.redis_handler import high_prio_queue
from handler.scan_handler import ScanType
# revision identifiers, used by Alembic.
revision = "0025_file_hashes_scan"
revision = "0025_roms_hashes"
down_revision = "0024_sibling_roms_db_view"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the hash columns to ``roms`` and queue a hash scan."""
    # Each hash is stored in its own nullable string column (max length 100)
    with op.batch_alter_table("roms", schema=None) as batch_op:
        for hash_column in ("crc_hash", "md5_hash", "sha1_hash"):
            batch_op.add_column(
                sa.Column(hash_column, sa.String(length=100), nullable=True)
            )

    # Queue a ScanType.HASH_SCAN job on the high-priority queue
    high_prio_queue.enqueue(
        scan_platforms, [], ScanType.HASH_SCAN, [], [], job_timeout=SCAN_TIMEOUT
    )
def downgrade() -> None:
    """Drop the ``sibling_roms`` view, then two indexes and the hash columns on ``roms``."""
    # NOTE(review): upgrade() as shown only adds the three hash columns; the
    # view drop and the two index drops have no visible counterpart there --
    # confirm they belong in this revision's downgrade.
    connection = op.get_bind()
    connection.execute(
        sa.text(
            """
DROP VIEW sibling_roms;
"""
        ),
    )

    with op.batch_alter_table("roms", schema=None) as batch_op:
        batch_op.drop_index("idx_roms_igdb_id")
        batch_op.drop_index("idx_roms_moby_id")
        # Columns are dropped in the reverse of the order upgrade() adds them
        batch_op.drop_column("sha1_hash")
        batch_op.drop_column("md5_hash")
        batch_op.drop_column("crc_hash")

View File

@@ -109,6 +109,9 @@ class RomSchema(BaseModel):
multi: bool
files: list[RomFile]
crc_hash: str | None
md5_hash: str | None
sha1_hash: str | None
full_path: str
created_at: datetime
updated_at: datetime

View File

@@ -7,7 +7,7 @@ import shutil
import tarfile
import zipfile
from pathlib import Path
from typing import Final, Iterator
from typing import Any, Final, Iterator, Tuple
import magic
import py7zr
@@ -165,15 +165,35 @@ class FSRomsHandler(FSHandler):
return [f for f in roms if f not in filtered_files]
def _calculate_rom_hashes(self, file_path: Path) -> dict[str, str]:
def _build_rom_file(self, path: Path) -> RomFile:
    """Describe the file at ``path`` as a ``RomFile``.

    Args:
        path: Location of the file on disk.

    Returns:
        A ``RomFile`` holding the file's name, its size as reported by
        ``os.stat`` and its modification time from ``os.path.getmtime``.
    """
    size_on_disk = os.stat(path).st_size
    modified_at = os.path.getmtime(path)
    return RomFile(
        filename=path.name,
        size=size_on_disk,
        last_modified=modified_at,
    )
def get_rom_files(self, rom: str, roms_path: str) -> list[RomFile]:
    """List the files that make up a rom.

    Args:
        rom: File or directory name of the rom.
        roms_path: Directory that contains the rom.

    Returns:
        One ``RomFile`` per file. A rom that is a directory is treated as a
        multi-part rom and yields one entry for every file kept by
        ``_exclude_files(..., "multi_parts")``; otherwise a single entry.
    """
    rom_location = f"{roms_path}/{rom}"

    # A plain file is a single-part rom
    if not os.path.isdir(rom_location):
        return [self._build_rom_file(Path(roms_path, rom))]

    # A directory is a multi-part rom
    kept_parts = self._exclude_files(os.listdir(rom_location), "multi_parts")
    return [
        self._build_rom_file(Path(roms_path, rom, part)) for part in kept_parts
    ]
def _calculate_rom_hashes(
self, file_path: Path, crc_c: int, md5_h: Any, sha1_h: Any
) -> Tuple[int, Any, Any]:
mime = magic.Magic(mime=True)
file_type = mime.from_file(file_path)
extension = Path(file_path).suffix.lower()
crc_c = 0
md5_h = hashlib.md5(usedforsecurity=False)
sha1_h = hashlib.sha1(usedforsecurity=False)
def update_hashes(chunk: bytes):
md5_h.update(chunk)
sha1_h.update(chunk)
@@ -206,50 +226,35 @@ class FSRomsHandler(FSHandler):
while chunk := f.read(FILE_READ_CHUNK_SIZE):
update_hashes(chunk)
return crc_c, md5_h, sha1_h
def get_rom_hashes(self, rom: str, roms_path: str) -> dict[str, str]:
    """Compute the CRC, MD5 and SHA-1 hashes of a rom.

    For a multi-part rom (a directory) the same running CRC/MD5/SHA-1 state
    is threaded through ``_calculate_rom_hashes`` for every file kept by
    ``_exclude_files(..., "multi_parts")``, producing one set of hashes for
    the whole rom.

    Args:
        rom: File or directory name of the rom.
        roms_path: Path of the roms folder, joined onto ``LIBRARY_BASE_PATH``.

    Returns:
        A dict with the keys ``"crc_hash"`` (8 hex digits), ``"md5_hash"``
        and ``"sha1_hash"`` (hex digests).
    """
    roms_file_path = f"{LIBRARY_BASE_PATH}/{roms_path}"
    crc_c = 0
    md5_h = hashlib.md5(usedforsecurity=False)
    sha1_h = hashlib.sha1(usedforsecurity=False)

    # Check if rom is a multi-part rom
    if os.path.isdir(f"{roms_file_path}/{rom}"):
        multi_files = os.listdir(f"{roms_file_path}/{rom}")
        # os.listdir returns entries in arbitrary order; the combined hashes
        # depend on the order files are fed in, so sort for a stable result.
        for file in sorted(self._exclude_files(multi_files, "multi_parts")):
            path = Path(roms_file_path, rom, file)
            crc_c, md5_h, sha1_h = self._calculate_rom_hashes(
                path, crc_c, md5_h, sha1_h
            )
    else:
        path = Path(roms_file_path, rom)
        crc_c, md5_h, sha1_h = self._calculate_rom_hashes(
            path, crc_c, md5_h, sha1_h
        )

    return {
        # Mask to 32 bits and render as 4 big-endian bytes in hex
        "crc_hash": (crc_c & 0xFFFFFFFF).to_bytes(4, byteorder="big").hex(),
        "md5_hash": md5_h.hexdigest(),
        "sha1_hash": sha1_h.hexdigest(),
    }
def _build_rom_file(self, path: Path, with_hashes: bool = False) -> RomFile:
    """Describe the file at ``path`` as a ``RomFile``, optionally with hashes.

    Args:
        path: Location of the file on disk.
        with_hashes: When true, the hash fields are filled from
            ``_calculate_rom_hashes(path)``; otherwise they are ``None``.

    Returns:
        A ``RomFile`` with the file's name, size, modification time and the
        three hash fields.
    """
    if with_hashes:
        computed = self._calculate_rom_hashes(path)
        return RomFile(
            filename=path.name,
            size=os.stat(path).st_size,
            last_modified=os.path.getmtime(path),
            crc_hash=computed["crc_hash"],
            md5_hash=computed["md5_hash"],
            sha1_hash=computed["sha1_hash"],
        )

    # Hashing was not requested: leave every hash field empty
    return RomFile(
        filename=path.name,
        size=os.stat(path).st_size,
        last_modified=os.path.getmtime(path),
        crc_hash=None,
        md5_hash=None,
        sha1_hash=None,
    )
def get_rom_files(
    self, rom: str, roms_path: str, with_hashes: bool = False
) -> list[RomFile]:
    """List the files that make up a rom, optionally hashing each one.

    Args:
        rom: File or directory name of the rom.
        roms_path: Directory that contains the rom.
        with_hashes: Forwarded to ``_build_rom_file`` for every file.

    Returns:
        One ``RomFile`` per file. A rom that is a directory is treated as a
        multi-part rom and yields one entry for every file kept by
        ``_exclude_files(..., "multi_parts")``; otherwise a single entry.
    """
    candidate = f"{roms_path}/{rom}"

    # A plain file is a single-part rom
    if not os.path.isdir(candidate):
        return [self._build_rom_file(Path(roms_path, rom), with_hashes)]

    # A directory is a multi-part rom
    kept_parts = self._exclude_files(os.listdir(candidate), "multi_parts")
    return [
        self._build_rom_file(Path(roms_path, rom, part), with_hashes)
        for part in kept_parts
    ]
def get_roms(self, platform_fs_slug: str) -> list[FSRom]:
"""Gets all filesystem roms for a platform

View File

@@ -3,7 +3,6 @@ from enum import Enum
from typing import Any
import emoji
from config import LIBRARY_BASE_PATH
from config.config_manager import config_manager as cm
from handler.database import db_platform_handler
from handler.filesystem import fs_asset_handler, fs_firmware_handler, fs_rom_handler
@@ -207,6 +206,7 @@ async def scan_rom(
)
# Update properties that don't require metadata
file_size = sum([file["size"] for file in rom_attrs["files"]])
regs, rev, langs, other_tags = fs_rom_handler.parse_tags(rom_attrs["file_name"])
rom_attrs.update(
{
@@ -221,6 +221,7 @@ async def scan_rom(
"file_extension": fs_rom_handler.parse_file_extension(
rom_attrs["file_name"]
),
"file_size_bytes": file_size,
"multi": rom_attrs["multi"],
"regions": regs,
"revision": rev,
@@ -229,18 +230,8 @@ async def scan_rom(
}
)
# Calculate file hashes (expensive)
roms_file_path = f"{LIBRARY_BASE_PATH}/{roms_path}"
rom_files = fs_rom_handler.get_rom_files(
rom_attrs["file_name"], roms_file_path, True
)
file_size = sum([file["size"] for file in rom_files])
rom_attrs.update(
{
"files": rom_files,
"file_size_bytes": file_size,
}
)
rom_hashes = fs_rom_handler.get_rom_hashes(rom_attrs["file_name"], roms_path)
rom_attrs.update(**rom_hashes)
# If no metadata scan is required
if scan_type == ScanType.HASH_SCAN:

View File

@@ -35,9 +35,6 @@ async def test_scan_rom():
filename="Paper Mario (USA).z64",
size=1024,
last_modified=1620000000,
crc_hash="9d0d1c6e",
md5_hash="f1b7f9e4f4d0e0b7b9faa1b1f2f8e4e9",
sha1_hash="c3c7f9f3d1d0e0b7b9faa1b1f2f8e4e9",
)
]

View File

@@ -29,9 +29,6 @@ class RomFile(TypedDict):
filename: str
size: int
last_modified: float | None
crc_hash: str | None
md5_hash: str | None
sha1_hash: str | None
class Rom(BaseModel):
@@ -78,6 +75,9 @@ class Rom(BaseModel):
multi: Mapped[bool] = mapped_column(default=False)
files: Mapped[list[RomFile] | None] = mapped_column(JSON, default=[])
crc_hash: Mapped[str | None] = mapped_column(String(100))
md5_hash: Mapped[str | None] = mapped_column(String(100))
sha1_hash: Mapped[str | None] = mapped_column(String(100))
platform_id: Mapped[int] = mapped_column(
ForeignKey("platforms.id", ondelete="CASCADE")

View File

@@ -50,6 +50,9 @@ export type DetailedRomSchema = {
tags: Array<string>;
multi: boolean;
files: Array<RomFile>;
crc_hash: (string | null);
md5_hash: (string | null);
sha1_hash: (string | null);
full_path: string;
created_at: string;
updated_at: string;

View File

@@ -6,8 +6,6 @@
/**
 * A single file belonging to a rom: its name, size, hash fields and an
 * optional last-modified value.
 */
export type RomFile = {
    filename: string;
    size: number;
    // NOTE(review): typed as number here, while the rom schema types declare
    // crc_hash as (string | null) -- confirm which is intended.
    crc_hash: number;
    md5_hash: string;
    sha1_hash: string;
    // May be null
    last_modified: (number | null);
};

View File

@@ -43,6 +43,9 @@ export type RomSchema = {
tags: Array<string>;
multi: boolean;
files: Array<RomFile>;
crc_hash: (string | null);
md5_hash: (string | null);
sha1_hash: (string | null);
full_path: string;
created_at: string;
updated_at: string;

View File

@@ -45,6 +45,9 @@ export type SimpleRomSchema = {
tags: Array<string>;
multi: boolean;
files: Array<RomFile>;
crc_hash: (string | null);
md5_hash: (string | null);
sha1_hash: (string | null);
full_path: string;
created_at: string;
updated_at: string;

View File

@@ -136,14 +136,14 @@ watch(
<v-chip size="small" label>
Size: {{ formatBytes(rom.file_size_bytes) }}
</v-chip>
<v-chip v-if="!rom.multi && rom.files[0].sha1_hash" size="small" label class="mx-2">
SHA-1 Hash: {{ rom.files[0].sha1_hash }}
<v-chip v-if="!rom.multi && rom.sha1_hash" size="small" label class="mx-2">
SHA-1 Hash: {{ rom.sha1_hash }}
</v-chip>
<v-chip v-if="!rom.multi && rom.files[0].md5_hash" size="small" label class="mx-2">
MD5 Hash: {{ rom.files[0].md5_hash }}
<v-chip v-if="!rom.multi && rom.md5_hash" size="small" label class="mx-2">
MD5 Hash: {{ rom.md5_hash }}
</v-chip>
<v-chip v-if="!rom.multi && rom.files[0].crc_hash" size="small" label class="mx-2">
CRC Hash: {{ rom.files[0].crc_hash }}
<v-chip v-if="!rom.multi && rom.crc_hash" size="small" label class="mx-2">
CRC Hash: {{ rom.crc_hash }}
</v-chip>
</v-col>
</v-row>