[AIKIDO-13126604] Stream file when building file hash

This commit is contained in:
Georges-Antoine Assi
2026-02-16 13:51:20 -05:00
parent c696ffdd37
commit 77823c168d
2 changed files with 30 additions and 33 deletions

View File

@@ -9,36 +9,6 @@ from models.user import User
from .base_handler import FSHandler
def compute_file_hash(file_path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    it is never held in memory as a whole. The hash object is created with
    ``usedforsecurity=False``.
    """
    digest = hashlib.md5(usedforsecurity=False)
    with open(file_path, "rb") as stream:
        # An empty bytes object marks end-of-file and ends the loop.
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
def compute_zip_hash(zip_path: str) -> str:
    """Return a hexadecimal MD5 digest derived from a ZIP archive's entries.

    Each entry whose name does not end in ``/`` is hashed on its own, in
    sorted name order. The resulting ``name:digest`` lines are joined with
    newlines and hashed once more. Only entry names and their decompressed
    bytes feed the final digest.

    Entries are decompressed and hashed in 8192-byte chunks instead of being
    read into memory in full, so a large member no longer costs its whole
    uncompressed size in RAM. The resulting digest is unchanged.
    """
    entry_lines = []
    with zipfile.ZipFile(zip_path, "r") as zf:
        for name in sorted(zf.namelist()):
            # Names ending in "/" are directory entries and carry no data.
            if name.endswith("/"):
                continue
            entry_hash = hashlib.md5(usedforsecurity=False)
            with zf.open(name) as member:
                while chunk := member.read(8192):
                    entry_hash.update(chunk)
            entry_lines.append(f"{name}:{entry_hash.hexdigest()}")
    combined = "\n".join(entry_lines)
    return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
def compute_content_hash(file_path: str) -> str | None:
    """Return a content hash for ``file_path``, or ``None`` on failure.

    Files that ``zipfile.is_zipfile`` recognises are hashed with
    ``compute_zip_hash``; everything else goes through ``compute_file_hash``.
    Any exception raised along the way is logged at debug level and turned
    into a ``None`` result (best-effort hashing).
    """
    try:
        hasher = (
            compute_zip_hash if zipfile.is_zipfile(file_path) else compute_file_hash
        )
        return hasher(file_path)
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None
class FSAssetsHandler(FSHandler):
def __init__(self) -> None:
super().__init__(base_path=ASSETS_BASE_PATH)
@@ -97,3 +67,30 @@ class FSAssetsHandler(FSHandler):
return self._build_asset_file_path(
user, "screenshots", platform_fs_slug, rom_id
)
async def _compute_file_hash(self, file_path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The result of awaiting ``self.stream_file`` is used as an async context
    manager, and the stream is read in 8192-byte chunks so the file is never
    loaded in full. The hash object is created with
    ``usedforsecurity=False``.
    """
    digest = hashlib.md5(usedforsecurity=False)
    async with await self.stream_file(file_path=file_path) as stream:
        while True:
            block = await stream.read(8192)
            # A falsy (empty) chunk signals the end of the stream.
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
async def _compute_zip_hash(self, zip_path: str) -> str:
    """Return a hexadecimal MD5 digest derived from a ZIP archive's entries.

    Each entry whose name does not end in ``/`` is hashed on its own, in
    sorted name order. The resulting ``name:digest`` lines are joined with
    newlines and hashed once more. Only entry names and their decompressed
    bytes feed the final digest.

    Entries are decompressed and hashed in 8192-byte chunks instead of being
    read into memory in full, so a large member no longer costs its whole
    uncompressed size in RAM. The resulting digest is unchanged.
    """
    # NOTE(review): the zipfile calls below are synchronous and this
    # coroutine never awaits, so it does not yield while the archive is
    # read. Consider offloading to a worker thread if that matters here.
    entry_lines = []
    with zipfile.ZipFile(zip_path, "r") as zf:
        for name in sorted(zf.namelist()):
            # Names ending in "/" are directory entries and carry no data.
            if name.endswith("/"):
                continue
            entry_hash = hashlib.md5(usedforsecurity=False)
            with zf.open(name) as member:
                while chunk := member.read(8192):
                    entry_hash.update(chunk)
            entry_lines.append(f"{name}:{entry_hash.hexdigest()}")
    combined = "\n".join(entry_lines)
    return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
async def compute_content_hash(self, file_path: str) -> str | None:
    """Return a content hash for ``file_path``, or ``None`` on failure.

    Files that ``zipfile.is_zipfile`` recognises are hashed with
    ``_compute_zip_hash``; everything else goes through
    ``_compute_file_hash``. Any exception raised along the way is logged at
    debug level and turned into a ``None`` result (best-effort hashing).
    """
    try:
        hasher = (
            self._compute_zip_hash
            if zipfile.is_zipfile(file_path)
            else self._compute_file_hash
        )
        return await hasher(file_path)
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None

View File

@@ -9,7 +9,6 @@ from config.config_manager import config_manager as cm
from endpoints.responses.rom import SimpleRomSchema
from handler.database import db_platform_handler, db_rom_handler
from handler.filesystem import fs_asset_handler, fs_firmware_handler
from handler.filesystem.assets_handler import compute_content_hash
from handler.filesystem.roms_handler import FSRom
from handler.metadata import (
meta_flashpoint_handler,
@@ -833,8 +832,9 @@ async def _scan_asset(file_name: str, asset_path: str, should_hash: bool = False
}
if should_hash:
absolute_path = f"{ASSETS_BASE_PATH}/{file_path}"
result["content_hash"] = compute_content_hash(absolute_path)
result["content_hash"] = await fs_asset_handler.compute_content_hash(
f"{ASSETS_BASE_PATH}/{file_path}"
)
return result