diff --git a/backend/handler/metadata/base_hander.py b/backend/handler/metadata/base_hander.py
index 3618ab6ee..d805643e6 100644
--- a/backend/handler/metadata/base_hander.py
+++ b/backend/handler/metadata/base_hander.py
@@ -3,19 +3,21 @@ import json
import os
import re
import unicodedata
-from difflib import SequenceMatcher
from functools import lru_cache
from itertools import batched
from typing import Final, NotRequired, TypedDict
from handler.redis_handler import async_cache, sync_cache
from logger.logger import log
+from strsimpy.jaro_winkler import JaroWinkler
from tasks.scheduled.update_switch_titledb import (
SWITCH_PRODUCT_ID_KEY,
SWITCH_TITLEDB_INDEX_KEY,
update_switch_titledb_task,
)
+jarowinkler = JaroWinkler()
+
def conditionally_set_cache(
index_key: str, filename: str, parent_dir: str = os.path.dirname(__file__)
@@ -133,70 +135,6 @@ class MetadataHandler:
) -> str:
return _normalize_search_term(name, remove_articles, remove_punctuation)
- def calculate_text_similarity(
- self,
- normalized_search_term: str,
- game_name: str,
- remove_articles: bool = True,
- remove_punctuation: bool = True,
- ) -> float:
- """
- Calculate similarity between search term and game name using multiple metrics.
- Returns a score between 0 and 1, where 1 is a perfect match.
-
- Args:
- search_term: The search term to compare
- game_name: The game name to compare against
-
- Returns:
- Similarity score between 0 and 1
- """
- game_normalized = self.normalize_search_term(
- game_name,
- remove_articles=remove_articles,
- remove_punctuation=remove_punctuation,
- )
-
- # Exact match gets the highest score
- if normalized_search_term == game_normalized:
- return 1.0
-
- # Split into tokens for word-based matching
- search_tokens = set(WORD_TOKEN_PATTERN.findall(normalized_search_term.lower()))
- game_tokens = set(WORD_TOKEN_PATTERN.findall(game_normalized.lower()))
-
- # Calculate token overlap ratio
- if search_tokens and game_tokens:
- intersection = search_tokens & game_tokens
- union = search_tokens | game_tokens
- token_overlap_ratio = len(intersection) / len(union)
- else:
- token_overlap_ratio = 0.0
-
- # Calculate sequence similarity (better for longer strings)
- sequence_ratio = SequenceMatcher(
- None, normalized_search_term, game_normalized
- ).ratio()
-
- # Calculate Wagner-Fischer distance (normalized by max length)
- max_len = max(len(normalized_search_term), len(game_normalized))
- if max_len > 0:
- wagner_fischer_ratio = 1 - (
- wagner_fischer_distance(normalized_search_term, game_normalized)
- / max_len
- )
- else:
- wagner_fischer_ratio = 1.0
-
- # Token overlap is most important for game titles
- final_score = (
- token_overlap_ratio * 0.5
- + sequence_ratio * 0.3
- + wagner_fischer_ratio * 0.2
- )
-
- return final_score
-
def find_best_match(
self,
normalized_search_term: str,
@@ -223,12 +161,13 @@ class MetadataHandler:
best_score = 0.0
for game_name in game_names:
- score = self.calculate_text_similarity(
- normalized_search_term,
+ game_normalized = self.normalize_search_term(
game_name,
remove_articles=remove_articles,
remove_punctuation=remove_punctuation,
)
+ # Reuse the module-level JaroWinkler instance rather than building one per candidate.
+ score = jarowinkler.similarity(normalized_search_term, game_normalized)
if score > best_score:
best_score = score
best_match = game_name
diff --git a/backend/handler/metadata/igdb_handler.py b/backend/handler/metadata/igdb_handler.py
index 929c4173b..e267754ed 100644
--- a/backend/handler/metadata/igdb_handler.py
+++ b/backend/handler/metadata/igdb_handler.py
@@ -223,7 +223,6 @@ class IGDBHandler(MetadataHandler):
"Client-ID": IGDB_CLIENT_ID,
"Accept": "application/json",
}
- self.min_similarity_score: Final = 0.75
@staticmethod
def check_twitch_token(func):
@@ -343,7 +342,6 @@ class IGDBHandler(MetadataHandler):
best_match, best_score = self.find_best_match(
search_term,
list(games_by_name.keys()),
- min_similarity_score=self.min_similarity_score,
remove_punctuation=False,
)
if best_match:
@@ -372,7 +370,6 @@ class IGDBHandler(MetadataHandler):
best_match, best_score = self.find_best_match(
search_term,
list(games_by_name.keys()),
- min_similarity_score=self.min_similarity_score,
remove_punctuation=False,
)
if best_match:
diff --git a/backend/handler/metadata/moby_handler.py b/backend/handler/metadata/moby_handler.py
index 799acce3f..1b02f162c 100644
--- a/backend/handler/metadata/moby_handler.py
+++ b/backend/handler/metadata/moby_handler.py
@@ -77,7 +77,6 @@ def extract_metadata_from_moby_rom(rom: MobyGame) -> MobyMetadata:
class MobyGamesHandler(MetadataHandler):
def __init__(self) -> None:
self.moby_service = MobyGamesService()
- self.min_similarity_score: Final = 0.75
async def _search_rom(
self, search_term: str, platform_moby_id: int
@@ -96,7 +95,6 @@ class MobyGamesHandler(MetadataHandler):
best_match, best_score = self.find_best_match(
search_term,
list(games_by_name.keys()),
- min_similarity_score=self.min_similarity_score,
remove_punctuation=False,
)
if best_match:
diff --git a/backend/handler/metadata/sgdb_handler.py b/backend/handler/metadata/sgdb_handler.py
index b7b71f21d..67c772a6d 100644
--- a/backend/handler/metadata/sgdb_handler.py
+++ b/backend/handler/metadata/sgdb_handler.py
@@ -31,7 +31,7 @@ class SGDBRom(TypedDict):
class SGDBBaseHandler(MetadataHandler):
def __init__(self) -> None:
self.sgdb_service = SteamGridDBService()
- self.min_similarity_score: Final = 0.75
+ self.min_similarity_score: Final = 0.95
async def get_details(self, search_term: str) -> list[SGDBResult]:
if not STEAMGRIDDB_API_ENABLED:
diff --git a/backend/handler/metadata/ss_handler.py b/backend/handler/metadata/ss_handler.py
index 256be0a40..4bddd70b3 100644
--- a/backend/handler/metadata/ss_handler.py
+++ b/backend/handler/metadata/ss_handler.py
@@ -278,7 +278,6 @@ def extract_metadata_from_ss_rom(rom: SSGame) -> SSMetadata:
class SSHandler(MetadataHandler):
def __init__(self) -> None:
self.ss_service = ScreenScraperService()
- self.min_similarity_score: Final = 0.75
async def _search_rom(self, search_term: str, platform_ss_id: int) -> SSGame | None:
if not platform_ss_id:
@@ -296,7 +295,6 @@ class SSHandler(MetadataHandler):
best_match, best_score = self.find_best_match(
search_term,
list(games_by_name.keys()),
- min_similarity_score=self.min_similarity_score,
remove_punctuation=False,
)
if best_match:
diff --git a/frontend/src/components/Details/Title.vue b/frontend/src/components/Details/Title.vue
index 26203d1d6..1069e547f 100644
--- a/frontend/src/components/Details/Title.vue
+++ b/frontend/src/components/Details/Title.vue
@@ -159,7 +159,12 @@ const hashMatches = computed(() => {
{{ rom.moby_id }}
-
+
{{
(parseFloat(rom.moby_metadata.moby_score) * 10).toFixed(2)
diff --git a/pyproject.toml b/pyproject.toml
index 8b51262cb..da17b9e21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ dependencies = [
"sentry-sdk ~= 2.32",
"starlette-csrf ~= 3.0",
"streaming-form-data ~= 1.19",
+ "strsimpy>=0.2.1",
"types-colorama ~= 0.4",
"types-passlib ~= 1.7",
"types-pyyaml ~= 6.0",
diff --git a/uv.lock b/uv.lock
index c2615c7db..2d767308d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.13"
resolution-markers = [
"platform_python_implementation != 'PyPy'",
@@ -1715,6 +1715,7 @@ dependencies = [
{ name = "sqlalchemy", extra = ["mariadb-connector", "mysql-connector", "postgresql-psycopg"] },
{ name = "starlette-csrf" },
{ name = "streaming-form-data" },
+ { name = "strsimpy" },
{ name = "types-colorama" },
{ name = "types-passlib" },
{ name = "types-pyyaml" },
@@ -1787,6 +1788,7 @@ requires-dist = [
{ name = "sqlalchemy", extras = ["mariadb-connector", "mysql-connector", "postgresql-psycopg"], specifier = "~=2.0" },
{ name = "starlette-csrf", specifier = "~=3.0" },
{ name = "streaming-form-data", specifier = "~=1.19" },
+ { name = "strsimpy", specifier = ">=0.2.1" },
{ name = "types-colorama", specifier = "~=0.4" },
{ name = "types-passlib", specifier = "~=1.7" },
{ name = "types-pyyaml", specifier = "~=6.0" },
@@ -2001,6 +2003,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5d/53/a709d8925a0e48bc4904f12e1f619b0295042c06d66aacaa213f7a18a927/streaming_form_data-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:e2dee016f1db735cd91e97421340cd3799f9fd46b1e39e4a11d6215c7cbe1edc", size = 201927, upload-time = "2025-01-10T18:33:07.6Z" },
]
+[[package]]
+name = "strsimpy"
+version = "0.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0d/7e/5ccf2edfa1e97154dbf3119fd240b1f5fbe32ad1edd1db5f7a94d3f7a037/strsimpy-0.2.1.tar.gz", hash = "sha256:0842eb57f7af86c882a59a1bc8721ec2580a267e563fd0503ced2972040372c9", size = 24403, upload-time = "2021-09-10T09:14:20.405Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/fc/90/bd55a4b18f4b75a76e32f444975d2c869d692eb23897d116d47122f88d1a/strsimpy-0.2.1-py3-none-any.whl", hash = "sha256:d676a440d5d3dbcf5ba92d01814a03a218776ce07bd7a8185da7019e04cf9ba7", size = 45870, upload-time = "2021-09-10T09:14:18.944Z" },
+]
+
[[package]]
name = "texttable"
version = "1.7.0"