feat(db): fuzzy search song_id

This commit is contained in:
2023-06-06 15:00:59 +08:00
parent ac04a43c49
commit eee196d0ba
2 changed files with 42 additions and 2 deletions

View File

@ -9,7 +9,9 @@ authors = [{ name = "283375", email = "log_283375@163.com" }]
description = "Calculate your B30 & R10 locally." description = "Calculate your B30 & R10 locally."
readme = "README.md" readme = "README.md"
requires-python = ">=3.8" requires-python = ">=3.8"
dependencies = [] dependencies = [
"thefuzz[speedup] == 0.19.0"
]
classifiers = [ classifiers = [
"Development Status :: 3 - Alpha", "Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",

View File

@ -1,7 +1,10 @@
import os import os
import sqlite3 import sqlite3
from dataclasses import fields, is_dataclass from dataclasses import fields, is_dataclass
from typing import List, Optional, Union from typing import List, NamedTuple, Optional, Union
from thefuzz import fuzz
from thefuzz import process as fuzz_process
from .models import DbAliasRow, DbCalculatedRow, DbChartRow, DbPackageRow, DbScoreRow from .models import DbAliasRow, DbCalculatedRow, DbChartRow, DbPackageRow, DbScoreRow
from .utils.singleton import Singleton from .utils.singleton import Singleton
@ -205,6 +208,21 @@ class Database(metaclass=Singleton):
(SELECT SUM(potential) AS r10_sum, AVG(potential) AS r10_avg, COUNT(*) AS r10_count FROM recent_10) r10, (SELECT SUM(potential) AS r10_sum, AVG(potential) AS r10_avg, COUNT(*) AS r10_count FROM recent_10) r10,
(SELECT SUM(potential) AS b30_sum, AVG(potential) AS b30_avg, COUNT(*) AS b30_count FROM best_30) b30 (SELECT SUM(potential) AS b30_sum, AVG(potential) AS b30_avg, COUNT(*) AS b30_count FROM best_30) b30
""", """,
"""
CREATE VIEW IF NOT EXISTS song_id_names AS
SELECT song_id, name
FROM (
SELECT song_id, alias AS name FROM aliases
UNION ALL
SELECT song_id, song_id AS name FROM charts
UNION ALL
SELECT song_id, name_en AS name FROM charts
UNION ALL
SELECT song_id, name_jp AS name FROM charts
) AS subquery
WHERE name IS NOT NULL AND name <> ''
GROUP BY song_id, name
"""
] ]
for sql in create_sqls: for sql in create_sqls:
@ -299,6 +317,26 @@ class Database(metaclass=Singleton):
).fetchall() ).fetchall()
] ]
class FuzzySearchSongIdResult(NamedTuple):
    """One song matched by ``fuzzy_search_song_id``."""

    # Id of the matched song, as read from the ``song_id_names`` view.
    song_id: str
    # Highest score the fuzzy matcher gave to any matched name of this song
    # (presumably on thefuzz's 0-100 scale -- TODO confirm).
    confidence: int
def fuzzy_search_song_id(self, input_str: str, limit: int = 5) -> List[FuzzySearchSongIdResult]:
    """Find the song ids whose names or aliases best match ``input_str``.

    Every row of the ``song_id_names`` view (aliases, song ids, English and
    Japanese titles) is a candidate name. Candidates are scored against
    ``input_str`` with ``fuzz.partial_ratio``.

    Args:
        input_str: Free-form text to look up.
        limit: Maximum number of candidate *names* requested from the fuzzy
            matcher. Several names can belong to one song, and one name can
            belong to several songs, so the number of returned entries may
            differ from ``limit``.

    Returns:
        One entry per matched song id, carrying the highest score among that
        song's matched names, in the order the matcher first reported each
        song.
    """
    with self.conn as conn:
        db_results = conn.execute("SELECT song_id, name FROM song_id_names").fetchall()

    # A name is not unique to a song: two songs may share a title or alias.
    # Keep every song id per name so none is silently dropped, which is what
    # a plain ``{name: song_id}`` mapping would do.
    name_song_ids_map = {}
    for song_id, name in db_results:
        name_song_ids_map.setdefault(name, []).append(song_id)

    fuzzy_results = fuzz_process.extractBests(  # type: ignore
        input_str,
        list(name_song_ids_map),
        scorer=fuzz.partial_ratio,
        limit=limit,
    )

    # Collapse name-level matches to song-level matches, keeping the best
    # score seen for each song. Dict insertion order preserves the matcher's
    # ordering of first appearance.
    results = {}
    for name, confidence in fuzzy_results:
        for song_id in name_song_ids_map[name]:
            results[song_id] = max(confidence, results.get(song_id, 0))

    return [self.FuzzySearchSongIdResult(si, confi) for si, confi in results.items()]
def get_scores( def get_scores(
self, self,
*, *,