refactor!: remove searcher

This commit is contained in:
283375 2024-04-02 21:54:07 +08:00
parent 64285c350c
commit 55e76ef650
Signed by: 283375
SSH Key Fingerprint: SHA256:UcX0qg6ZOSDOeieKPGokA5h7soykG61nz2uxuQgVLSk
3 changed files with 0 additions and 115 deletions

View File

@ -10,10 +10,8 @@ description = "Manage your local Arcaea score database."
readme = "README.md" readme = "README.md"
requires-python = ">=3.8" requires-python = ">=3.8"
dependencies = [ dependencies = [
"beautifulsoup4==4.12.2",
"SQLAlchemy==2.0.20", "SQLAlchemy==2.0.20",
"SQLAlchemy-Utils==0.41.1", "SQLAlchemy-Utils==0.41.1",
"Whoosh==2.7.4",
] ]
classifiers = [ classifiers = [
"Development Status :: 3 - Alpha", "Development Status :: 3 - Alpha",

View File

@ -1,4 +1,2 @@
beautifulsoup4==4.12.2
SQLAlchemy==2.0.20 SQLAlchemy==2.0.20
SQLAlchemy-Utils==0.41.1 SQLAlchemy-Utils==0.41.1
Whoosh==2.7.4

View File

@ -1,111 +0,0 @@
from typing import List, Union
from sqlalchemy import select
from sqlalchemy.orm import Session
from whoosh.analysis import NgramFilter, StandardAnalyzer
from whoosh.fields import ID, KEYWORD, TEXT, Schema
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import FuzzyTermPlugin, MultifieldParser, OrGroup
from .models.songs import Song, SongLocalized
from .utils.search_title import recover_search_title
class Searcher:
def __init__(self):
self.text_analyzer = StandardAnalyzer() | NgramFilter(minsize=2, maxsize=5)
self.song_schema = Schema(
song_id=ID(stored=True, unique=True),
title=TEXT(analyzer=self.text_analyzer, spelling=True),
artist=TEXT(analyzer=self.text_analyzer, spelling=True),
source=TEXT(analyzer=self.text_analyzer, spelling=True),
keywords=KEYWORD(lowercase=True, stored=True, scorable=True),
)
self.storage = RamStorage()
self.index = self.storage.create_index(self.song_schema)
self.default_query_parser = MultifieldParser(
["song_id", "title", "artist", "source", "keywords"],
self.song_schema,
group=OrGroup,
)
self.default_query_parser.add_plugin(FuzzyTermPlugin())
def import_songs(self, session: Session):
writer = self.index.writer()
songs = list(session.scalars(select(Song)))
song_localize_stmt = select(SongLocalized)
for song in songs:
stmt = song_localize_stmt.where(SongLocalized.id == song.id)
sl = session.scalar(stmt)
song_id = song.id
possible_titles: List[Union[str, None]] = [song.title]
possible_artists: List[Union[str, None]] = [song.artist]
possible_sources: List[Union[str, None]] = [song.source]
if sl:
possible_titles.extend(
[sl.title_ja, sl.title_ko, sl.title_zh_hans, sl.title_zh_hant]
)
possible_titles.extend(
recover_search_title(sl.search_title_ja)
+ recover_search_title(sl.search_title_ko)
+ recover_search_title(sl.search_title_zh_hans)
+ recover_search_title(sl.search_title_zh_hant)
)
possible_artists.extend(
recover_search_title(sl.search_artist_ja)
+ recover_search_title(sl.search_artist_ko)
+ recover_search_title(sl.search_artist_zh_hans)
+ recover_search_title(sl.search_artist_zh_hant)
)
possible_sources.extend(
[
sl.source_ja,
sl.source_ko,
sl.source_zh_hans,
sl.source_zh_hant,
]
)
# remove empty items in list
titles = [t for t in possible_titles if t != "" and t is not None]
artists = [t for t in possible_artists if t != "" and t is not None]
sources = [t for t in possible_sources if t != "" and t is not None]
writer.update_document(
song_id=song_id,
title=" ".join(titles),
artist=" ".join(artists),
source=" ".join(sources),
keywords=" ".join([song_id] + titles + artists + sources),
)
writer.commit()
def did_you_mean(self, string: str):
results = set()
with self.index.searcher() as searcher:
corrector_keywords = searcher.corrector("keywords") # type: ignore
corrector_song_id = searcher.corrector("song_id") # type: ignore
corrector_title = searcher.corrector("title") # type: ignore
corrector_artist = searcher.corrector("artist") # type: ignore
corrector_source = searcher.corrector("source") # type: ignore
results.update(corrector_keywords.suggest(string))
results.update(corrector_song_id.suggest(string))
results.update(corrector_title.suggest(string))
results.update(corrector_artist.suggest(string))
results.update(corrector_source.suggest(string))
if string in results:
results.remove(string)
return list(results)
def search(self, string: str, *, limit: int = 10):
query_string = f"{string}"
query = self.default_query_parser.parse(query_string)
with self.index.searcher() as searcher:
results = searcher.search(query, limit=limit)
return [result.get("song_id") for result in results]