refactor!: remove searcher

2025-09-13 06:53:09 +00:00 · 2024-04-02 21:54:07 +08:00
parent 64285c350c
commit 55e76ef650
3 changed files with 0 additions and 115 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -10,10 +10,8 @@ description = "Manage your local Arcaea score database."
 readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
  "beautifulsoup4==4.12.2",
  "SQLAlchemy==2.0.20",
  "SQLAlchemy-Utils==0.41.1",
  "Whoosh==2.7.4",
 ]
 classifiers = [
  "Development Status :: 3 - Alpha",
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,2 @@
 beautifulsoup4==4.12.2
 SQLAlchemy==2.0.20
 SQLAlchemy-Utils==0.41.1
 Whoosh==2.7.4
--- a/src/arcaea_offline/searcher.py
+++ b/src/arcaea_offline/searcher.py
@ -1,111 +0,0 @@
 from typing import List, Union
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 from whoosh.analysis import NgramFilter, StandardAnalyzer
 from whoosh.fields import ID, KEYWORD, TEXT, Schema
 from whoosh.filedb.filestore import RamStorage
 from whoosh.qparser import FuzzyTermPlugin, MultifieldParser, OrGroup
 from .models.songs import Song, SongLocalized
 from .utils.search_title import recover_search_title
 class Searcher:
    def __init__(self):
        self.text_analyzer = StandardAnalyzer() | NgramFilter(minsize=2, maxsize=5)
        self.song_schema = Schema(
            song_id=ID(stored=True, unique=True),
            title=TEXT(analyzer=self.text_analyzer, spelling=True),
            artist=TEXT(analyzer=self.text_analyzer, spelling=True),
            source=TEXT(analyzer=self.text_analyzer, spelling=True),
            keywords=KEYWORD(lowercase=True, stored=True, scorable=True),
        )
        self.storage = RamStorage()
        self.index = self.storage.create_index(self.song_schema)
        self.default_query_parser = MultifieldParser(
            ["song_id", "title", "artist", "source", "keywords"],
            self.song_schema,
            group=OrGroup,
        )
        self.default_query_parser.add_plugin(FuzzyTermPlugin())
    def import_songs(self, session: Session):
        writer = self.index.writer()
        songs = list(session.scalars(select(Song)))
        song_localize_stmt = select(SongLocalized)
        for song in songs:
            stmt = song_localize_stmt.where(SongLocalized.id == song.id)
            sl = session.scalar(stmt)
            song_id = song.id
            possible_titles: List[Union[str, None]] = [song.title]
            possible_artists: List[Union[str, None]] = [song.artist]
            possible_sources: List[Union[str, None]] = [song.source]
            if sl:
                possible_titles.extend(
                    [sl.title_ja, sl.title_ko, sl.title_zh_hans, sl.title_zh_hant]
                )
                possible_titles.extend(
                    recover_search_title(sl.search_title_ja)
                    + recover_search_title(sl.search_title_ko)
                    + recover_search_title(sl.search_title_zh_hans)
                    + recover_search_title(sl.search_title_zh_hant)
                )
                possible_artists.extend(
                    recover_search_title(sl.search_artist_ja)
                    + recover_search_title(sl.search_artist_ko)
                    + recover_search_title(sl.search_artist_zh_hans)
                    + recover_search_title(sl.search_artist_zh_hant)
                )
                possible_sources.extend(
                    [
                        sl.source_ja,
                        sl.source_ko,
                        sl.source_zh_hans,
                        sl.source_zh_hant,
                    ]
                )
            # remove empty items in list
            titles = [t for t in possible_titles if t != "" and t is not None]
            artists = [t for t in possible_artists if t != "" and t is not None]
            sources = [t for t in possible_sources if t != "" and t is not None]
            writer.update_document(
                song_id=song_id,
                title=" ".join(titles),
                artist=" ".join(artists),
                source=" ".join(sources),
                keywords=" ".join([song_id] + titles + artists + sources),
            )
        writer.commit()
    def did_you_mean(self, string: str):
        results = set()
        with self.index.searcher() as searcher:
            corrector_keywords = searcher.corrector("keywords")  # type: ignore
            corrector_song_id = searcher.corrector("song_id")  # type: ignore
            corrector_title = searcher.corrector("title")  # type: ignore
            corrector_artist = searcher.corrector("artist")  # type: ignore
            corrector_source = searcher.corrector("source")  # type: ignore
            results.update(corrector_keywords.suggest(string))
            results.update(corrector_song_id.suggest(string))
            results.update(corrector_title.suggest(string))
            results.update(corrector_artist.suggest(string))
            results.update(corrector_source.suggest(string))
        if string in results:
            results.remove(string)
        return list(results)
    def search(self, string: str, *, limit: int = 10):
        query_string = f"{string}"
        query = self.default_query_parser.parse(query_string)
        with self.index.searcher() as searcher:
            results = searcher.search(query, limit=limit)
            return [result.get("song_id") for result in results]