diff --git a/pyproject.toml b/pyproject.toml index 755d9a1..2432639 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,10 +10,8 @@ description = "Manage your local Arcaea score database." readme = "README.md" requires-python = ">=3.8" dependencies = [ - "beautifulsoup4==4.12.2", "SQLAlchemy==2.0.20", "SQLAlchemy-Utils==0.41.1", - "Whoosh==2.7.4", ] classifiers = [ "Development Status :: 3 - Alpha", diff --git a/requirements.txt b/requirements.txt index 58463df..eaef1dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,2 @@ -beautifulsoup4==4.12.2 SQLAlchemy==2.0.20 SQLAlchemy-Utils==0.41.1 -Whoosh==2.7.4 diff --git a/src/arcaea_offline/searcher.py b/src/arcaea_offline/searcher.py deleted file mode 100644 index 55c5c3e..0000000 --- a/src/arcaea_offline/searcher.py +++ /dev/null @@ -1,111 +0,0 @@ -from typing import List, Union - -from sqlalchemy import select -from sqlalchemy.orm import Session -from whoosh.analysis import NgramFilter, StandardAnalyzer -from whoosh.fields import ID, KEYWORD, TEXT, Schema -from whoosh.filedb.filestore import RamStorage -from whoosh.qparser import FuzzyTermPlugin, MultifieldParser, OrGroup - -from .models.songs import Song, SongLocalized -from .utils.search_title import recover_search_title - - -class Searcher: - def __init__(self): - self.text_analyzer = StandardAnalyzer() | NgramFilter(minsize=2, maxsize=5) - self.song_schema = Schema( - song_id=ID(stored=True, unique=True), - title=TEXT(analyzer=self.text_analyzer, spelling=True), - artist=TEXT(analyzer=self.text_analyzer, spelling=True), - source=TEXT(analyzer=self.text_analyzer, spelling=True), - keywords=KEYWORD(lowercase=True, stored=True, scorable=True), - ) - self.storage = RamStorage() - self.index = self.storage.create_index(self.song_schema) - - self.default_query_parser = MultifieldParser( - ["song_id", "title", "artist", "source", "keywords"], - self.song_schema, - group=OrGroup, - ) - self.default_query_parser.add_plugin(FuzzyTermPlugin()) - - def import_songs(self, session: Session): - writer = self.index.writer() - songs = list(session.scalars(select(Song))) - song_localize_stmt = select(SongLocalized) - for song in songs: - stmt = song_localize_stmt.where(SongLocalized.id == song.id) - sl = session.scalar(stmt) - song_id = song.id - possible_titles: List[Union[str, None]] = [song.title] - possible_artists: List[Union[str, None]] = [song.artist] - possible_sources: List[Union[str, None]] = [song.source] - if sl: - possible_titles.extend( - [sl.title_ja, sl.title_ko, sl.title_zh_hans, sl.title_zh_hant] - ) - possible_titles.extend( - recover_search_title(sl.search_title_ja) - + recover_search_title(sl.search_title_ko) - + recover_search_title(sl.search_title_zh_hans) - + recover_search_title(sl.search_title_zh_hant) - ) - possible_artists.extend( - recover_search_title(sl.search_artist_ja) - + recover_search_title(sl.search_artist_ko) - + recover_search_title(sl.search_artist_zh_hans) - + recover_search_title(sl.search_artist_zh_hant) - ) - possible_sources.extend( - [ - sl.source_ja, - sl.source_ko, - sl.source_zh_hans, - sl.source_zh_hant, - ] - ) - - # remove empty items in list - titles = [t for t in possible_titles if t != "" and t is not None] - artists = [t for t in possible_artists if t != "" and t is not None] - sources = [t for t in possible_sources if t != "" and t is not None] - - writer.update_document( - song_id=song_id, - title=" ".join(titles), - artist=" ".join(artists), - source=" ".join(sources), - keywords=" ".join([song_id] + titles + artists + sources), - ) - - writer.commit() - - def did_you_mean(self, string: str): - results = set() - - with self.index.searcher() as searcher: - corrector_keywords = searcher.corrector("keywords") # type: ignore - corrector_song_id = searcher.corrector("song_id") # type: ignore - corrector_title = searcher.corrector("title") # type: ignore - corrector_artist = searcher.corrector("artist") # type: ignore - corrector_source = searcher.corrector("source") # type: ignore - - results.update(corrector_keywords.suggest(string)) - results.update(corrector_song_id.suggest(string)) - results.update(corrector_title.suggest(string)) - results.update(corrector_artist.suggest(string)) - results.update(corrector_source.suggest(string)) - - if string in results: - results.remove(string) - - return list(results) - - def search(self, string: str, *, limit: int = 10): - query_string = f"{string}" - query = self.default_query_parser.parse(query_string) - with self.index.searcher() as searcher: - results = searcher.search(query, limit=limit) - return [result.get("song_id") for result in results]