# Module: src/arcaea_offline/external/wiki_arcaea_cn/constants.py
#
# Packaging notes carried over from the same change set:
#   * pyproject.toml / requirements.txt pin beautifulsoup4==4.12.2 alongside
#     SQLAlchemy==2.0.20 and SQLAlchemy-Utils==0.41.1.
#   * The package __init__.py re-exports this class with
#     `from .constants import WikiArcaeaCnConstantParser`.
from decimal import Decimal
from typing import Dict, List, Optional, Tuple

from bs4 import BeautifulSoup
from bs4.element import Tag


class WikiArcaeaCnConstantParser:
    """Extract per-song chart constants from a saved HTML page.

    The page is scanned for tables whose first row carries the header cells
    listed in ``HEADERS`` (in that order).  Every following row of such a
    table contributes one entry: the text of the link in the first cell is
    the song title, and the next cells are the constants.

    NOTE(review): judging by the package name, the input is presumably a page
    saved from the Chinese Arcaea wiki -- confirm against the caller.
    """

    # Expected header texts, matched by substring against the first row's
    # <th> cells.  The first entry is the title column; the remaining ones
    # are the columns holding constants.
    HEADERS = ["曲目", "PST", "PRS", "FTR", "BYD"]

    def __init__(self, filepath):
        """Remember the path of the HTML file to parse.

        Args:
            filepath: Path of a UTF-8 encoded HTML file.  It is only opened
                when :meth:`parse` is called.
        """
        self.filepath = filepath

    def parse(self) -> Dict[str, List[Optional[int]]]:
        """Parse the HTML file and return the constants of every song found.

        Returns:
            A mapping from song title to a list of constants, one per
            constant column present in the row (at most
            ``len(HEADERS) - 1``).  Each constant is the cell value
            multiplied by ten and truncated to ``int`` (``"9.5"`` -> ``95``);
            cells that are empty or contain ``-`` yield ``None``.  When a
            title occurs more than once, the last occurrence wins.

        Raises:
            OSError: If the file cannot be opened or read.
            decimal.InvalidOperation: If a constant cell holds text that is
                neither empty, a ``-`` placeholder, nor a decimal number.
        """
        with open(self.filepath, "r", encoding="utf-8") as html_f:
            html = BeautifulSoup(html_f.read(), "html.parser")

        result: Dict[str, List[Optional[int]]] = {}

        for table in html.find_all("table"):
            # The isinstance checks also narrow the types for static checkers.
            if not isinstance(table, Tag):
                continue

            tbody = table.find("tbody")
            if not isinstance(tbody, Tag):
                continue

            rows = tbody.find_all("tr")
            if not rows or not self._is_header_row(rows[0]):
                continue

            for row in rows[1:]:
                entry = self._parse_row(row)
                if entry is None:
                    continue
                title, constants = entry
                result[title] = constants

        return result

    def _is_header_row(self, row: Tag) -> bool:
        """Return True when *row* carries the expected header cells."""
        header_cells = row.find_all("th")
        # Without this guard ``all()`` over an empty zip is vacuously True,
        # which would make every table without <th> cells look like a match.
        if not header_cells:
            return False
        # get_text() rather than .string: .string is None as soon as a cell
        # has more than one child (e.g. text followed by <br>).
        return all(
            expected in cell.get_text()
            for expected, cell in zip(self.HEADERS, header_cells)
        )

    def _parse_row(self, row: Tag) -> Optional[Tuple[str, List[Optional[int]]]]:
        """Return ``(title, constants)`` for a data row, or None to skip it."""
        cells = row.find_all("td")
        if not cells:
            # e.g. a repeated header row made only of <th> cells
            return None

        link = cells[0].find("a")
        if not isinstance(link, Tag):
            return None

        # get_text() yields a plain str (never None) even for nested markup.
        title = link.get_text()
        if not title:
            return None

        constants = [
            self._parse_constant(cell.get_text())
            for cell in cells[1 : len(self.HEADERS)]
        ]
        return title, constants

    @staticmethod
    def _parse_constant(text: str) -> Optional[int]:
        """Convert one cell text to constant*10, or None for a missing chart."""
        cleaned = text.replace("\n", "").strip()
        if not cleaned or "-" in cleaned:
            return None
        # Decimal keeps e.g. 9.7 * 10 exact, where float math could give 96.99...
        return int(Decimal(cleaned) * 10)