This commit is contained in:
283375 2023-08-26 18:32:13 +08:00
parent 003e1a7289
commit a2193a7d1b
Signed by: 283375
SSH Key Fingerprint: SHA256:UcX0qg6ZOSDOeieKPGokA5h7soykG61nz2uxuQgVLSk
4 changed files with 59 additions and 1 deletions

View File

@ -9,7 +9,11 @@ authors = [{ name = "283375", email = "log_283375@163.com" }]
description = "Manage your local Arcaea score database."
readme = "README.md"
requires-python = ">=3.8"
dependencies = ["SQLAlchemy==2.0.20", "SQLAlchemy-Utils==0.41.1"]
dependencies = [
"beautifulsoup4==4.12.2",
"SQLAlchemy==2.0.20",
"SQLAlchemy-Utils==0.41.1",
]
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",

View File

@ -1,2 +1,3 @@
beautifulsoup4==4.12.2
SQLAlchemy==2.0.20
SQLAlchemy-Utils==0.41.1

View File

@ -0,0 +1 @@
from .constants import WikiArcaeaCnConstantParser

View File

@ -0,0 +1,52 @@
from decimal import Decimal
from typing import Dict, List, Optional

from bs4 import BeautifulSoup
from bs4.element import Tag
class WikiArcaeaCnConstantParser:
    """Extract per-song chart constants from a saved HTML page.

    The parser scans every ``<table>`` in the file and keeps only those whose
    first row carries the column captions listed in ``HEADERS``. Each
    following row contributes one entry to the result, keyed by the text of
    the link in its first cell.

    NOTE(review): judging by the class name the input is presumably a page
    saved from the Arcaea CN wiki -- confirm against the caller.
    """

    # Captions expected, in this order, in the first row of a constants table.
    HEADERS = ["曲目", "PST", "PRS", "FTR", "BYD"]

    def __init__(self, filepath):
        """Remember the path of the HTML file to parse.

        Args:
            filepath: Path handed to ``open()`` when ``parse()`` is called.
        """
        self.filepath = filepath

    @staticmethod
    def _parse_constant(text) -> Optional[int]:
        """Convert the text of one constant cell into a scaled integer.

        Args:
            text: Raw cell text; surrounding whitespace is ignored.

        Returns:
            ``None`` when the cell is empty or contains ``-``; otherwise the
            decimal value multiplied by ten and truncated to ``int`` (for
            example ``"9.5"`` becomes ``95``).

        Raises:
            decimal.InvalidOperation: If the text is neither a placeholder
                nor a valid decimal number.
        """
        cleaned = text.strip()
        if not cleaned or "-" in cleaned:
            return None
        # Decimal keeps the tenths digit exact; float("9.7") * 10 would not.
        return int(Decimal(cleaned) * 10)

    def _has_constant_headers(self, first_tr) -> bool:
        """Tell whether a table's first row carries all expected captions.

        Args:
            first_tr: The first ``<tr>`` element of the table body.

        Returns:
            ``True`` only if the row has at least ``len(HEADERS)`` ``<th>``
            cells and each leading cell's text contains the matching caption.
        """
        header_cells = first_tr.find_all("th")
        # zip() stops at the shorter input, so without this length check a
        # row with too few (or zero) <th> cells would match vacuously.
        if len(header_cells) < len(self.HEADERS):
            return False
        # get_text() is used rather than .string, which is None whenever the
        # cell wraps its caption in nested markup.
        return all(
            expected_header in th.get_text()
            for expected_header, th in zip(self.HEADERS, header_cells)
        )

    def parse(self) -> Dict[str, List[Optional[int]]]:
        """Parse the HTML file and collect the constants of every song row.

        Returns:
            A dict mapping the link text of each row's first cell to a list
            built from the row's second to fifth cells (fewer if the row is
            shorter). Each item is the cell's constant multiplied by ten as
            an ``int``, or ``None`` for an empty or ``-`` cell. When the same
            title occurs more than once, the last occurrence wins.

        Raises:
            OSError: If the file cannot be opened.
            decimal.InvalidOperation: If a constant cell holds text that is
                neither a placeholder nor a decimal number.
        """
        with open(self.filepath, "r", encoding="utf-8") as html_f:
            html = BeautifulSoup(html_f.read(), "html.parser")

        result: Dict[str, List[Optional[int]]] = {}
        for table in html.find_all("table"):
            # Keep only tables that look like a constants table.
            if not isinstance(table, Tag):
                continue
            tbody = table.find("tbody")
            if not isinstance(tbody, Tag):
                continue
            first_tr = tbody.find("tr")
            if not isinstance(first_tr, Tag):
                continue
            if not self._has_constant_headers(first_tr):
                continue

            # The first row is the caption row checked above; skip it.
            for row in tbody.find_all("tr")[1:]:
                cells = row.find_all("td")
                if not cells:
                    # Rows without data cells carry no song entry.
                    continue
                link = cells[0].find("a")
                if not isinstance(link, Tag):
                    # No link in the first cell means no title to key on.
                    continue
                # get_text() returns a plain str, so the result does not keep
                # references into the parse tree as a NavigableString would.
                title = link.get_text()
                result[title] = [
                    self._parse_constant(td.get_text()) for td in cells[1:5]
                ]
        return result