mirror of
https://github.com/283375/arcaea-offline-pyside-ui.git
synced 2025-04-21 18:20:18 +00:00
62 lines
1.7 KiB
Python
62 lines
1.7 KiB
Python
import sqlite3
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Any, Callable, Optional
|
|
|
|
import cv2
|
|
from arcaea_offline_ocr.phash_db import phash_opencv
|
|
|
|
|
|
def build_image_phash_database(
    images: list[Path],
    labels: list[str],
    *,
    hash_size: int = 16,
    highfreq_factor: int = 4,
    progress_func: Optional[Callable[[int, int], Any]] = None,
):
    """Build an in-memory SQLite database of perceptual hashes.

    Each image is loaded as grayscale, hashed with ``phash_opencv`` and
    stored next to its label. The resulting database has two tables:

    * ``properties (key TEXT, value TEXT)`` with the rows ``hash_size``,
      ``highfreq_factor`` and ``built_timestamp`` (Unix time in seconds).
    * ``hashes (id TEXT, hash BLOB(n))`` with one row per image, where
      ``id`` is the label and ``hash`` is the flattened hash as raw bytes.
      ``n`` is the byte length of the first hash; when there are no
      images the column is declared as plain ``BLOB``.

    Args:
        images: Paths of the images to hash.
        labels: Labels stored as ``id``; ``labels[i]`` belongs to
            ``images[i]``.
        hash_size: Forwarded to ``phash_opencv`` and recorded in
            ``properties``.
        highfreq_factor: Forwarded to ``phash_opencv`` and recorded in
            ``properties``.
        progress_func: Optional callback invoked after every image with
            ``(processed_count, total_count)``.

    Returns:
        The open ``sqlite3.Connection`` of the in-memory database, created
        with ``check_same_thread=False``. The caller owns it and is
        responsible for closing it.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length, or if
            an image cannot be read.
    """
    # An explicit check instead of ``assert``: asserts vanish under ``-O``
    # and ``zip`` would then silently drop the unmatched tail.
    if len(images) != len(labels):
        raise ValueError(
            f"images and labels must have the same length "
            f"(got {len(images)} and {len(labels)})"
        )

    image_num = len(images)
    conn = sqlite3.connect(":memory:", check_same_thread=False)

    try:
        # ``with conn`` commits on success and rolls back on error, so no
        # explicit commit is required inside the block.
        with conn:
            cursor = conn.cursor()

            cursor.execute("CREATE TABLE properties (key TEXT, value TEXT)")
            cursor.executemany(
                "INSERT INTO properties VALUES (?, ?)",
                [
                    ("hash_size", hash_size),
                    ("highfreq_factor", highfreq_factor),
                ],
            )

            id_hashes = []
            for done, (label, image_path) in enumerate(zip(labels, images), start=1):
                resolved_path = str(image_path.resolve())
                image = cv2.imread(resolved_path, cv2.IMREAD_GRAYSCALE)
                # cv2.imread signals failure by returning None rather than
                # raising, so surface a readable error right here.
                if image is None:
                    raise ValueError(f"Failed to read image: {resolved_path}")

                image_hash = phash_opencv(
                    image,
                    hash_size=hash_size,
                    highfreq_factor=highfreq_factor,
                )
                id_hashes.append((label, image_hash.flatten().tobytes()))

                if progress_func is not None:
                    progress_func(done, image_num)

            # The declared BLOB length is taken from the first hash. With no
            # images there is nothing to measure, so fall back to a plain
            # BLOB column instead of failing on ``id_hashes[0]``.
            if id_hashes:
                hash_column_type = f"BLOB({len(id_hashes[0][1])})"
            else:
                hash_column_type = "BLOB"
            cursor.execute(f"CREATE TABLE hashes (id TEXT, hash {hash_column_type})")

            cursor.executemany(
                "INSERT INTO hashes VALUES (?, ?)",
                id_hashes,
            )
            cursor.executemany(
                "INSERT INTO properties VALUES (?, ?)",
                [("built_timestamp", int(time.time()))],
            )
    except BaseException:
        # The connection is only handed to the caller on success; do not
        # leak it when the build fails part-way.
        conn.close()
        raise

    return conn
|