"""Image hashers introduced by the "feat: core hashers" patch.

Provenance (from the patch header):
    Commit:  413188d86a7c4f26f8136ee7618f213f427883e1
    From:    283375
    Date:    Fri, 10 Jan 2025 23:54:37 +0800
    Subject: [PATCH] feat: core hashers

The patch creates four files (4 files changed, 45 insertions):
    src/arcaea_offline_ocr/core/__init__.py          empty        (blob e69de29)
    src/arcaea_offline_ocr/core/hashers/__init__.py  3 lines      (blob e265b2b)
    src/arcaea_offline_ocr/core/hashers/_common.py   7 lines      (blob 6a10207)
    src/arcaea_offline_ocr/core/hashers/index.py     35 lines     (blob 1d8c3fd)

The added Python is reproduced below with its line breaks and indentation
restored, one section per file.
"""

from typing import Tuple

import cv2
import numpy as np

from arcaea_offline_ocr.types import Mat

# --- core/hashers/__init__.py -------------------------------------------
# In the package this file consists of
#     from .index import average, dct, difference
# followed by the ``__all__`` below; the relative import is not repeated
# here because the three functions are defined further down in this text.
__all__ = ["average", "dct", "difference"]


# --- core/hashers/_common.py --------------------------------------------


def _resize_image(src: Mat, dsize: Tuple[int, int]) -> Mat:
    """Return *src* resized to *dsize* with area interpolation.

    Args:
        src: Image to resize.
        dsize: Target size, forwarded unchanged to ``cv2.resize`` (which
            documents it as ``(width, height)``).

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    # fx/fy stay at 0 so that the explicit dsize alone decides the output size.
    return cv2.resize(src, dsize, fx=0, fy=0, interpolation=cv2.INTER_AREA)


# --- core/hashers/index.py ----------------------------------------------


def average(img_gray: Mat, hash_size: int) -> Mat:
    """Hash an image by thresholding its pixels against their mean.

    Args:
        img_gray: Input image (presumably single-channel, going by the
            name -- TODO confirm against callers).
        hash_size: Side length the image is shrunk to before thresholding.

    Returns:
        A flattened boolean array that is ``True`` wherever a pixel of the
        ``hash_size`` x ``hash_size`` thumbnail is strictly greater than the
        thumbnail's mean.
    """
    img_resized = _resize_image(img_gray, (hash_size, hash_size))
    diff = img_resized > img_resized.mean()
    return diff.flatten()


def difference(img_gray: Mat, hash_size: int) -> Mat:
    """Hash an image by comparing horizontally adjacent pixels.

    Args:
        img_gray: Input image (presumably single-channel, going by the
            name -- TODO confirm against callers).
        hash_size: Number of comparisons per row and number of rows.

    Returns:
        A flattened boolean array that is ``True`` wherever a thumbnail
        pixel is strictly greater than its right-hand neighbour.
    """
    # One extra column so that every row yields hash_size neighbour pairs.
    img_size = (hash_size + 1, hash_size)
    img_resized = _resize_image(img_gray, img_size)

    previous = img_resized[:, :-1]  # every column except the last
    current = img_resized[:, 1:]  # every column except the first
    diff = previous > current
    return diff.flatten()


def dct(img_gray: Mat, hash_size: int = 16, high_freq_factor: int = 4) -> Mat:
    """Hash an image by thresholding a block of its DCT coefficients.

    Args:
        img_gray: Input image (presumably single-channel, going by the
            name -- TODO confirm against callers).
        hash_size: Side length of the coefficient block that is kept.
        high_freq_factor: Multiplier applied to ``hash_size`` to get the side
            length of the square thumbnail the DCT is computed on.

    Returns:
        A boolean array (not flattened) that is ``True`` wherever a
        coefficient in the top-left ``hash_size`` x ``hash_size`` block of
        the DCT is strictly greater than that block's mean.
    """
    # TODO: consistency?
    # NOTE(review): possibly refers to this function returning the mask
    # unflattened while average() and difference() flatten theirs -- confirm.
    img_size_base = hash_size * high_freq_factor
    img_size = (img_size_base, img_size_base)

    img_resized = _resize_image(img_gray, img_size)
    # cv2.dct operates on floating-point input, hence the conversion.
    # NOTE(review): OpenCV documents dct for even-sized arrays only; an odd
    # hash_size * high_freq_factor may be rejected -- confirm.
    img_resized = img_resized.astype(np.float32)
    dct_mat = cv2.dct(img_resized)

    # Keep only the top-left corner of the coefficient matrix.
    hash_mat = dct_mat[:hash_size, :hash_size]
    return hash_mat > hash_mat.mean()