# arcaea-offline-ocr/src/arcaea_offline_ocr/template.py

from base64 import b64decode
from time import sleep
from typing import Dict, List, Literal, Tuple, TypedDict

from cv2 import (
    CHAIN_APPROX_SIMPLE,
    COLOR_BGR2GRAY,
    COLOR_GRAY2BGR,
    FONT_HERSHEY_SIMPLEX,
    IMREAD_GRAYSCALE,
    RETR_EXTERNAL,
    THRESH_BINARY_INV,
    TM_CCOEFF_NORMED,
    Mat,
    boundingRect,
    cvtColor,
    destroyAllWindows,
    findContours,
    imdecode,
    imread,
    imshow,
    matchTemplate,
    minMaxLoc,
    putText,
    rectangle,
    threshold,
    waitKey,
)
from imutils import contours, grab_contours
from numpy import frombuffer as np_frombuffer
from numpy import uint8

from ._builtin_templates import GeoSansLight_Italic, GeoSansLight_Regular

# Public API of this module: the names exported by a star-import.
__all__ = [
    "load_digit_template",
    "load_builtin_digit_template",
    "MatchTemplateMultipleResult",
    "matchTemplateMultiple",
]


def load_digit_template(filename: str) -> Dict[int, Mat]:
    """
    Load per-digit template images from a single reference image.

    The image is converted to grayscale, inverse-thresholded so that dark
    glyph pixels become 255 on a 0 background, and cropped once per external
    contour, with the contours ordered from left to right.

    Arguments:
        filename -- An image with white background and black "0 1 2 3 4 5 6 7 8 9" text.

    Returns:
        dict[int, cv2.Mat] -- maps the left-to-right index of each contour
        (the digit value, for an image laid out as described above) to the
        cropped binary image of that contour's bounding box.

    Raises:
        OSError -- if no image could be read from `filename`.
    """
    # https://pyimagesearch.com/2017/07/17/credit-card-ocr-with-opencv-and-python/
    ref = imread(filename)
    if ref is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than an opaque cvtColor error.
        raise OSError(f"Cannot read digit template image: {filename!r}")

    ref = cvtColor(ref, COLOR_BGR2GRAY)
    # THRESH_BINARY_INV: pixels with value <= 10 become 255, all others 0.
    ref = threshold(ref, 10, 255, THRESH_BINARY_INV)[1]

    # Search a copy so `ref` is untouched even if findContours modifies its
    # input (older OpenCV releases do).
    refCnts = findContours(ref.copy(), RETR_EXTERNAL, CHAIN_APPROX_SIMPLE)
    # grab_contours hides the differing return shapes across OpenCV versions.
    refCnts = grab_contours(refCnts)
    refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]

    digits = {}
    for index, contour in enumerate(refCnts):
        x, y, w, h = boundingRect(contour)
        digits[index] = ref[y : y + h, x : x + w]
    return digits


def load_builtin_digit_template(
    name: Literal["GeoSansLight-Regular", "GeoSansLight-Italic"]
):
    """
    Decode one of the digit template sets bundled with the package.

    Arguments:
        name -- Which builtin template set to load.

    Returns:
        A dict whose keys are the builtin set's keys converted with `int()`
        and whose values are the corresponding base64 payloads decoded by
        `cv2.imdecode` as grayscale images.

    Raises:
        KeyError -- if `name` is not one of the builtin template names.
    """
    builtin_sets = {
        "GeoSansLight-Regular": GeoSansLight_Regular,
        "GeoSansLight-Italic": GeoSansLight_Italic,
    }
    encoded_templates = builtin_sets[name]

    decoded_templates = {}
    for key, encoded_image in encoded_templates.items():
        # base64 text -> raw bytes -> uint8 buffer that imdecode can parse.
        image_buffer = np_frombuffer(b64decode(encoded_image), uint8)
        decoded_templates[int(key)] = imdecode(image_buffer, IMREAD_GRAYSCALE)
    return decoded_templates


class MatchTemplateMultipleResult(TypedDict):
    """One match reported by `matchTemplateMultiple`."""

    # Score of the match, as read from the cv2.matchTemplate result.
    max_val: float
    # NOTE(review): despite the key name, `matchTemplateMultiple` fills this
    # with (x, y, x + template_w + 1, y + template_h + 1) -- the last two
    # items are coordinates, not a width/height. Confirm what callers expect.
    xywh: Tuple[int, int, int, int]


def matchTemplateMultiple(
    src: Mat, template: Mat, threshold: float = 0.1
) -> List[MatchTemplateMultipleResult]:
    """
    Find every location in `src` where `template` scores above `threshold`.

    The TM_CCOEFF_NORMED score map is scanned repeatedly for its maximum.
    Each accepted maximum is recorded, then a window of roughly the
    template's size centred on it is zeroed in the score map so that the
    next scan finds a different location.

    Arguments:
        src -- Image to search in.
        template -- Image to search for.
        threshold -- Score a location must exceed to count as a match.
            Inside this function the name shadows the `cv2.threshold` import.

    Returns:
        A list of dicts in discovery order (each scan takes the highest
        remaining score), each with:
            "max_val" -- the match score.
            "xywh" -- (x, y, x + w + 1, y + h + 1), where (x, y) is the
                top-left corner of the match and (w, h) the template size.
                Despite the key name, the last two items are coordinates,
                not a width/height.
    """
    score_map = matchTemplate(src, template, TM_CCOEFF_NORMED)
    template_h, template_w = template.shape[:2]
    half_h, half_w = template_h // 2, template_w // 2
    results: List[MatchTemplateMultipleResult] = []

    # https://stackoverflow.com/a/66848923/16484891
    # CC BY-SA 4.0
    previous_scan = None
    while True:
        min_val, max_val, min_loc, max_loc = minMaxLoc(score_map)

        # Written as "not >" (rather than "<=") so that a NaN score also ends
        # the search, just as a failing `max_val > threshold` test would.
        if not max_val > threshold:
            break

        # Prevent infinite loop. If the scan is identical to the previous one,
        # suppressing the last match changed nothing, so stop.
        current_scan = (min_val, max_val, min_loc, max_loc)
        if current_scan == previous_scan:
            break
        previous_scan = current_scan

        x, y = max_loc

        # Clamp the suppression window to the bounds of the score map.
        start_row = max(0, y - half_h)
        start_col = max(0, x - half_w)
        end_row = min(score_map.shape[0], y + half_h + 1)
        end_col = min(score_map.shape[1], x + half_w + 1)
        score_map[start_row:end_row, start_col:end_col] = 0

        results.append(
            {
                "max_val": max_val,
                "xywh": (x, y, x + template_w + 1, y + template_h + 1),
            }
        )

    return results