From c1140803b1a2cd4331020322861afc712ada87d6 Mon Sep 17 00:00:00 2001 From: 283375 Date: Wed, 30 Aug 2023 20:35:35 +0800 Subject: [PATCH] feat: `FixRects.split_connected` --- assets/fix_rects/connected_masked.jpg | Bin 0 -> 1814 bytes src/arcaea_offline_ocr/ocr.py | 51 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 assets/fix_rects/connected_masked.jpg diff --git a/assets/fix_rects/connected_masked.jpg b/assets/fix_rects/connected_masked.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8e08242d3009d0da8e29a7e02791e14a1728db17 GIT binary patch literal 1814 zcmX9;4LqCI8b9%o2r^$S%5GC16{cvZW>b@uqP5$(SbdhXlnjeZs}(DGrzpy0s8D{F zektun)K_X%X$Y;cWv$jnY!Z5Jt%ycxyqY(8_u75V?>y(6-+7+@|2gM*&OxV8888kC zI2r(OH~?^11LzF!16sH(+0t5DqNBZ~b#-*Kb?~}){NJ5G)WZ{q1Uz2PK#!=uCD_c+ zK;Lj{fj$BTdVm64aX1rziU1h{*Zmt4WB9+|w6t|}@dV7*05j+|#yGXLF(zG%2D8q= z_JNLxuBpwwLwK{0Qv}=eZSJ`h*Y)gt?+LeuK2_Lz#GkoH)Hk=V++nrbf#T@&(SFYZ z)Q>;$@;mGw5P0NhP}rBp!y_W2q7zOhCNaKBPC1)#?tCUY>)X8if=h)(#g{9qu2get zYOmJaxOuDTc5};}yB(cf-97yKy*~{MiiUZaCVVd#cIK6tGv(!VMff+mtKSf(+?=7&v4KYnht8S?YQjk#-OQ76J7#JFBV98`Uv5z`gH0!v89 zhGci+01EQ?LND@+i#JXdBiEmaXn&Z0!l91UF!rGJ#n}7&%E&_`$sp;$;YoK>p6*aD z3QWF7!Z5FL&2|QIFieI5#A5-mL%|eX(c<%zCV@Hxo&BRqqn<42lc?StGKYu1tp$QJ@=yf{O;(coh}d^N*eed-!-l>TUL!0mqXuC0VRW z>-r*>LDlXOKfbJeCdDW<4=bk7DY-&6G|3Um&rD-j|#wy3$@$524Pp+M-1c&JN! zYUVcM)wO+;AK=TeJ`19a4K6i(vyo9ow5KtPMdlrQE5iSH!-~^3Jip=Bj)gGBnQYtv zv4UaGB_&7Q=0lr(y~gGZH+JQ`8@~3_vG01id&-=$V_7Vw;mJzdOS6`zLYLpV4_gIR zj2`41fk&0^)IV}|afWBjFfb1MV4g40n6Rxyrl}lrMU8TMJ`^vgcoX|d75bU$tbnrhv<_EP%ZDaP z&Sk|{NRrq5<7?cD-A-juKkPwU+4+$OLmBSy2!r$ zq<^QX?39UctTN}|95t`&AwgyPE7U1zeUK6wt&M`%_^$BCesNOBVdJANPpYIS__};E zjHUbn1)bkL7p(J!k8f(zJEe*M2==O)V<2XH~o=iF_ozT8xLK6BqO!_1J_m_X%KS4v$l<#!*a zY=XKFVxR!FZQ4Lg`5Iy&R)(eO+WjanW-p0MVzXmbQ6O?F=St2leKV?0@|XJ;U=f^N z(~KlCl%YK0y|tIsP27Nn;4vfk8@f8_rV@KeqB0b$SgC31`;e@X7LS6wbOFO{DpO;< zp@e=z!JIXm%bbVc=$*7xv65PFj>n%_;!7FhtnA+YNfg+452(WA#xdTicrlk|Nn^5L zyGi-9N+)~II10WBE^ELk)Z#%GNMw>6_W@inNBc1O+n(xqx({)M>AF1haPWj@_fU6V z)sTJr<15bh?e<1*UKHGs!X7M*c_ZRQ3q?Ue0Sbr&Wgy~8VrY!%@K07M&vT8E>4;sz&aG&~Kuki!WbNQL;b7PLxbz@LB)Ei_Z2`Q_;ph$7lX{M0(LQ?qry-ZzS=xMjvy; zN?h3LV+BVUA=?Z@kRpU$y3-ckk-VxMtjIX rect_wh_ratio: + # consider this is a connected contour + connected_rects.append(rect) + + # find the thinnest part + border_ignore = round(rw * width_range_ratio) + img_cropped = crop_xywh( + img_masked, + (border_ignore, ry, rw - border_ignore, rh), + ) + white_pixels = {} # dict[x, white_pixel_number] + for i in range(img_cropped.shape[1]): + col = img_cropped[:, i] + white_pixels[rx + border_ignore + i] = np.count_nonzero(col > 200) + least_white_pixels = min(list(white_pixels.values())) + x_values = [ + x + for x, pixel in white_pixels.items() + if pixel == least_white_pixels + ] + # select only middle values + x_mean = np.mean(x_values) + x_std = np.std(x_values) + x_values = [ + x + for x in x_values + if x_mean - x_std * 1.5 <= x <= x_mean + x_std * 1.5 + ] + x_mid = round(np.median(x_values)) + + # split the rect + new_rects.extend( + [(rx, ry, x_mid, rh), (rx + x_mid, ry, rw - x_mid, rh)] + ) + + return_rects = deepcopy(rects) + return_rects = [r for r in rects if r not in connected_rects] + return_rects.extend(new_rects) + return return_rects + def resize_fill_square(img: Mat, target: int = 20): h, w = img.shape[:2]