mirror of
https://github.com/283375/arcaea-offline-ocr-model.git
synced 2025-04-21 13:30:17 +00:00
feat: redact sources
This commit is contained in:
parent
e84f2bec81
commit
b9d69fe577
35
project.py
35
project.py
@ -9,7 +9,6 @@ from functools import cached_property
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
|
|
||||||
@ -111,15 +110,18 @@ class Project:
|
|||||||
samples = self.samples
|
samples = self.samples
|
||||||
return [p for p in samples if p.stem.startswith(f"{tag}^")]
|
return [p for p in samples if p.stem.startswith(f"{tag}^")]
|
||||||
|
|
||||||
def extractYield(self):
|
def getModule(self, moduleName: str):
|
||||||
cwdPath = Path(os.getcwd())
|
cwdPath = Path(os.getcwd())
|
||||||
importParts = [
|
importParts = [
|
||||||
*self.path.resolve().relative_to(cwdPath.resolve()).parts,
|
*self.path.resolve().relative_to(cwdPath.resolve()).parts,
|
||||||
"extract",
|
moduleName,
|
||||||
]
|
]
|
||||||
importName = ".".join(importParts)
|
importName = ".".join(importParts)
|
||||||
projectExtractModule = importlib.import_module(importName)
|
return importlib.import_module(importName)
|
||||||
getSamples = projectExtractModule.getSamples
|
|
||||||
|
def extractYield(self):
|
||||||
|
extractModule = self.getModule("extract")
|
||||||
|
getSamples = extractModule.getSamples
|
||||||
assert callable(getSamples)
|
assert callable(getSamples)
|
||||||
|
|
||||||
extractLogger = logging.getLogger(
|
extractLogger = logging.getLogger(
|
||||||
@ -167,6 +169,29 @@ class Project:
|
|||||||
def extract(self):
|
def extract(self):
|
||||||
list(self.extractYield())
|
list(self.extractYield())
|
||||||
|
|
||||||
|
def redactYield(self):
|
||||||
|
redactModule = self.getModule("redact")
|
||||||
|
redactSource = redactModule.redactSource
|
||||||
|
assert callable(redactSource)
|
||||||
|
|
||||||
|
redactLogger = logging.getLogger(
|
||||||
|
f"redact-{self.name}-{int(time.time() * 1000)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
sources = self.sources
|
||||||
|
sourcesNum = len(sources)
|
||||||
|
for i, source in enumerate(sources):
|
||||||
|
try:
|
||||||
|
redactLogger.info(f"Redacting {source.resolve()}")
|
||||||
|
redactSource(source)
|
||||||
|
except Exception:
|
||||||
|
redactLogger.exception(f"Error redacting {source.resolve()}")
|
||||||
|
finally:
|
||||||
|
yield (source, i, sourcesNum)
|
||||||
|
|
||||||
|
def redact(self):
|
||||||
|
list(self.redactYield())
|
||||||
|
|
||||||
def getSampleOriginalFileName(self, sample: Path):
|
def getSampleOriginalFileName(self, sample: Path):
|
||||||
return self.tagsReExp.sub("", sample.name)
|
return self.tagsReExp.sub("", sample.name)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user