Skip to content

Commit

Permalink
调整tbpu模块,移除imgInfo参数
Browse files (browse the repository at this point in the history)
  • Loading branch information
hiroi-sora committed Jan 24, 2024
1 parent b647453 commit 62a3532
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/output/output_pdf_layered.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,6 @@ def print(self, res): # 输出图片结果
def onEnd(self): # 结束时保存。
print("保存PDF:", self.outputPath)
if self.pdf:
self.pdf.subset_fonts() # 构建字体子集,减小文件大小。需要fontTools库
self.pdf.subset_fonts() # 构建字体子集,减小文件大小。需要 fontTools 库
# ez_save默认启用压缩和垃圾回收 deflate=True, garbage=3
self.pdf.ez_save(self.outputPath)
2 changes: 2 additions & 0 deletions UmiOCR-data/py_src/ocr/tbpu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# tbpu : text block processing unit 文本块后处理

from .merge_line import MergeLine
from .merge_para import MergePara
from .merge_para_code import MergeParaCode
Expand Down
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/tbpu/ignore_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def __init__(self, areaList):
self.tbpuName = "忽略区域"
self.areaList = areaList

def run(self, textBlocks, imgInfo):
def run(self, textBlocks):
# 返回是否矩形框 a 包含 b
def isInBox(a, b):
return (
Expand Down
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/tbpu/merge_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def sortKey(A, B):

resList.sort(key=cmp_to_key(sortKey))

def run(self, textBlocks, imgInfo):
def run(self, textBlocks):
# 单行合并
resList = self.mergeLine(textBlocks)
# 结果排序
Expand Down
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/tbpu/merge_line_v_lr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self):
self.tbpuName = "单行-竖排-从左到右"
self.rl = False # T为从右到左,F为从左到右

def run(self, textBlocks, imgInfo):
def run(self, textBlocks):
"""传入 文块组、图片信息。返回文块组、debug信息字符串。"""
# 所有文块,按左上角点的y坐标排序
textBlocks.sort(key=lambda tb: tb["box"][0][1])
Expand Down
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/tbpu/merge_para.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def mergePara(self, textBlocks):
resList.append(tb1) # 装填入结果
return resList

def run(self, textBlocks, imgInfo):
def run(self, textBlocks):
# 段落合并
resList = self.mergePara(textBlocks)
# 返回新文块列表
Expand Down
2 changes: 1 addition & 1 deletion UmiOCR-data/py_src/ocr/tbpu/merge_para_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def mergePara(self, textBlocks): # 所有行合并
res = [{"text": text, "box": box, "score": score}]
return res

def run(self, textBlocks, imgInfo):
def run(self, textBlocks):
# 段落合并
resList = self.mergePara(textBlocks)
# 返回新文块列表
Expand Down
8 changes: 5 additions & 3 deletions UmiOCR-data/py_src/ocr/tbpu/tbpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ class Tbpu:
def __init__(self):
self.tbpuName = "文块处理单元-未知"

def run(self, textBlocks, img):
"""输入:textBlocks文块 , img图片信息\n
输出:textBlocks文块"""
def run(self, textBlocks):
"""输入:textBlocks文块列表\n
输出:排序后的textBlocks文块列表,每个块增加键:
'end' 结尾间隔符
"""
return textBlocks
3 changes: 2 additions & 1 deletion UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOC.qml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ TabPage {
onTriggered: {
addDocs(
[
"D:/Pictures/Screenshots/test",
// "D:/Pictures/Screenshots/test",
"../../PDF测试",
]
)
console.log("自动添加!!!!!!!!!!!!!")
Expand Down

0 comments on commit 62a3532

Please sign in to comment.