From f1d0c8a2ff1a5790f78749d7cd489d8d8337b463 Mon Sep 17 00:00:00 2001 From: Leif <4603009@qq.com> Date: Thu, 5 May 2022 22:37:11 +0800 Subject: [PATCH] Auto stash before merge of "table" and "origin/new" --- PPOCRLabel/PPOCRLabel.py | 19 +++++++++++++---- PPOCRLabel/libs/utils.py | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index 534e0cd882..f80cf8fd0e 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -21,6 +21,7 @@ import platform import subprocess import sys +import xlrd from functools import partial from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess @@ -2349,9 +2350,19 @@ def exportJSON(self): return # read xlsx file, convert to HTML - xd = pd.ExcelFile(csv_path) - df = xd.parse() - structure = df.to_html(index = False) + # xd = pd.ExcelFile(csv_path) + # df = xd.parse() + # structure = df.to_html(index = False) + excel = xlrd.open_workbook(csv_path) + sheet0 = excel.sheet_by_index(0) # only sheet 0 + merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col + + html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)] + + for merged in merged_cells: + html_list = expand_list(merged, html_list) + + token_list = convert_token(html_list) # load box annotations cells = [] @@ -2370,7 +2381,7 @@ def exportJSON(self): split = 'test' # save dict - html = {'structure': {'tokens': structure}, 'cell': cells} + html = {'structure': {'tokens': token_list}, 'cell': cells} json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html}) imgid += 1 diff --git a/PPOCRLabel/libs/utils.py b/PPOCRLabel/libs/utils.py index c49b506882..86a0336d45 100644 --- a/PPOCRLabel/libs/utils.py +++ b/PPOCRLabel/libs/utils.py @@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array: return hbb +def expand_list(merged, html_list): + ''' + Fill blanks according to merged cells + ''' + sr, er, sc, ec = merged + for i in range(sr, er): + for j in range(sc, ec): + html_list[i][j] = None + html_list[sr][sc] = '' + if ec - sc > 1: + html_list[sr][sc] += " colspan={}".format(ec - sc) + if er - sr > 1: + html_list[sr][sc] += " rowspan={}".format(er - sr) + return html_list + + +def convert_token(html_list): + ''' + Convert raw html to label format + ''' + token_list = [""] + # final html list: + for row in html_list: + token_list.append("") + for col in row: + if col == None: + continue + elif col == 'td': + token_list.extend(["", ""]) + else: + token_list.append("") + token_list.append("") + token_list.append("") + + return token_list + + def stepsInfo(lang='en'): if lang == 'ch': msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \