Auto stash before merge of "table" and "origin/new"

Evezerest · May 5, 2022 · f1d0c8a · f1d0c8a
1 parent 1e6af3b
commit f1d0c8a
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 4 deletions.
diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
@@ -21,6 +21,7 @@
 import platform
 import subprocess
 import sys
+import xlrd
 from functools import partial
 
 from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
@@ -2349,9 +2350,19 @@ def exportJSON(self):
  return
 
  # read xlsx file, convert to HTML
- xd = pd.ExcelFile(csv_path)
- df = xd.parse()
- structure = df.to_html(index = False)
+ # xd = pd.ExcelFile(csv_path)
+ # df = xd.parse()
+ # structure = df.to_html(index = False)
+ excel = xlrd.open_workbook(csv_path)
+ sheet0 = excel.sheet_by_index(0) # only sheet 0
+ merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col
+
+ html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]
+
+ for merged in merged_cells:
+ html_list = expand_list(merged, html_list)
+
+ token_list = convert_token(html_list)
 
  # load box annotations
  cells = []
@@ -2370,7 +2381,7 @@ def exportJSON(self):
  split = 'test'
 
  # save dict
- html = {'structure': {'tokens': structure}, 'cell': cells}
+ html = {'structure': {'tokens': token_list}, 'cell': cells}
  json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
  imgid += 1
 

diff --git a/PPOCRLabel/libs/utils.py b/PPOCRLabel/libs/utils.py
@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
  return hbb
 
 
def expand_list(merged, html_list):
    '''
    Mark one merged-cell range in the table grid.

    Args:
        merged: 4-item sequence ``(sr, er, sc, ec)`` giving the start row,
            end row, start column and end column of the merged range. Both
            end indices are exclusive.
        html_list: 2-D list (rows of cell markers) that is modified in place.

    Returns:
        The same ``html_list`` object. Every cell inside the range is set to
        ``None`` except the top-left one, which receives the span attributes
        as a string such as ``' colspan=2 rowspan=3'``.

    Raises:
        IndexError: if the range reaches outside ``html_list``.
    '''
    sr, er, sc, ec = merged
    row_span = er - sr
    col_span = ec - sc

    # An empty range covers no cell at all, so nothing may be overwritten.
    if row_span <= 0 or col_span <= 0:
        return html_list

    # Blank the whole rectangle; the anchor cell is re-filled below.
    for i in range(sr, er):
        for j in range(sc, ec):
            html_list[i][j] = None

    attrs = ''
    if col_span > 1:
        attrs += " colspan={}".format(col_span)
    if row_span > 1:
        attrs += " rowspan={}".format(row_span)

    # A 1x1 "merge" is just an ordinary cell: keep the plain 'td' marker
    # instead of leaving an attribute-less empty string behind.
    html_list[sr][sc] = attrs if attrs else 'td'
    return html_list
+
+
def convert_token(html_list):
    '''
    Convert the cell-marker grid into a flat list of HTML structure tokens.

    Args:
        html_list: 2-D list of cell markers. ``None`` marks a cell swallowed
            by a merged range (no token is emitted), ``'td'`` marks an
            ordinary cell, and any other string carries span attributes in
            the form ``' colspan=N'`` and/or ``' rowspan=N'``.

    Returns:
        A list of string tokens wrapped in ``<tbody>`` / ``</tbody>``. An
        ordinary cell yields ``'<td>', '</td>'``; a spanning cell yields
        ``'<td'``, one token per span attribute (colspan first, then
        rowspan), ``'>'`` and ``'</td>'``.
    '''
    token_list = ["<tbody>"]
    for row in html_list:
        token_list.append("<tr>")
        for col in row:
            if col is None:
                # Covered by a merged cell anchored elsewhere.
                continue

            # Parse "name=value" pairs separately so that a cell carrying
            # both attributes does not leak one attribute into the other's
            # value.
            attrs = {}
            if col != 'td':
                for part in col.split():
                    name, sep, value = part.partition('=')
                    if sep:
                        attrs[name] = value

            span_tokens = [
                " {}=\"{}\"".format(name, attrs[name])
                for name in ('colspan', 'rowspan')
                if name in attrs
            ]

            if span_tokens:
                token_list.append("<td")
                token_list.extend(span_tokens)
                # Always close the cell so the token stream stays balanced.
                token_list.extend([">", "</td>"])
            else:
                token_list.extend(["<td>", "</td>"])
        token_list.append("</tr>")
    token_list.append("</tbody>")

    return token_list
+
+
 def stepsInfo(lang='en'):
  if lang == 'ch':
  msg = "1. 安装与运行：使用上述命令安装与运行程序。\n" \