Skip to content

Commit

Permalink
Auto stash before merge of "table" and "origin/new"
Browse files Browse the repository at this point in the history
  • Loading branch information
Evezerest committed May 5, 2022
1 parent 1e6af3b commit f1d0c8a
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 4 deletions.
19 changes: 15 additions & 4 deletions PPOCRLabel/PPOCRLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import platform
import subprocess
import sys
import xlrd
from functools import partial

from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
Expand Down Expand Up @@ -2349,9 +2350,19 @@ def exportJSON(self):
return

# read xlsx file, convert to HTML
xd = pd.ExcelFile(csv_path)
df = xd.parse()
structure = df.to_html(index = False)
# xd = pd.ExcelFile(csv_path)
# df = xd.parse()
# structure = df.to_html(index = False)
excel = xlrd.open_workbook(csv_path)
sheet0 = excel.sheet_by_index(0) # only sheet 0
merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col

html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]

for merged in merged_cells:
html_list = expand_list(merged, html_list)

token_list = convert_token(html_list)

# load box annotations
cells = []
Expand All @@ -2370,7 +2381,7 @@ def exportJSON(self):
split = 'test'

# save dict
html = {'structure': {'tokens': structure}, 'cell': cells}
html = {'structure': {'tokens': token_list}, 'cell': cells}
json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
imgid += 1

Expand Down
44 changes: 44 additions & 0 deletions PPOCRLabel/libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
return hbb


def expand_list(merged, html_list):
    """Mark one merged cell range inside the table grid.

    ``merged`` is a 4-tuple ``(start_row, end_row, start_col, end_col)``
    whose end bounds are exclusive.  Every grid position in that range is
    set to ``None``; the top-left position is then replaced by a string
    holding the span attributes (``" colspan=N"`` and/or ``" rowspan=N"``),
    which is empty when the range covers a single cell.

    The grid is modified in place and the same list object is returned.
    """
    top, bottom, left, right = merged
    height = bottom - top
    width = right - left

    # Blank out the whole covered area first; the anchor is restored below.
    for row_idx in range(top, bottom):
        for col_idx in range(left, right):
            html_list[row_idx][col_idx] = None

    # Only spans wider/taller than one cell get an attribute.
    attributes = []
    if width > 1:
        attributes.append(" colspan={}".format(width))
    if height > 1:
        attributes.append(" rowspan={}".format(height))
    html_list[top][left] = "".join(attributes)

    return html_list


def convert_token(html_list):
    """Convert the table grid into a flat list of HTML structure tokens.

    Each entry of ``html_list`` is a row; each cell in a row is one of:

    * ``None``  -- position covered by another cell's span; emits nothing.
    * ``'td'``  -- ordinary cell; emits ``"<td>"``, ``"</td>"``.
    * any other string -- a cell carrying span attributes such as
      ``" colspan=2 rowspan=3"``; emits ``"<td"``, one token per
      attribute (``' colspan="2"'``, ``' rowspan="3"'``), ``">"`` and
      ``"</td>"``.

    Returns the token list wrapped in ``"<tbody>"`` / ``"</tbody>"`` with
    every row wrapped in ``"<tr>"`` / ``"</tr>"``.
    """
    token_list = ["<tbody>"]
    for row in html_list:
        token_list.append("<tr>")
        for col in row:
            if col is None:
                continue
            if col == 'td':
                token_list.extend(["<td>", "</td>"])
                continue

            token_list.append("<td")
            for attr in ("colspan", "rowspan"):
                marker = attr + "="
                if marker not in col:
                    continue
                # Take only the first whitespace-delimited value after the
                # marker, so a following attribute in the same string is
                # not swallowed into this attribute's value.
                value = col.split(marker, 1)[1].split()
                if value:
                    token_list.append(" {}=\"{}\"".format(attr, value[0]))
            # Close the cell so spanning cells are balanced like plain ones.
            token_list.extend([">", "</td>"])
        token_list.append("</tr>")
    token_list.append("</tbody>")

    return token_list


def stepsInfo(lang='en'):
if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
Expand Down

0 comments on commit f1d0c8a

Please sign in to comment.