fix conflict

PaddlePaddle · Dec 8, 2022 · 88ca1f1 · 88ca1f1
2 parents c478c59 + 83beede
commit 88ca1f1
Show file tree

Hide file tree

Showing 33 changed files with 414 additions and 89 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,3 +1,5 @@
+Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+
  Apache License
  Version 2.0, January 2004
  http://www.apache.org/licenses/
@@ -186,7 +188,7 @@
  same "printed page" as the copyright notice for easier
  identification within third-party archives.
 
- Copyright [yyyy] [name of copyright owner]
+ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.

diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
@@ -1617,8 +1617,9 @@ def showBoundingBoxFromPPlabel(self, filePath):
  key_cls = 'None' if not self.kie_mode else box.get('key_cls', 'None')
  shapes.append((box['transcription'], box['points'], None, key_cls, box.get('difficult', False)))
 
- self.loadLabels(shapes)
- self.canvas.verified = False
+ if shapes != []:
+ self.loadLabels(shapes)
+ self.canvas.verified = False
 
  def validFilestate(self, filePath):
  if filePath not in self.fileStatedict.keys():
@@ -2203,7 +2204,7 @@ def reRecognition(self):
  msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
  QMessageBox.information(self, "Information", msg)
  return
- result = self.ocr.ocr(img_crop, cls=True, det=False)
+ result = self.ocr.ocr(img_crop, cls=True, det=False)[0]
  if result[0][0] != '':
  if shape.line_color == DEFAULT_LOCK_COLOR:
  shape.label = result[0][0]
@@ -2264,7 +2265,7 @@ def singleRerecognition(self):
  msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
  QMessageBox.information(self, "Information", msg)
  return
- result = self.ocr.ocr(img_crop, cls=True, det=False)
+ result = self.ocr.ocr(img_crop, cls=True, det=False)[0]
  if result[0][0] != '':
  result.insert(0, box)
  print('result in reRec is ', result)
@@ -2415,12 +2416,12 @@ def cellreRecognition(self):
  # merge the text result in the cell
  texts = ''
  probs = 0. # the probability of the cell is average prob of every text box in the cell
- bboxes = self.ocr.ocr(img_crop, det=True, rec=False, cls=False)
+ bboxes = self.ocr.ocr(img_crop, det=True, rec=False, cls=False)[0]
  if len(bboxes) > 0:
  bboxes.reverse() # top row text at first
  for _bbox in bboxes:
  patch = get_rotate_crop_image(img_crop, np.array(_bbox, np.float32))
- rec_res = self.ocr.ocr(patch, det=False, rec=True, cls=False)
+ rec_res = self.ocr.ocr(patch, det=False, rec=True, cls=False)[0]
  text = rec_res[0][0]
  if text != '':
  texts += text + ('' if text[0].isalpha() else ' ') # add space between english word

diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md
@@ -103,11 +103,11 @@ python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyw
 ```
 
 #### 1.2.3 Build and Install the Whl Package Locally
-Compile and install a new whl package, where 1.0.2 is the version number, you can specify the new version in 'setup.py'.
+Compile and install a new whl package, where 0.0.0 is the version number, you can specify the new version in 'setup.py'.
 ```bash
 cd ./PPOCRLabel
 python3 setup.py bdist_wheel
-pip3 install dist/PPOCRLabel-2.1.2-py2.py3-none-any.whl
+pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl
 ```
 
 

diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md
@@ -101,12 +101,12 @@ python PPOCRLabel.py --lang ch
 
 #### 1.2.3 本地构建whl包并安装
 
-编译与安装新的whl包，其中1.0.2为版本号，可在 `setup.py` 中指定新版本。
+编译与安装新的whl包，其中0.0.0为版本号，可在 `setup.py` 中指定新版本。
 
 ```bash
 cd ./PPOCRLabel
 python3 setup.py bdist_wheel 
-pip3 install dist/PPOCRLabel-2.1.2-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple
+pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple
 ```
 
 

diff --git a/PPOCRLabel/libs/autoDialog.py b/PPOCRLabel/libs/autoDialog.py
@@ -40,7 +40,7 @@ def run(self):
  if self.model == 'paddle':
  h, w, _ = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1).shape
  if h > 32 and w > 32:
- self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True)
+ self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True)[0]
  else:
  print('The size of', Imgpath, 'is too small to be recognised')
  self.result_dic = None

diff --git a/PPOCRLabel/requirements.txt b/PPOCRLabel/requirements.txt
@@ -1,3 +1,3 @@
 pyqt5
-paddleocr==2.6.0.0
+paddleocr
 xlrd==1.2.0
diff --git a/PPOCRLabel/setup.py b/PPOCRLabel/setup.py
@@ -33,10 +33,10 @@ def readme():
  package_dir={'PPOCRLabel': ''},
  include_package_data=True,
  entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
- version='2.1.2',
+ version='2.1.3',
  install_requires=requirements,
  license='Apache License 2.0',
- description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
+ description='PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQT5, supporting rectangular box, table, irregular text and key information annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.',
  long_description=readme(),
  long_description_content_type='text/markdown',
  url='https://github.com/PaddlePaddle/PaddleOCR',

diff --git a/README.md b/README.md
@@ -26,12 +26,10 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
 </div>
 
 ## 📣 Recent updates
-- 💥 **Live Preview: Oct 24 - Oct 26, China Standard Time, 20:30**, Engineers@PaddleOCR will show PP-StructureV2 optimization strategy for 3 days.
- - Scan the QR code below using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.)
+- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_ch/algorithm_overview.md)**: Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md), Handwritten Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md)
+- **2022.10 release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model size, 8x faster inference time, and a ready-to-use web demo
+- 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy**. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.)
 
-<div align="center">
-<img src="https://user-images.githubusercontent.com/50011306/196944258-0eb82df1-d730-4b96-a350-c1d370fdc2b1.jpg" width = "150" height = "150" />
-</div>
 
 - **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
  - Release [PP-StructureV2](./ppstructure/), with functions and performance fully upgraded, adapted to Chinese scenes, and new support for [Layout Recovery](./ppstructure/recovery) and **one line command to convert PDF to Word**;
@@ -74,6 +72,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
 - [Dive Into OCR ](./doc/doc_en/ocr_book_en.md)
 
 <a name="Community"></a>
+
 ## 👫 Community
 
 - For international developers, we regard [PaddleOCR Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions) as our international community platform. All ideas and questions can be discussed here in English.

diff --git a/README_ch.md b/README_ch.md
@@ -27,29 +27,19 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 
 ## 📣 近期更新
 
-- **💥 直播预告：10.24-10.26日每晚8点半**，PaddleOCR研发团队详解PP-StructureV2优化策略。微信扫描下方二维码，关注公众号并填写问卷后进入官方交流群，获取直播链接与20G重磅OCR学习大礼包（内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等）
-
-<div align="center">
-<img src="https://user-images.githubusercontent.com/50011306/196944258-0eb82df1-d730-4b96-a350-c1d370fdc2b1.jpg" width = "150" height = "150" />
-</div>
-
+- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**：文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md)，公式识别[CAN](doc/doc_ch/algorithm_rec_can.md)
+- **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**：模型大小仅4.3M，预测速度提升8倍，配套web demo开箱即用
+- **💥 直播回放：PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区)，关注公众号并填写问卷后进入官方交流群，获取直播回放链接与20G重磅OCR学习大礼包（内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等）
 - **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
  - 发布[PP-StructureV2](./ppstructure/README_ch.md)，系统功能性能全面升级，适配中文场景，新增支持[版面复原](./ppstructure/recovery/README_ch.md)，支持**一行命令完成PDF转Word**；
  - [版面分析](./ppstructure/layout/README_ch.md)模型优化：模型存储减少95%，速度提升11倍，平均CPU耗时仅需41ms；
  - [表格识别](./ppstructure/table/README_ch.md)模型优化：设计3大优化策略，预测耗时不变情况下，模型精度提升6%；
  - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化：设计视觉无关模型结构，语义实体识别精度提升2.8%，关系抽取精度提升9.1%。
-
-- **🔥2022.8 发布 [OCR场景应用集合](./applications)**
-
- - 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**，覆盖通用，制造、金融、交通行业的主要OCR垂类应用。
-
-
-- **2022.8 新增实现[8种前沿算法](doc/doc_ch/algorithm_overview.md)**
- - 文本检测：[FCENet](doc/doc_ch/algorithm_det_fcenet.md), [DB++](doc/doc_ch/algorithm_det_db.md)
- - 文本识别：[ViTSTR](doc/doc_ch/algorithm_rec_vitstr.md), [ABINet](doc/doc_ch/algorithm_rec_abinet.md), [VisionLAN](doc/doc_ch/algorithm_rec_visionlan.md), [SPIN](doc/doc_ch/algorithm_rec_spin.md), [RobustScanner](doc/doc_ch/algorithm_rec_robustscanner.md)
- - 表格识别：[TableMaster](doc/doc_ch/algorithm_table_master.md)
-
-
+- 🔥**2022.8 发布 [OCR场景应用集合](./applications)**：包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**，覆盖通用，制造、金融、交通行业的主要OCR垂类应用。
+- **2022.8 新增实现[8种前沿算法](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_overview.md)**
+ - 文本检测：[FCENet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_fcenet.md), [DB++](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_db.md)
+ - 文本识别：[ViTSTR](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_vitstr.md), [ABINet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_abinet.md), [VisionLAN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_visionlan.md), [SPIN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_spin.md), [RobustScanner](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_robustscanner.md)
+ - 表格识别：[TableMaster](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_table_master.md)
 - **2022.5.9 发布 PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)**
  - 发布[PP-OCRv3](./doc/doc_ch/ppocr_introduction.md#pp-ocrv3)，速度可比情况下，中文场景效果相比于PP-OCRv2再提升5%，英文场景提升11%，80语种多语言模型平均识别准确率提升5%以上；
  - 发布半自动标注工具[PPOCRLabelv2](./PPOCRLabel)：新增表格文字图像、图像关键信息抽取任务和不规则文字图像的标注功能；
@@ -79,23 +69,23 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 ## 📚《动手学OCR》电子书
 - [《动手学OCR》电子书](./doc/doc_ch/ocr_book.md)
 
-
 <a name="开源社区"></a>
+
 ## 👫 开源社区
 - **📑项目合作：** 如果您是企业开发者且有明确的OCR垂类应用需求，填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。
-- **👫加入社区：** 微信扫描二维码并填写问卷之后，加入交流群领取20G重磅OCR学习大礼包
- - **包括《动手学OCR》电子书** ，配套讲解视频和notebook项目；PaddleOCR历次发版直播课视频；
+- **👫加入社区：** **微信扫描二维码并填写问卷之后，加入交流群领取20G重磅OCR学习大礼包**
+ - **包括《动手学OCR》电子书** ，配套讲解视频和notebook项目；**PaddleOCR历次发版直播课回放链接**；
  - **OCR场景应用模型集合：** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型，覆盖通用，制造、金融、交通行业的主要OCR垂类应用。
  - PDF2Word应用程序；OCR社区优秀开发者项目分享视频。
 - **🏅️社区项目**：[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等，是官方为社区开发者打造的荣誉墙，也是帮助优质项目宣传的广播站。 
 - **🎁社区常规赛**：社区常规赛是面向OCR开发者的积分赛事，覆盖文档、代码、模型和应用四大类型，以季度为单位评选并发放奖励，赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。
 
 <div align="center">
-<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "150" height = "150" />
+<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "150" height = "150" />
+<p>PaddleOCR官方交流群二维码</p>
 </div>
 
 
-
 <a name="模型下载"></a>
 ## 🛠️ PP-OCR系列模型列表（更新中）
 

diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
@@ -19,6 +19,7 @@ Global:
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_pp-OCRv2_distillation.txt
+ amp_custom_black_list: ['matmul','matmul_v2','elementwise_add']
 
 
 Optimizer:

diff --git a/deploy/avh/requirements.txt b/deploy/avh/requirements.txt
@@ -1,3 +1,4 @@
 paddlepaddle
 numpy
-opencv-python
+opencv-python
+typing-extensions
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
@@ -3,6 +3,8 @@
 - [1. 两阶段OCR算法](#1)
  - [1.1 文本检测算法](#11)
  - [1.2 文本识别算法](#12)
+ - [1.3 文本超分辨率算法](#13)
+ - [1.4 公式识别算法](#14)
 - [2. 端到端OCR算法](#2)
 - [3. 表格识别算法](#3)
 - [4. 关键信息抽取算法](#4)
@@ -107,6 +109,34 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型，**欢迎广
 |RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)|
 |RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) |
 
+
+<a name="13"></a>
+
+### 1.3 文本超分辨率算法
+已支持的文本超分辨率算法列表（戳链接获取使用教程）：
+- [x] [Text Gestalt](./algorithm_sr_gestalt.md)
+- [x] [Text Telescope](./algorithm_sr_telescope.md)
+
+在TextZoom公开数据集上，算法效果如下：
+
+|模型|骨干网络|PSNR_Avg|SSIM_Avg|配置文件|下载链接|
+|---|---|---|---|---|---|
+|Text Gestalt|tsrn|19.28|0.6560| [configs/sr/sr_tsrn_transformer_strock.yml](../../configs/sr/sr_tsrn_transformer_strock.yml)|[训练模型](https://paddleocr.bj.bcebos.com/sr_tsrn_transformer_strock_train.tar)|
+|Text Telescope|tbsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[训练模型](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)|
+
+<a name="14"></a>
+
+### 1.4 公式识别算法
+
+已支持的公式识别算法列表（戳链接获取使用教程）：
+- [x] [CAN](./algorithm_rec_can.md)
+
+在CROHME手写公式数据集上，算法效果如下：
+
+|模型 |骨干网络|配置文件|ExpRate|下载链接|
+| ----- | ----- | ----- | ----- | ----- |
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72%|[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_d28_can_train.tar)|
+
 <a name="2"></a>
 
 ## 2. 端到端算法