Merge branch 'dygraph' into add_multi_doc

tink2123 · Apr 9, 2021 · 31fa491 · 31fa491
2 parents aeffcd8 + f2fc1a3
commit 31fa491
Show file tree

Hide file tree

Showing 39 changed files with 4,534 additions and 97 deletions.
diff --git a/configs/det/det_r50_vd_sast_icdar15.yml b/configs/det/det_r50_vd_sast_icdar15.yml
@@ -14,12 +14,13 @@ Global:
  load_static_weights: True
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
- checkpoints: 
+ checkpoints:
  save_inference_dir:
  use_visualdl: False
- infer_img: 
+ infer_img:
  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
 
+
 Architecture:
  model_type: det
  algorithm: SAST

diff --git a/configs/e2e/e2e_r50_vd_pg.yml b/configs/e2e/e2e_r50_vd_pg.yml
@@ -0,0 +1,114 @@
+Global:
+ use_gpu: True
+ epoch_num: 600
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/pgnet_r50_vd_totaltext/
+ save_epoch_step: 10
+ # evaluation is run every 1000 iterations after the 0th iteration
+ eval_batch_step: [ 0, 1000 ]
+ # 1. If pretrained_model is saved in static mode, such as classification pretrained model
+ # from static branch, load_static_weights must be set as True.
+ # 2. If you want to finetune the pretrained models we provide in the docs,
+ # you should set load_static_weights as False.
+ load_static_weights: False
+ cal_metric_during_train: False
+ pretrained_model:
+ checkpoints:
+ save_inference_dir:
+ use_visualdl: False
+ infer_img:
+ valid_set: totaltext # two modes: totaltext validates curved words, partvgg validates non-curved words
+ save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
+ character_dict_path: ppocr/utils/ic15_dict.txt
+ character_type: EN
+ max_text_length: 50 # the max length in seq
+ max_text_nums: 30 # the max seq nums in a pic
+ tcl_len: 64
+
+Architecture:
+ model_type: e2e
+ algorithm: PGNet
+ Transform:
+ Backbone:
+ name: ResNet
+ layers: 50
+ Neck:
+ name: PGFPN
+ Head:
+ name: PGHead
+
+Loss:
+ name: PGLoss
+ tcl_bs: 64
+ max_text_length: 50 # the same as Global: max_text_length
+ max_text_nums: 30 # the same as Global: max_text_nums
+ pad_num: 36 # the length of dict for pad
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ learning_rate: 0.001
+ regularizer:
+ name: 'L2'
+ factor: 0
+
+
+PostProcess:
+ name: PGPostProcess
+ score_thresh: 0.5
+Metric:
+ name: E2EMetric
+ character_dict_path: ppocr/utils/ic15_dict.txt
+ main_indicator: f_score_e2e
+
+Train:
+ dataset:
+ name: PGDataSet
+ label_file_list: [.././train_data/total_text/train/]
+ ratio_list: [1.0]
+ data_format: icdar # two data formats: icdar/textnet
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - PGProcessTrain:
+ batch_size: 14 # same as loader: batch_size_per_card
+ min_crop_size: 24
+ min_text_size: 4
+ max_text_size: 512
+ - KeepKeys:
+ keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
+ loader:
+ shuffle: True
+ drop_last: True
+ batch_size_per_card: 14
+ num_workers: 16
+
+Eval:
+ dataset:
+ name: PGDataSet
+ data_dir: ./train_data/
+ label_file_list: [./train_data/total_text/test/]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: RGB
+ channel_first: False
+ - E2ELabelEncode:
+ - E2EResizeForTest:
+ max_side_len: 768
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags' ]
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1 # must be 1
+ num_workers: 2
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
@@ -12,7 +12,8 @@ inference 模型（`paddle.jit.save`保存的模型）
 - [一、训练模型转inference模型](#训练模型转inference模型)
  - [检测模型转inference模型](#检测模型转inference模型)
  - [识别模型转inference模型](#识别模型转inference模型) 
- - [方向分类模型转inference模型](#方向分类模型转inference模型) 
+ - [方向分类模型转inference模型](#方向分类模型转inference模型)
+ - [端到端模型转inference模型](#端到端模型转inference模型)
 
 - [二、文本检测模型推理](#文本检测模型推理)
  - [1. 超轻量中文检测模型推理](#超轻量中文检测模型推理)
@@ -27,10 +28,13 @@ inference 模型（`paddle.jit.save`保存的模型）
  - [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
  - [5. 多语言模型的推理](#多语言模型的推理)
 
-- [四、方向分类模型推理](#方向识别模型推理)
+- [四、端到端模型推理](#端到端模型推理)
+ - [1. PGNet端到端模型推理](#PGNet端到端模型推理)
+
+- [五、方向分类模型推理](#方向分类模型推理)
  - [1. 方向分类模型推理](#方向分类模型推理)
 
-- [五、文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理)
+- [六、文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理)
  - [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理)
  - [2. 其他模型推理](#其他模型推理)
 
@@ -118,6 +122,32 @@ python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_mo
  ├── inference.pdiparams.info # 分类inference模型的参数信息，可忽略
  └── inference.pdmodel # 分类inference模型的program文件
 ```
+<a name="端到端模型转inference模型"></a>
+### 端到端模型转inference模型
+
+下载端到端模型：
+```
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
+```
+
+端到端模型转inference模型与检测的方式相同，如下：
+```
+# -c 后面设置训练算法的yml配置文件
+# -o 配置可选参数
+# Global.pretrained_model 参数设置待转换的训练模型地址，不用添加文件后缀 .pdmodel，.pdopt或.pdparams。
+# Global.load_static_weights 参数需要设置为 False。
+# Global.save_inference_dir参数设置转换的模型将保存的地址。
+
+python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e/
+```
+
+转换成功后，在目录下有三个文件：
+```
+/inference/e2e/
+ ├── inference.pdiparams # 端到端inference模型的参数文件
+ ├── inference.pdiparams.info # 端到端inference模型的参数信息，可忽略
+ └── inference.pdmodel # 端到端inference模型的program文件
+```
 
 <a name="文本检测模型推理"></a>
 ## 二、文本检测模型推理
@@ -332,8 +362,38 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" -
 Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
 ```
 
+<a name="端到端模型推理"></a>
+## 四、端到端模型推理
+
+端到端模型推理，默认使用PGNet模型的配置参数。当不使用PGNet模型时，在推理时，需要通过传入相应的参数进行算法适配，细节参考下文。
+<a name="PGNet端到端模型推理"></a>
+### 1. PGNet端到端模型推理
+#### (1). 四边形文本检测模型（ICDAR2015） 
+首先将PGNet端到端训练过程中保存的模型，转换成inference model。以基于Resnet50_vd骨干网络，在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar))，可以使用如下命令进行转换：
+```
+python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
+```
+**PGNet端到端模型推理，需要设置参数`--e2e_algorithm="PGNet"`**，可以执行如下命令：
+```
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=False
+```
+可视化文本端到端结果默认保存到`./inference_results`文件夹里面，结果文件的名称前缀为'e2e_res'。结果示例如下：
+
+![](../imgs_results/e2e_res_img_10_pgnet.jpg)
+
+#### (2). 弯曲文本检测模型（Total-Text） 
+和四边形文本检测模型共用一个推理模型。
+**PGNet端到端模型推理，需要设置参数`--e2e_algorithm="PGNet"`，同时，还需要增加参数`--e2e_pgnet_polygon=True`**，可以执行如下命令：
+```
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+```
+可视化文本端到端结果默认保存到`./inference_results`文件夹里面，结果文件的名称前缀为'e2e_res'。结果示例如下：
+
+![](../imgs_results/e2e_res_img623_pgnet.jpg)
+
+
 <a name="方向分类模型推理"></a>
-## 四、方向分类模型推理
+## 五、方向分类模型推理
 
 下面将介绍方向分类模型推理。
 
@@ -358,7 +418,7 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
 ```
 
 <a name="文本检测、方向分类和文字识别串联推理"></a>
-## 五、文本检测、方向分类和文字识别串联推理
+## 六、文本检测、方向分类和文字识别串联推理
 <a name="超轻量中文OCR模型推理"></a>
 ### 1. 超轻量中文OCR模型推理