Merge remote-tracking branch 'origin/dygraph' into dygraph

PaddlePaddle · Sep 23, 2021 · e40fd43 · e40fd43
2 parents 6e0cbbe + 0da240d
commit e40fd43
Show file tree

Hide file tree

Showing 71 changed files with 2,151 additions and 480 deletions.
diff --git a/README_ch.md b/README_ch.md
@@ -81,7 +81,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 
 | 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 |
 | ------------ | --------------- | ----------------|---- | ---------- | -------- |
-| 中英文超轻量PP-OCRv2模型（13.0M） | ch_PP-OCRv2_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/chinese/ch_PP-OCRv2_det_distill_train.tar)| [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
+| 中英文超轻量PP-OCRv2模型（13.0M） | ch_PP-OCRv2_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
 | 中英文超轻量PP-OCR mobile模型（9.4M） | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
 | 中英文通用PP-OCR server模型（143.4M） |ch_ppocr_server_v2.0_xx|服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | 
 

diff --git a/configs/det/det_mv3_pse.yml b/configs/det/det_mv3_pse.yml
@@ -0,0 +1,135 @@
+Global:
+ use_gpu: true
+ epoch_num: 600
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/det_mv3_pse/
+ save_epoch_step: 600
+ # evaluation is run every 63 iterations
+ eval_batch_step: [ 0,63 ]
+ cal_metric_during_train: False
+ pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+ checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
+ save_inference_dir:
+ use_visualdl: False
+ infer_img: doc/imgs_en/img_10.jpg
+ save_res_path: ./output/det_pse/predicts_pse.txt
+
+Architecture:
+ model_type: det
+ algorithm: PSE
+ Transform: null
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: large
+ Neck:
+ name: FPN
+ out_channels: 96
+ Head:
+ name: PSEHead
+ hidden_dim: 96
+ out_channels: 7
+
+Loss:
+ name: PSELoss
+ alpha: 0.7
+ ohem_ratio: 3
+ kernel_sample_mask: pred
+ reduction: none
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Step
+ learning_rate: 0.001
+ step_size: 200
+ gamma: 0.1
+ regularizer:
+ name: 'L2'
+ factor: 0.0005
+
+PostProcess:
+ name: PSEPostProcess
+ thresh: 0
+ box_thresh: 0.85
+ min_area: 16
+ box_type: box # 'box' or 'poly'
+ scale: 1
+
+Metric:
+ name: DetMetric
+ main_indicator: hmean
+
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/icdar2015/text_localization/
+ label_file_list:
+ - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+ ratio_list: [ 1.0 ]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - ColorJitter:
+ brightness: 0.12549019607843137
+ saturation: 0.5
+ - IaaAugment:
+ augmenter_args:
+ - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } }
+ - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+ - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } }
+ - MakePseGt:
+ kernel_num: 7
+ min_shrink_ratio: 0.4
+ size: 640
+ - RandomCropImgMask:
+ size: [ 640,640 ]
+ main_key: gt_text
+ crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ]
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list
+ loader:
+ shuffle: True
+ drop_last: False
+ batch_size_per_card: 16
+ num_workers: 8
+
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/icdar2015/text_localization/
+ label_file_list:
+ - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+ ratio_list: [ 1.0 ]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - DetResizeForTest:
+ limit_side_len: 736
+ limit_type: min
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ]
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1 # must be 1
+ num_workers: 8
diff --git a/configs/det/det_r50_vd_pse.yml b/configs/det/det_r50_vd_pse.yml
@@ -0,0 +1,134 @@
+Global:
+ use_gpu: true
+ epoch_num: 600
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/det_r50_vd_pse/
+ save_epoch_step: 600
+ # evaluation is run every 125 iterations
+ eval_batch_step: [ 0,125 ]
+ cal_metric_during_train: False
+ pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
+ checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
+ save_inference_dir:
+ use_visualdl: False
+ infer_img: doc/imgs_en/img_10.jpg
+ save_res_path: ./output/det_pse/predicts_pse.txt
+
+Architecture:
+ model_type: det
+ algorithm: PSE
+ Transform:
+ Backbone:
+ name: ResNet
+ layers: 50
+ Neck:
+ name: FPN
+ out_channels: 256
+ Head:
+ name: PSEHead
+ hidden_dim: 256
+ out_channels: 7
+
+Loss:
+ name: PSELoss
+ alpha: 0.7
+ ohem_ratio: 3
+ kernel_sample_mask: pred
+ reduction: none
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Step
+ learning_rate: 0.0001
+ step_size: 200
+ gamma: 0.1
+ regularizer:
+ name: 'L2'
+ factor: 0.0005
+
+PostProcess:
+ name: PSEPostProcess
+ thresh: 0
+ box_thresh: 0.85
+ min_area: 16
+ box_type: box # 'box' or 'poly'
+ scale: 1
+
+Metric:
+ name: DetMetric
+ main_indicator: hmean
+
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/icdar2015/text_localization/
+ label_file_list:
+ - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+ ratio_list: [ 1.0 ]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - ColorJitter:
+ brightness: 0.12549019607843137
+ saturation: 0.5
+ - IaaAugment:
+ augmenter_args:
+ - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } }
+ - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+ - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } }
+ - MakePseGt:
+ kernel_num: 7
+ min_shrink_ratio: 0.4
+ size: 640
+ - RandomCropImgMask:
+ size: [ 640,640 ]
+ main_key: gt_text
+ crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ]
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list
+ loader:
+ shuffle: True
+ drop_last: False
+ batch_size_per_card: 8
+ num_workers: 8
+
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/icdar2015/text_localization/
+ label_file_list:
+ - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+ ratio_list: [ 1.0 ]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - DetResizeForTest:
+ limit_side_len: 736
+ limit_type: min
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ]
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1 # must be 1
+ num_workers: 8