From b5f9a7ec5b09b598d7b35c6aafdc2a4b0cd5e857 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Fri, 11 Dec 2020 18:48:23 +0800 Subject: [PATCH] update link in doc --- configs/det/bak/det_r50_vd_db.yml | 130 ------------------ .../bak/rec_mv3_none_bilstm_ctc_simple.yml | 106 -------------- .../rec/bak/rec_r34_vd_none_bilstm_ctc.yml | 104 -------------- configs/rec/bak/rec_r34_vd_none_none_ctc.yml | 103 -------------- ...yaml => rec_chinese_common_train_v2.0.yml} | 0 ...0.yaml => rec_chinese_lite_train_v2.0.yml} | 0 doc/doc_ch/inference.md | 43 +++--- doc/doc_ch/quickstart.md | 43 +++--- doc/doc_en/inference_en.md | 45 +++--- doc/doc_en/quickstart_en.md | 49 +++---- tools/export_model.py | 3 +- tools/program.py | 1 - 12 files changed, 91 insertions(+), 536 deletions(-) delete mode 100644 configs/det/bak/det_r50_vd_db.yml delete mode 100644 configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml delete mode 100644 configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml delete mode 100644 configs/rec/bak/rec_r34_vd_none_none_ctc.yml rename configs/rec/ch_ppocr_v2.0/{rec_chinese_common_train_v2.0.yaml => rec_chinese_common_train_v2.0.yml} (100%) rename configs/rec/ch_ppocr_v2.0/{rec_chinese_lite_train_v2.0.yaml => rec_chinese_lite_train_v2.0.yml} (100%) diff --git a/configs/det/bak/det_r50_vd_db.yml b/configs/det/bak/det_r50_vd_db.yml deleted file mode 100644 index a07273b4ae..0000000000 --- a/configs/det/bak/det_r50_vd_db.yml +++ /dev/null @@ -1,130 +0,0 @@ -Global: - use_gpu: true - epoch_num: 1200 - log_smooth_window: 20 - print_batch_step: 2 - save_model_dir: ./output/det_r50_vd/ - save_epoch_step: 1200 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 8 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: False - pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/ - checkpoints: - save_inference_dir: - use_visualdl: True - infer_img: doc/imgs_en/img_10.jpg - save_res_path: ./output/det_db/predicts_db.txt - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0 - -Architecture: - type: det - algorithm: DB - Transform: - Backbone: - name: ResNet - layers: 50 - Neck: - name: FPN - out_channels: 256 - Head: - name: DBHead - k: 50 - -Loss: - name: DBLoss - balance_loss: true - main_loss_type: DiceLoss - alpha: 5 - beta: 10 - ohem_ratio: 3 - -PostProcess: - name: DBPostProcess - thresh: 0.3 - box_thresh: 0.6 - max_candidates: 1000 - unclip_ratio: 1.5 - -Metric: - name: DetMetric - main_indicator: hmean - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./detection/ - file_list: - - ./detection/train_icdar2015_label.txt # dataset1 - ratio_list: [1.0] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - DetLabelEncode: # Class handling label - - IaaAugment: - augmenter_args: - - { 'type': Fliplr, 'args': { 'p': 0.5 } } - - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } } - - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } } - - EastRandomCropData: - size: [ 640,640 ] - max_tries: 50 - keep_ratio: true - - MakeBorderMap: - shrink_ratio: 0.4 - thresh_min: 0.3 - thresh_max: 0.7 - - MakeShrinkMap: - shrink_ratio: 0.4 - min_text_size: 8 - - NormalizeImage: - scale: 1./255. 
- mean: [ 0.485, 0.456, 0.406 ] - std: [ 0.229, 0.224, 0.225 ] - order: 'hwc' - - ToCHWImage: - - keepKeys: - keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # dataloader will return list in this order - loader: - shuffle: True - drop_last: False - batch_size: 16 - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./detection/ - file_list: - - ./detection/test_icdar2015_label.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - DetLabelEncode: # Class handling label - - DetResizeForTest: - image_shape: [736,1280] - - NormalizeImage: - scale: 1./255. - mean: [ 0.485, 0.456, 0.406 ] - std: [ 0.229, 0.224, 0.225 ] - order: 'hwc' - - ToCHWImage: - - keepKeys: - keep_keys: ['image','shape','polys','ignore_tags'] - loader: - shuffle: False - drop_last: False - batch_size: 1 # must be 1 - num_workers: 8 \ No newline at end of file diff --git a/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml b/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml deleted file mode 100644 index 1be7512c9d..0000000000 --- a/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml +++ /dev/null @@ -1,106 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/mv3_none_bilstm_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: small - small_stride: [ 1, 2, 2, 2 ] - Neck: - name: SequenceEncoder - encoder_type: fc - hidden_size: 96 - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml deleted file mode 100644 index 36e3d1c81c..0000000000 --- 
a/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml +++ /dev/null @@ -1,104 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/res34_none_bilstm_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: ResNet - layers: 34 - Neck: - name: SequenceEncoder - encoder_type: fc - hidden_size: 96 - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/bak/rec_r34_vd_none_none_ctc.yml b/configs/rec/bak/rec_r34_vd_none_none_ctc.yml deleted file mode 100644 index 641e855b43..0000000000 --- a/configs/rec/bak/rec_r34_vd_none_none_ctc.yml +++ /dev/null @@ -1,103 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/res34_none_none_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: ResNet - layers: 34 - Neck: - name: SequenceEncoder - encoder_type: reshape - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - 
name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml similarity index 100% rename from configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml rename to configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml similarity index 100% rename from configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml rename to configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index dfd84cccba..8f4bea07fc 100644 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -41,7 +41,7 @@ inference 模型(`paddle.jit.save`保存的模型) 下载超轻量级中文检测模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ ``` 上述模型是以MobileNetV3为backbone训练的DB算法,将训练好的模型转换成inference模型只需要运行如下命令: ``` @@ -51,9 +51,9 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar # Global.load_static_weights 参数需要设置为 False。 # Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ +python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ ``` -转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.checkpoints`参数,其指向训练中保存的模型参数文件。 +转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.pretrained_model`参数,其指向训练中保存的模型参数文件。 转换成功后,在模型保存目录下有三个文件: ``` inference/det_db/ @@ -67,7 +67,7 @@ inference/det_db/ 下载超轻量中文识别模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ ``` 识别模型转inference模型与检测的方式相同,如下: @@ -78,7 +78,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar # Global.load_static_weights 参数需要设置为 False。 # 
Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ ``` **注意:**如果您是在自己的数据集上训练的模型,并且调整了中文字符的字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。 @@ -96,7 +96,7 @@ python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_trai 下载方向分类模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ ``` 方向分类模型转inference模型与检测的方式相同,如下: @@ -107,7 +107,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar # Global.load_static_weights 参数需要设置为 False。 # Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ +python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ ``` 转换成功后,在目录下有三个文件: @@ -152,10 +152,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di ### 2. DB文本检测模型推理 -首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db +python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db ``` DB文本检测模型推理,可以执行如下命令: @@ -173,10 +173,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img_10.jpg" --det_ ### 3. EAST文本检测模型推理 -首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址 (coming soon)](link) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east +python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east ``` **EAST文本检测模型推理,需要设置参数`--det_algorithm="EAST"`**,可以执行如下命令: @@ -194,9 +194,9 @@ python3 tools/infer/predict_det.py --det_algorithm="EAST" --image_dir="./doc/img ### 4. SAST文本检测模型推理 #### (1). 
四边形文本检测模型(ICDAR2015) -首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 ``` **SAST文本检测模型推理,需要设置参数`--det_algorithm="SAST"`**,可以执行如下命令: @@ -208,10 +208,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img ![](../imgs_results/det_res_img_10_sast.jpg) #### (2). 弯曲文本检测模型(Total-Text) -首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt ``` @@ -254,10 +254,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695] 我们以 CRNN 为例,介绍基于CTC损失的识别模型推理。 Rosetta 使用方式类似,不用设置识别算法参数rec_algorithm。 首先将 Rosetta 文本识别训练过程中保存的模型,转换成inference model。以基于Resnet34_vd骨干网络,使用MJSynth和SynthText两个英文文本识别合成数据集训练 -的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn +python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn ``` @@ -313,9 +313,9 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" - 执行命令后,上图的预测结果为: ``` text -2020-09-19 16:15:05,076-INFO: index: [205 206 38 39] -2020-09-19 16:15:05,077-INFO: word : 바탕으로 -2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535 +2020-09-19 16:15:05,076-INFO: index: [205 206 38 39] +2020-09-19 16:15:05,077-INFO: word : 바탕으로 +2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535 ``` @@ -337,8 +337,7 @@ python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" 执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下: ``` -infer_img: doc/imgs_words/ch/word_1.jpg - result: ('0', 0.9998784) +Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982] ``` diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index b10258857f..a2ab23461f 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -9,12 +9,12 @@ ## 2.inference模型下载 -* 
移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR v1.1 系列模型下载](../doc_ch/models_list.md) +* 移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR v2.0 系列模型下载](../doc_ch/models_list.md) | 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| 中英文超轻量OCR模型(xM) | |移动端&服务器端|[推理模型](link) / [预训练模型](link)|[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) | -| 中英文通用OCR模型(xM) | |服务器端 |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}}) | +| 中英文超轻量OCR模型(8.6M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| 中英文通用OCR模型(146.4M) | ch_ppocr_server_v2.0_xx |服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | * windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下 @@ -37,28 +37,29 @@ cd .. ``` mkdir inference && cd inference # 下载超轻量级中文OCR模型的检测模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_det_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar # 下载超轻量级中文OCR模型的识别模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar # 下载超轻量级中文OCR模型的文本方向分类器模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar cd .. ``` 解压完毕后应有如下文件结构: ``` -|-inference - |-ch_ppocr_mobile_v1.1_det_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_rec_infer - |- model - |- params - |-ch_ppocr_mobile-v1.1_cls_infer - |- model - |- params - ... +├── ch_ppocr_mobile_v2.0_cls_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_det_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_rec_infer + ├── inference.pdiparams + ├── inference.pdiparams.info + └── inference.pdmodel ``` ## 3.单张图像或者图像集合预测 @@ -68,13 +69,13 @@ cd .. 
```bash # 预测image_dir指定的单张图像 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # 预测image_dir指定的图像集合 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # 如果想使用CPU进行预测,需设置use_gpu参数为False -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False ``` - 通用中文OCR模型 @@ -83,7 +84,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode ```bash # 预测image_dir指定的单张图像 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True ``` * 注意: diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index ac1b634de4..ee567451aa 100644 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -43,21 +43,21 @@ Next, we first introduce how to convert a trained model into an inference model, Download the lightweight Chinese detection model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ ``` The above model is a DB algorithm trained with MobileNetV3 as the backbone. 
To convert the trained model into an inference model, just run the following command: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ +python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ ``` -When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.checkpoints` parameter in the configuration file. +When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.pretrained_model` parameter in the configuration file. After the conversion is successful, there are three files in the model save directory: ``` inference/det_db/ @@ -71,18 +71,18 @@ inference/det_db/ Download the lightweight Chinese recognition model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ ``` The recognition model is converted to the inference model in the same way as the detection, as follows: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ ``` If you have a model trained on your own dataset with a different dictionary file, please make sure that you modify the `character_dict_path` in the configuration file to your dictionary file path. 
@@ -100,18 +100,18 @@ inference/det_db/ Download the angle classification model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ ``` The angle classification model is converted to the inference model in the same way as the detection, as follows: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ +python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ ``` After the conversion is successful, there are two files in the directory: @@ -158,10 +158,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di ### 2. DB TEXT DETECTION MODEL INFERENCE -First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db +python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db ``` DB text detection model inference, you can execute the following command: @@ -179,10 +179,10 @@ The visualized text detection results are saved to the `./inference_results` fol ### 3. EAST TEXT DETECTION MODEL INFERENCE -First, convert the model saved in the EAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the EAST text detection training process into an inference model. 
Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east +python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east ``` **For EAST text detection model inference, you need to set the parameter ``--det_algorithm="EAST"``**, run the following command: @@ -200,10 +200,10 @@ The visualized text detection results are saved to the `./inference_results` fol ### 4. SAST TEXT DETECTION MODEL INFERENCE #### (1). Quadrangle text detection model (ICDAR2015) -First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 ``` **For SAST quadrangle text detection model inference, you need to set the parameter `--det_algorithm="SAST"`**, run the following command: @@ -217,10 +217,10 @@ The visualized text detection results are saved to the `./inference_results` fol ![](../imgs_results/det_res_img_10_sast.jpg) #### (2). Curved text detection model (Total-Text) -First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert: +First, convert the model saved in the SAST text detection training process into an inference model. 
Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link (coming soon)](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt ``` **For SAST curved text detection model inference, you need to set the parameter `--det_algorithm="SAST"` and `--det_sast_polygon=True`**, run the following command: @@ -262,10 +262,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695] Taking CRNN as an example, we introduce the recognition model inference based on CTC loss. Rosetta and Star-Net are used in a similar way, No need to set the recognition algorithm parameter rec_algorithm. -First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](link)). It can be converted as follow: +First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)). It can be converted as follow: ``` -python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn +python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn ``` For CRNN text recognition model inference, execute the following commands: @@ -335,7 +335,7 @@ The following will introduce the angle classification model inference. For angle classification model inference, you can execute the following commands: ``` -python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/" +python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/" ``` ![](../imgs_words_en/word_10.png) After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen. 
``` -infer_img: doc/imgs_words_en/word_10.png - result: ('0', 0.9999995) + Predicts of ./doc/imgs_words_en/word_10.png:['0', 0.9999995] ``` diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index 6d4ce95d34..0556613889 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -9,13 +9,13 @@ Please refer to [quick installation](./installation_en.md) to configure the Padd ## 2.inference models -The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v1.1 series model list](../doc_ch/models_list.md) +The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v2.0 series model list](../doc_ch/models_list.md) - -| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model | +| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| Ultra-lightweight Chinese OCR model(xM) | ch_ppocr_mobile_v1.1_xx |Mobile-side/Server-side|[inference model](link) / [pretrained model](link)|[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) | -| Universal Chinese OCR model(xM) |ch_ppocr_server_v1.1_xx|Server-side |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) | +| Ultra-lightweight Chinese OCR model(8.6M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| Universal Chinese OCR model(146.4M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | + * If `wget` is not installed in the windows environment, you can copy the link to the browser to download when downloading the model, then uncompress it and place it in the corresponding directory. 
@@ -37,28 +37,29 @@ Take the ultra-lightweight model as an example: ``` mkdir inference && cd inference # Download the detection model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_det_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar # Download the recognition model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar -# Download the direction classifier model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar +# Download the angle classifier model of the ultra-lightweight Chinese OCR model and uncompress it +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar cd .. ``` After decompression, the file structure should be as follows: ``` -|-inference - |-ch_ppocr_mobile_v1.1_det_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_rec_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_cls_infer - |- model - |- params - ... +├── ch_ppocr_mobile_v2.0_cls_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_det_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_rec_infer + ├── inference.pdiparams + ├── inference.pdiparams.info + └── inference.pdmodel ``` ## 3. Single image or image set prediction @@ -70,13 +71,13 @@ After decompression, the file structure should be as follows: ```bash # Predict a single image specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # Predict imageset specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # If you want to use the CPU for prediction, you need to set the use_gpu parameter to False -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False +python3 tools/infer/predict_system.py 
--image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False ``` - Universal Chinese OCR model @@ -85,7 +86,7 @@ Please follow the above steps to download the corresponding models and update th ``` # Predict a single image specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True ``` * Note diff --git a/tools/export_model.py b/tools/export_model.py index 51c061788e..74357d58ec 100755 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -28,7 +28,7 @@ from ppocr.postprocess import build_post_process from ppocr.utils.save_load import init_model from ppocr.utils.logging import get_logger -from tools.program import load_config, merge_config,ArgsParser +from tools.program import load_config, merge_config, ArgsParser def main(): @@ -36,7 +36,6 @@ def main(): config = load_config(FLAGS.config) merge_config(FLAGS.opt) logger = get_logger() - print(config) # build post process post_process_class = build_post_process(config['PostProcess'], diff --git a/tools/program.py b/tools/program.py index 8e84d30e64..787a59d49b 100755 --- a/tools/program.py +++ b/tools/program.py @@ -113,7 +113,6 @@ def merge_config(config): global_config.keys(), sub_keys[0]) cur = global_config[sub_keys[0]] for idx, sub_key in enumerate(sub_keys[1:]): - assert (sub_key in cur) if idx == len(sub_keys) - 2: cur[sub_key] = value else: