update can data loading method and tipc configs, revert precommit config

truongnn1106 · Oct 17, 2022 · c57effb · c57effb
1 parent 25e56a6
commit c57effb
Show file tree

Hide file tree

Showing 16 changed files with 117 additions and 161 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,11 +1,10 @@
-repos:
 - repo: https://github.com/PaddlePaddle/mirrors-yapf.git
- rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+ sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
  files: \.py$
 - repo: https://github.com/pre-commit/pre-commit-hooks
- rev: a11d9314b22d8f8c7556443875b731ef05965464
+ sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
@@ -16,7 +15,7 @@ repos:
  - id: trailing-whitespace
  files: \.md$
 - repo: https://github.com/Lucas-C/pre-commit-hooks
- rev: v1.0.1
+ sha: v1.0.1
  hooks:
  - id: forbid-crlf
  files: \.md$

diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml
@@ -5,14 +5,14 @@ Global:
  print_batch_step: 10
  save_model_dir: ./output/rec/can/
  save_epoch_step: 1
- # evaluation is run every 1105 iterations
+ # evaluation is run every 1105 iterations (1 epoch at batch_size = 8)
  eval_batch_step: [0, 1105]
  cal_metric_during_train: True
- pretrained_model: ./output/rec/can/CAN
- checkpoints: ./output/rec/can/CAN
- save_inference_dir: ./inference/rec_d28_can/
+ pretrained_model:
+ checkpoints:
+ save_inference_dir:
  use_visualdl: False
- infer_img: doc/imgs_hme/hme_01.jpeg
+ infer_img: doc/datasets/crohme_demo/hme_00.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
  max_text_length: 36
@@ -75,37 +75,43 @@ Metric:
 
 Train:
  dataset:
- name: HMERDataSet
+ name: PGDataSet
  data_dir: ./train_data/CROHME/training/images/
  transforms:
  - DecodeImage:
  channel_first: False
  - GrayImageChannelFormat: 
  normalize: True
  inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
  - KeepKeys:
  keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/training/labels.json"]
+ label_file_list: ["./train_data/CROHME/training/labels.txt"]
  loader:
  shuffle: True
- batch_size_per_card: 2
- drop_last: True
- num_workers: 1
+ batch_size_per_card: 8
+ drop_last: False
+ num_workers: 4
  collate_fn: DyMaskCollator
 
 Eval:
  dataset:
- name: HMERDataSet
+ name: PGDataSet
  data_dir: ./train_data/CROHME/evaluation/images/
  transforms: 
  - DecodeImage:
  channel_first: False
  - GrayImageChannelFormat:
  normalize: True
  inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
  - KeepKeys:
  keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+ label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
  loader:
  shuffle: False
  drop_last: False

diff --git a/doc/imgs_hme/hme_00.jpg → doc/datasets/crohme_demo/hme_00.jpg b/doc/imgs_hme/hme_00.jpg → doc/datasets/crohme_demo/hme_00.jpg
diff --git a/doc/imgs_hme/hme_01.jpg → doc/datasets/crohme_demo/hme_01.jpg b/doc/imgs_hme/hme_01.jpg → doc/datasets/crohme_demo/hme_01.jpg
diff --git a/doc/imgs_hme/hme_02.jpg → doc/datasets/crohme_demo/hme_02.jpg b/doc/imgs_hme/hme_02.jpg → doc/datasets/crohme_demo/hme_02.jpg
diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
@@ -1,4 +1,4 @@
-# 手写数学公式识别算法-ABINet
+# 手写数学公式识别算法-CAN
 
 - [1. 算法简介](#1)
 - [2. 环境配置](#2)
@@ -27,7 +27,7 @@
 
 |模型 |骨干网络|配置文件|ExpRate|下载链接|
 | ----- | ----- | ----- | ----- | ----- |
-|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[训练模型](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar)|
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[训练模型](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)|
 
 <a name="2"></a>
 ## 2. 环境配置
@@ -60,16 +60,21 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs
 python3 tools/train.py -c configs/rec/rec_d28_can.yml
 -o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
 ```
+- 默认每训练1个epoch（1105次iteration）进行1次评估，若您更改训练的batch_size，或更换数据集，请在训练时作出如下修改
+```
+python3 tools/train.py -c configs/rec/rec_d28_can.yml
+-o Global.eval_batch_step=[0, {length_of_dataset//batch_size}]
+```
 
 #
 <a name="3-2"></a>
 ### 3.2 评估
 
-可下载已训练完成的[模型文件](#model)，使用如下命令进行评估：
+可下载已训练完成的[模型文件](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)，使用如下命令进行评估：
 
 ```shell
-# 注意将pretrained_model的路径设置为本地路径。
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+# 注意将pretrained_model的路径设置为本地路径。若使用自行训练保存的模型，请注意修改路径和文件名为{path/to/weights}/{model_name}。
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
 ```
 
 <a name="3-3"></a>
@@ -78,9 +83,9 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
 使用如下命令进行单张图片预测：
 ```shell
 # 注意将pretrained_model的路径设置为本地路径。
-python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/datasets/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
 
-# 预测文件夹下所有图像时，可修改infer_img为文件夹，如 Global.infer_img='./doc/imgs_hme/'。
+# 预测文件夹下所有图像时，可修改infer_img为文件夹，如 Global.infer_img='./doc/datasets/crohme_demo/'。
 ```
 
 
@@ -89,17 +94,16 @@ python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.a
 
 <a name="4-1"></a>
 ### 4.1 Python推理
-首先将训练得到best模型，转换成inference model。这里以训练完成的模型为例（[模型下载地址](https://paddleocr.bj.bcebos.com/rec_d28_can_train.tar) )，可以使用如下命令进行转换：
+首先将训练得到best模型，转换成inference model。这里以训练完成的模型为例（[模型下载地址](https://paddleocr.bj.bcebos.com/contribution/can_train.tar) )，可以使用如下命令进行转换：
 
 ```shell
 # 注意将pretrained_model的路径设置为本地路径。
-python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
 
 # 目前的静态图模型默认的输出长度最大为36，如果您需要预测更长的序列，请在导出模型时指定其输出序列为合适的值，例如 Architecture.Head.max_text_length=72
 ```
 **注意：**
 - 如果您是在自己的数据集上训练的模型，并且调整了字典文件，请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。
-- 如果您修改了训练时的输入大小，请修改`tools/export_model.py`文件中的对应ABINet的`infer_shape`。
 
 转换成功后，在目录下有三个文件：
 ```
@@ -112,18 +116,18 @@ python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_infe
 执行如下命令进行模型推理：
 
 ```shell
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/datasets/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
 
-# 预测文件夹下所有图像时，可修改image_dir为文件夹，如 --image_dir='./doc/imgs_hme/'。
+# 预测文件夹下所有图像时，可修改image_dir为文件夹，如 --image_dir='./doc/datasets/crohme_demo/'。
 
 # 如果您需要在白底黑字的图片上进行预测，请设置 --rec_image_inverse=False
 ```
 
-![测试图片样例](../imgs_hme/hme_00.jpg)
+![测试图片样例](../datasets/crohme_demo/hme_00.jpg)
 
 执行命令后，上面图像的预测结果（识别的文本）会打印到屏幕上，示例如下：
 ```shell
-Predicts of ./doc/imgs_hme/hme_03.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
+Predicts of ./doc/datasets/crohme_demo/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
 ```
 
 

diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md
@@ -25,7 +25,7 @@ Using CROHME handwritten mathematical expression recognition datasets for traini
 
 |Model|Backbone|config|exprate|Download link|
 | --- | --- | --- | --- | --- |
-|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|coming soon|
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)|
 
 <a name="2"></a>
 ## 2. Environment
@@ -53,31 +53,35 @@ Evaluation:
 
 ```
 # GPU evaluation
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
 ```
 
 Prediction:
 
 ```
 # The configuration file used for prediction must match the training
-python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/datasets/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
 ```
 
 <a name="4"></a>
 ## 4. Inference and Deployment
 
 <a name="4-1"></a>
 ### 4.1 Python Inference
-First, the model saved during the RobustScanner text recognition training process is converted into an inference model. you can use the following command to convert:
+First, the model saved during the CAN handwritten mathematical expression recognition training process is converted into an inference model. You can use the following command to convert:
 
 ```
 python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+
+# The default maximum output length of the model is 36. If you need to predict longer sequences, specify a suitable maximum length when exporting the model, e.g. Architecture.Head.max_text_length=72
 ```
 
-For RobustScanner text recognition model inference, the following commands can be executed:
+For CAN handwritten mathematical expression recognition model inference, the following commands can be executed:
 
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 100, 100" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/datasets/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+
+# If you need to predict on a picture with black characters on a white background, please set: --rec_image_inverse=False
 ```
 
 <a name="4-2"></a>

diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
@@ -37,7 +37,6 @@
 from ppocr.data.lmdb_dataset import LMDBDataSet, LMDBDataSetSR
 from ppocr.data.pgnet_dataset import PGDataSet
 from ppocr.data.pubtab_dataset import PubTabDataSet
-from ppocr.data.hmer_dataset import HMERDataSet
 
 __all__ = ['build_dataloader', 'transform', 'create_operators']
 
@@ -56,7 +55,7 @@ def build_dataloader(config, mode, device, logger, seed=None):
 
  support_dict = [
  'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
- 'LMDBDataSetSR', 'HMERDataSet'
+ 'LMDBDataSetSR'
  ]
  module_name = config[mode]['dataset']['name']
  assert module_name in support_dict, Exception(

diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
@@ -95,8 +95,8 @@ def __call__(self, batch):
  1] > max_height else max_height
  max_width = item[0].shape[2] if item[0].shape[
  2] > max_width else max_width
- max_length = item[1].shape[0] if item[1].shape[
- 0] > max_length else max_length
+ max_length = len(item[1]) if len(item[
+ 1]) > max_length else max_length
  proper_items.append(item)
 
  images, image_masks = np.zeros(
@@ -111,7 +111,7 @@ def __call__(self, batch):
  _, h, w = proper_items[i][0].shape
  images[i][:, :h, :w] = proper_items[i][0]
  image_masks[i][:, :h, :w] = 1
- l = proper_items[i][1].shape[0]
+ l = len(proper_items[i][1])
  labels[i][:l] = proper_items[i][1]
  label_masks[i][:l] = 1
 

diff --git a/ppocr/data/hmer_dataset.py b/ppocr/data/hmer_dataset.py