Skip to content

Commit

Permalink
update can transform method and add copyright info for new file
Browse files Browse the repository at this point in the history
  • Loading branch information
dorren002 committed Oct 17, 2022
1 parent c57effb commit f875556
Show file tree
Hide file tree
Showing 14 changed files with 129 additions and 77 deletions.
23 changes: 14 additions & 9 deletions configs/rec/rec_d28_can.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ Architecture:
bottleneck: True
use_dropout: True
input_channel: 1

Head:
name: CANHead
in_channel: 684
Expand All @@ -66,24 +65,27 @@ Loss:
name: CANLoss

PostProcess:
name: SeqLabelDecode
character: 111
name: CANLabelDecode
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt

Metric:
name: CANMetric
main_indicator: exp_rate

Train:
dataset:
name: PGDataSet
name: SimpleDataSet
data_dir: ./train_data/CROHME/training/images/
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
normalize: True
inverse: True
- SeqLabelEncode:
- CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
Expand All @@ -98,15 +100,18 @@ Train:

Eval:
dataset:
name: PGDataSet
name: SimpleDataSet
data_dir: ./train_data/CROHME/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
normalize: True
inverse: True
- SeqLabelEncode:
- CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
Expand Down
2 changes: 1 addition & 1 deletion ppocr/data/imaug/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \
ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \
RFLRecResizeImg, GrayImageChannelFormat
RFLRecResizeImg
from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
Expand Down
4 changes: 2 additions & 2 deletions ppocr/data/imaug/label_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,14 +1479,14 @@ def __call__(self, data):
return data


class SeqLabelEncode(BaseRecLabelEncode):
class CANLabelEncode(BaseRecLabelEncode):
def __init__(self,
character_dict_path,
max_text_length=100,
use_space_char=False,
lower=True,
**kwargs):
super(SeqLabelEncode, self).__init__(
super(CANLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char, lower)

def encode(self, text_seq):
Expand Down
24 changes: 24 additions & 0 deletions ppocr/data/imaug/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,3 +498,27 @@ def __call__(self, img):
img_numpy = np.array(img).astype("float32")
img_numpy = img_numpy.transpose((2, 0, 1)) / 255
return img_numpy


class GrayImageChannelFormat(object):
    """
    Convert a 3-channel BGR image into a single-channel gray image with a
    leading channel axis: (h, w, 3) -> (1, h, w).

    Args:
        inverse (bool): when True, invert the gray image. Floating point
            images are expected to be scaled to [0, 1] already (e.g. by a
            preceding NormalizeImage) and are inverted as ``abs(x - 1)``;
            unsigned integer images are inverted against the maximum value
            of their dtype (``255 - x`` for uint8).
        **kwargs: accepted and ignored so that extra config keys do not
            break construction.
    """

    def __init__(self, inverse=False, **kwargs):
        self.inverse = inverse

    @staticmethod
    def _invert(img):
        """Return the inverted version of gray image ``img``."""
        # ``img - 1`` on an unsigned integer array wraps around (0 -> 255),
        # which is not an inversion, so integer images are flipped against
        # the dtype's maximum instead.
        if np.issubdtype(img.dtype, np.unsignedinteger):
            return np.iinfo(img.dtype).max - img
        return np.abs(img - 1)

    def __call__(self, data):
        """
        Replace ``data['image']`` with its (1, h, w) gray version and keep
        the untouched input image under ``data['src_image']``.
        """
        img = data['image']
        # cvtColor expects a channel-last (h, w, 3) image and returns (h, w).
        img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_expanded = np.expand_dims(img_single_channel, 0)

        if self.inverse:
            data['image'] = self._invert(img_expanded)
        else:
            data['image'] = img_expanded

        data['src_image'] = img
        return data
30 changes: 0 additions & 30 deletions ppocr/data/imaug/rec_img_aug.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,36 +465,6 @@ def __call__(self, data):
return data


class GrayImageChannelFormat(object):
    """
    Convert a 3-channel BGR image into a single-channel float32 gray image
    with a leading channel axis: (h, w, 3) -> (1, h, w).

    Args:
        normalize (bool): when True, divide the gray image by 255.0, i.e.
            map the dynamic range [0, 255] -> [0, 1].
        inverse (bool): when True, invert the gray image. Floating point
            images are inverted as ``abs(x - 1)`` (meaningful for a [0, 1]
            range); unsigned integer images (only possible when
            ``normalize`` is False) are inverted against the maximum value
            of their dtype (``255 - x`` for uint8).
        **kwargs: accepted and ignored so that extra config keys do not
            break construction.
    """

    def __init__(self, normalize=True, inverse=False, **kwargs):
        self.normalize = normalize
        self.inverse = inverse

    @staticmethod
    def _invert(img):
        """Return the inverted version of gray image ``img``."""
        # ``img - 1`` on an unsigned integer array wraps around (0 -> 255),
        # which is not an inversion, so integer images are flipped against
        # the dtype's maximum instead.
        if np.issubdtype(img.dtype, np.unsignedinteger):
            return np.iinfo(img.dtype).max - img
        return np.abs(img - 1)

    def __call__(self, data):
        """
        Replace ``data['image']`` with its (1, h, w) float32 gray version and
        keep the untouched input image under ``data['src_image']``.
        """
        img = data['image']
        # cvtColor expects a channel-last (h, w, 3) image and returns (h, w).
        img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_single_channel = np.expand_dims(img_single_channel, 0)

        if self.normalize:
            img_single_channel = img_single_channel / 255.0

        if self.inverse:
            img_single_channel = self._invert(img_single_channel)

        data['image'] = img_single_channel.astype('float32')
        data['src_image'] = img
        return data


def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
imgC, imgH, imgW_min, imgW_max = image_shape
h = img.shape[0]
Expand Down
18 changes: 18 additions & 0 deletions ppocr/losses/rec_can_loss.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/LBH1024/CAN/models/can.py
"""

import paddle
import paddle.nn as nn
import numpy as np
Expand Down
32 changes: 19 additions & 13 deletions ppocr/modeling/backbones/rec_densenet.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class Bottleneck(nn.Layer):
'''
ratio: 16
growthRate: 24
reduction: 0.5
bottleneck: True
use_dropout: True
'''

def __init__(self, nChannels, growthRate, use_dropout):
super(Bottleneck, self).__init__()
interChannels = 4 * growthRate
Expand Down Expand Up @@ -78,11 +88,7 @@ class DenseNet(nn.Layer):
def __init__(self, growthRate, reduction, bottleneck, use_dropout,
input_channel, **kwargs):
super(DenseNet, self).__init__()
'''
ratio: 16
growthRate: 24
reduction: 0.5
'''

nDenseBlocks = 16
nChannels = 2 * growthRate

Expand Down
27 changes: 26 additions & 1 deletion ppocr/modeling/heads/rec_can_head.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
from turtle import forward
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/LBH1024/CAN/models/can.py
https://github.com/LBH1024/CAN/models/counting.py
https://github.com/LBH1024/CAN/models/decoder.py
https://github.com/LBH1024/CAN/models/attention.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.nn as nn
import paddle
import math
Expand Down
4 changes: 2 additions & 2 deletions ppocr/postprocess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from .picodet_postprocess import PicoDetPostProcess
from .ct_postprocess import CTPostProcess
from .drrg_postprocess import DRRGPostprocess
from .rec_postprocess import SeqLabelDecode
from .rec_postprocess import CANLabelDecode


def build_post_process(config, global_config=None):
Expand All @@ -52,7 +52,7 @@ def build_post_process(config, global_config=None):
'TableMasterLabelDecode', 'SPINLabelDecode',
'DistillationSerPostProcess', 'DistillationRePostProcess',
'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess',
'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode'
'RFLLabelDecode', 'DRRGPostprocess', 'CANLabelDecode'
]

if config['name'] == 'PSEPostProcess':
Expand Down
4 changes: 2 additions & 2 deletions ppocr/postprocess/rec_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,12 +899,12 @@ def __call__(self, preds, label=None, length=None, *args, **kwargs):
return text, label


class SeqLabelDecode(BaseRecLabelDecode):
class CANLabelDecode(BaseRecLabelDecode):
""" Convert between latex-symbol and symbol-index """

def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(SeqLabelDecode, self).__init__(character_dict_path,
super(CANLabelDecode, self).__init__(character_dict_path,
use_space_char)

def decode(self, text_index, preds_prob=None):
Expand Down
31 changes: 18 additions & 13 deletions test_tipc/configs/rec_d28_can/rec_d28_can.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ Architecture:
bottleneck: True
use_dropout: True
input_channel: 1

Head:
name: CANHead
in_channel: 684
Expand All @@ -66,29 +65,32 @@ Loss:
name: CANLoss

PostProcess:
name: SeqLabelDecode
character: 111
name: CANLabelDecode
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt

Metric:
name: CANMetric
main_indicator: exp_rate

Train:
dataset:
name: PGDataSet
data_dir: ./train_data/CROHME_lite/training/images/
name: SimpleDataSet
data_dir: ./train_data/CROHME/training/images/
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
normalize: True
inverse: True
- SeqLabelEncode:
- CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
label_file_list: ["./train_data/CROHME/training/labels.txt"]
loader:
shuffle: True
batch_size_per_card: 8
Expand All @@ -98,20 +100,23 @@ Train:

Eval:
dataset:
name: PGDataSet
data_dir: ./train_data/CROHME_lite/evaluation/images/
name: SimpleDataSet
data_dir: ./train_data/CROHME/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
normalize: True
inverse: True
- SeqLabelEncode:
- CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
Expand Down
4 changes: 2 additions & 2 deletions test_tipc/configs/rec_d28_can/train_infer_python.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
===========================train_params===========================
model_name:rec_d28_can
python:python3.7
gpu_list:0|0,1
python:python
gpu_list:0|0
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240
Expand Down
1 change: 0 additions & 1 deletion test_tipc/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
cd ./pretrain_models/ && tar xf can_train.tar && cd ../
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
cd ./train_data/ && tar xf CROHME_lite.tar && cd ../

fi
if [ ${model_name} == "layoutxlm_ser" ]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
Expand Down
2 changes: 1 addition & 1 deletion tools/infer/predict_rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __init__(self, args):
elif self.rec_algorithm == "CAN":
self.inverse = args.rec_image_inverse
postprocess_params = {
'name': 'SeqLabelDecode',
'name': 'CANLabelDecode',
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
Expand Down

0 comments on commit f875556

Please sign in to comment.