Skip to content

Commit

Permalink
update docs and fix conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
andyjiang1116 committed Aug 31, 2021
2 parents d43688a + 63ed5fc commit ea52f47
Show file tree
Hide file tree
Showing 53 changed files with 2,031 additions and 327 deletions.
102 changes: 102 additions & 0 deletions configs/rec/rec_mtb_nrtr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
Global:
use_gpu: True
epoch_num: 21
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/nrtr/
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# settings for data or label processing
character_dict_path:
character_type: EN_symbol
max_text_length: 25
infer_mode: False
use_space_char: True
save_res_path: ./output/rec/predicts_nrtr.txt

Optimizer:
name: Adam
beta1: 0.9
beta2: 0.99
clip_norm: 5.0
lr:
name: Cosine
learning_rate: 0.0005
warmup_epoch: 2
regularizer:
name: 'L2'
factor: 0.

Architecture:
model_type: rec
algorithm: NRTR
in_channels: 1
Transform:
Backbone:
name: MTB
cnn_num: 2
Head:
name: Transformer
d_model: 512
num_encoder_layers: 6
beam_size: 10 # A beam size greater than 0 enables beam search during evaluation.


Loss:
name: NRTRLoss
smoothing: True

PostProcess:
name: NRTRLabelDecode

Metric:
name: RecMetric
main_indicator: acc

Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- NRTRDecodeImage: # load image
img_mode: BGR
channel_first: False
- NRTRLabelEncode: # Class handling label
- NRTRRecResizeImg:
image_shape: [100, 32]
resize_type: PIL # PIL or OpenCV
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 512
drop_last: True
num_workers: 8

Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/evaluation/
transforms:
- NRTRDecodeImage: # load image
img_mode: BGR
channel_first: False
- NRTRLabelEncode: # Class handling label
- NRTRRecResizeImg:
image_shape: [100, 32]
resize_type: PIL # PIL or OpenCV
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 1
use_shared_memory: False
4 changes: 4 additions & 0 deletions deploy/cpp_infer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ endif()

set(DEPS ${DEPS} ${OpenCV_LIBS})

include(ExternalProject)
include(external-cmake/auto-log.cmake)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/autolog/src/extern_Autolog/auto_log)

AUX_SOURCE_DIRECTORY(./src SRCS)
add_executable(${DEMO_NAME} ${SRCS})

Expand Down
14 changes: 14 additions & 0 deletions deploy/cpp_infer/external-cmake/auto-log.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Fetches the AutoLog sources from GitHub at build time via ExternalProject.
# This script calls ExternalProject_Add, so the including CMakeLists.txt is
# expected to have run include(ExternalProject) before including this file.

# Git is needed for the clone step; configuration stops if it is not found.
find_package(Git REQUIRED)
# Prints the current build type during configuration.
message("${CMAKE_BUILD_TYPE}")

# Upstream repository that provides AutoLog.
set(AUTOLOG_REPOSITORY https://github.com/LDOUBLEV/AutoLog.git)
# NOTE(review): AUTOLOG_INSTALL_DIR is not referenced anywhere else in this
# file, and the install step below is skipped — confirm whether it is still needed.
SET(AUTOLOG_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/Autolog)

# Declares the external target `extern_Autolog`.
# - PREFIX autolog: working directories are created under `autolog`; with
#   ExternalProject's default layout the checkout presumably lands in
#   autolog/src/extern_Autolog inside the build tree — verify against the
#   include path used by the parent CMakeLists.txt.
# - GIT_TAG main: follows the moving `main` branch rather than a fixed commit,
#   so the fetched sources can change between builds.
# - DOWNLOAD_NO_EXTRACT: NOTE(review) this option is documented for archive
#   (URL) downloads; it likely has no effect on a git checkout — confirm.
# - INSTALL_COMMAND: replaced by an echo, so nothing is installed.
ExternalProject_Add(
extern_Autolog
PREFIX autolog
GIT_REPOSITORY ${AUTOLOG_REPOSITORY}
GIT_TAG main
DOWNLOAD_NO_EXTRACT True
INSTALL_COMMAND cmake -E echo "Skipping install step."
)
6 changes: 3 additions & 3 deletions deploy/cpp_infer/include/ocr_cls.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class Classifier {
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const double &cls_thresh,
const bool &use_tensorrt, const bool &use_fp16) {
const bool &use_tensorrt, const std::string &precision) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
Expand All @@ -51,7 +51,7 @@ class Classifier {

this->cls_thresh = cls_thresh;
this->use_tensorrt_ = use_tensorrt;
this->use_fp16_ = use_fp16;
this->precision_ = precision;

LoadModel(model_dir);
}
Expand All @@ -75,7 +75,7 @@ class Classifier {
std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
bool is_scale_ = true;
bool use_tensorrt_ = false;
bool use_fp16_ = false;
std::string precision_ = "fp32";
// pre-process
ClsResizeImg resize_op_;
Normalize normalize_op_;
Expand Down
8 changes: 4 additions & 4 deletions deploy/cpp_infer/include/ocr_det.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class DBDetector {
const double &det_db_box_thresh,
const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &visualize,
const bool &use_tensorrt, const bool &use_fp16) {
const bool &use_tensorrt, const std::string &precision) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
Expand All @@ -62,7 +62,7 @@ class DBDetector {

this->visualize_ = visualize;
this->use_tensorrt_ = use_tensorrt;
this->use_fp16_ = use_fp16;
this->precision_ = precision;

LoadModel(model_dir);
}
Expand All @@ -71,7 +71,7 @@ class DBDetector {
void LoadModel(const std::string &model_dir);

// Run predictor
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes);
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, std::vector<double> *times);

private:
std::shared_ptr<Predictor> predictor_;
Expand All @@ -91,7 +91,7 @@ class DBDetector {

bool visualize_ = true;
bool use_tensorrt_ = false;
bool use_fp16_ = false;
std::string precision_ = "fp32";

std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
Expand Down
11 changes: 4 additions & 7 deletions deploy/cpp_infer/include/ocr_rec.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ class CRNNRecognizer {
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path,
const bool &use_tensorrt, const bool &use_fp16) {
const bool &use_tensorrt, const std::string &precision) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
this->use_tensorrt_ = use_tensorrt;
this->use_fp16_ = use_fp16;
this->precision_ = precision;

this->label_list_ = Utility::ReadDict(label_path);
this->label_list_.insert(this->label_list_.begin(),
Expand All @@ -64,7 +64,7 @@ class CRNNRecognizer {
// Load Paddle inference model
void LoadModel(const std::string &model_dir);

void Run(cv::Mat &img);
void Run(cv::Mat &img, std::vector<double> *times);

private:
std::shared_ptr<Predictor> predictor_;
Expand All @@ -81,7 +81,7 @@ class CRNNRecognizer {
std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
bool is_scale_ = true;
bool use_tensorrt_ = false;
bool use_fp16_ = false;
std::string precision_ = "fp32";
// pre-process
CrnnResizeImg resize_op_;
Normalize normalize_op_;
Expand All @@ -90,9 +90,6 @@ class CRNNRecognizer {
// post-process
PostProcessor post_processor_;

cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box);

}; // class CrnnRecognizer

} // namespace PaddleOCR
3 changes: 3 additions & 0 deletions deploy/cpp_infer/include/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class Utility {

static void GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs);

static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box);
};

} // namespace PaddleOCR
Loading

0 comments on commit ea52f47

Please sign in to comment.