update docs and fix conflict

Evezerest · Aug 31, 2021 · ea52f47 · ea52f47
2 parents d43688a + 63ed5fc
commit ea52f47
Show file tree

Hide file tree

Showing 53 changed files with 2,031 additions and 327 deletions.
diff --git a/configs/rec/rec_mtb_nrtr.yml b/configs/rec/rec_mtb_nrtr.yml
@@ -0,0 +1,102 @@
+Global:
+ use_gpu: True
+ epoch_num: 21
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec/nrtr/
+ save_epoch_step: 1
+ # evaluation is run every 2000 iterations
+ eval_batch_step: [0, 2000]
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints: 
+ save_inference_dir:
+ use_visualdl: False
+ infer_img: doc/imgs_words_en/word_10.png
+ # for data or label process
+ character_dict_path: 
+ character_type: EN_symbol
+ max_text_length: 25
+ infer_mode: False
+ use_space_char: True
+ save_res_path: ./output/rec/predicts_nrtr.txt
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.99
+ clip_norm: 5.0
+ lr:
+ name: Cosine
+ learning_rate: 0.0005
+ warmup_epoch: 2
+ regularizer:
+ name: 'L2'
+ factor: 0.
+
+Architecture:
+ model_type: rec
+ algorithm: NRTR
+ in_channels: 1
+ Transform:
+ Backbone:
+ name: MTB
+ cnn_num: 2
+ Head:
+ name: Transformer
+ d_model: 512
+ num_encoder_layers: 6
+ beam_size: 10 # When beam_size is greater than 0, beam search is used during evaluation.
+
+
+Loss:
+ name: NRTRLoss
+ smoothing: True
+
+PostProcess:
+ name: NRTRLabelDecode
+
+Metric:
+ name: RecMetric
+ main_indicator: acc
+
+Train:
+ dataset:
+ name: LMDBDataSet
+ data_dir: ./train_data/data_lmdb_release/training/
+ transforms:
+ - NRTRDecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - NRTRLabelEncode: # Class handling label
+ - NRTRRecResizeImg:
+ image_shape: [100, 32]
+ resize_type: PIL # PIL or OpenCV
+ - KeepKeys:
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+ loader:
+ shuffle: True
+ batch_size_per_card: 512
+ drop_last: True
+ num_workers: 8
+
+Eval:
+ dataset:
+ name: LMDBDataSet
+ data_dir: ./train_data/data_lmdb_release/evaluation/
+ transforms:
+ - NRTRDecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - NRTRLabelEncode: # Class handling label
+ - NRTRRecResizeImg:
+ image_shape: [100, 32]
+ resize_type: PIL # PIL or OpenCV
+ - KeepKeys:
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 256
+ num_workers: 1
+ use_shared_memory: False
diff --git a/deploy/cpp_infer/CMakeLists.txt b/deploy/cpp_infer/CMakeLists.txt
@@ -206,6 +206,10 @@ endif()
 
 set(DEPS ${DEPS} ${OpenCV_LIBS})
 
+include(ExternalProject)
+include(external-cmake/auto-log.cmake)
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/autolog/src/extern_Autolog/auto_log)
+
 AUX_SOURCE_DIRECTORY(./src SRCS)
 add_executable(${DEMO_NAME} ${SRCS})
 

diff --git a/deploy/cpp_infer/external-cmake/auto-log.cmake b/deploy/cpp_infer/external-cmake/auto-log.cmake
@@ -0,0 +1,14 @@
+find_package(Git REQUIRED)  # Declares the AutoLog external project; Git is mandatory because the sources are fetched from a Git repository.
+message("${CMAKE_BUILD_TYPE}")  # Prints the current build type at configure time.
+
+set(AUTOLOG_REPOSITORY https://github.com/LDOUBLEV/AutoLog.git)  # Upstream repository cloned by ExternalProject_Add below.
+SET(AUTOLOG_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/Autolog)  # NOTE(review): defined here but not referenced anywhere else in this file.
+
+ExternalProject_Add(
+ extern_Autolog  # Target name of the external project.
+ PREFIX autolog  # Root directory for the external project's working directories.
+ GIT_REPOSITORY ${AUTOLOG_REPOSITORY}
+ GIT_TAG main  # Follows the main branch rather than a pinned commit or tag.
+ DOWNLOAD_NO_EXTRACT True  # NOTE(review): this option is documented for URL downloads; confirm it has any effect with GIT_REPOSITORY.
+ INSTALL_COMMAND cmake -E echo "Skipping install step."  # Replaces the install step with an echo, so nothing is installed.
+)
diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h
@@ -42,7 +42,7 @@ class Classifier {
  const int &gpu_id, const int &gpu_mem,
  const int &cpu_math_library_num_threads,
  const bool &use_mkldnn, const double &cls_thresh,
- const bool &use_tensorrt, const bool &use_fp16) {
+ const bool &use_tensorrt, const std::string &precision) {
  this->use_gpu_ = use_gpu;
  this->gpu_id_ = gpu_id;
  this->gpu_mem_ = gpu_mem;
@@ -51,7 +51,7 @@ class Classifier {
 
  this->cls_thresh = cls_thresh;
  this->use_tensorrt_ = use_tensorrt;
- this->use_fp16_ = use_fp16;
+ this->precision_ = precision;
 
  LoadModel(model_dir);
  }
@@ -75,7 +75,7 @@ class Classifier {
  std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  bool is_scale_ = true;
  bool use_tensorrt_ = false;
- bool use_fp16_ = false;
+ std::string precision_ = "fp32";
  // pre-process
  ClsResizeImg resize_op_;
  Normalize normalize_op_;

diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h
@@ -46,7 +46,7 @@ class DBDetector {
  const double &det_db_box_thresh,
  const double &det_db_unclip_ratio,
  const bool &use_polygon_score, const bool &visualize,
- const bool &use_tensorrt, const bool &use_fp16) {
+ const bool &use_tensorrt, const std::string &precision) {
  this->use_gpu_ = use_gpu;
  this->gpu_id_ = gpu_id;
  this->gpu_mem_ = gpu_mem;
@@ -62,7 +62,7 @@ class DBDetector {
 
  this->visualize_ = visualize;
  this->use_tensorrt_ = use_tensorrt;
- this->use_fp16_ = use_fp16;
+ this->precision_ = precision;
 
  LoadModel(model_dir);
  }
@@ -71,7 +71,7 @@ class DBDetector {
  void LoadModel(const std::string &model_dir);
 
  // Run predictor
- void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes);
+ void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, std::vector<double> *times);
 
 private:
  std::shared_ptr<Predictor> predictor_;
@@ -91,7 +91,7 @@ class DBDetector {
 
  bool visualize_ = true;
  bool use_tensorrt_ = false;
- bool use_fp16_ = false;
+ std::string precision_ = "fp32";
 
  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
  std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};

diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h
@@ -44,14 +44,14 @@ class CRNNRecognizer {
  const int &gpu_id, const int &gpu_mem,
  const int &cpu_math_library_num_threads,
  const bool &use_mkldnn, const string &label_path,
- const bool &use_tensorrt, const bool &use_fp16) {
+ const bool &use_tensorrt, const std::string &precision) {
  this->use_gpu_ = use_gpu;
  this->gpu_id_ = gpu_id;
  this->gpu_mem_ = gpu_mem;
  this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
  this->use_mkldnn_ = use_mkldnn;
  this->use_tensorrt_ = use_tensorrt;
- this->use_fp16_ = use_fp16;
+ this->precision_ = precision;
 
  this->label_list_ = Utility::ReadDict(label_path);
  this->label_list_.insert(this->label_list_.begin(),
@@ -64,7 +64,7 @@ class CRNNRecognizer {
  // Load Paddle inference model
  void LoadModel(const std::string &model_dir);
 
- void Run(cv::Mat &img);
+ void Run(cv::Mat &img, std::vector<double> *times);
 
 private:
  std::shared_ptr<Predictor> predictor_;
@@ -81,7 +81,7 @@ class CRNNRecognizer {
  std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  bool is_scale_ = true;
  bool use_tensorrt_ = false;
- bool use_fp16_ = false;
+ std::string precision_ = "fp32";
  // pre-process
  CrnnResizeImg resize_op_;
  Normalize normalize_op_;
@@ -90,9 +90,6 @@ class CRNNRecognizer {
  // post-process
  PostProcessor post_processor_;
 
- cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
- std::vector<std::vector<int>> box);
-
 }; // class CrnnRecognizer
 
 } // namespace PaddleOCR
diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h
@@ -47,6 +47,9 @@ class Utility {
 
  static void GetAllFiles(const char *dir_name,
  std::vector<std::string> &all_inputs);
+
+ static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
+ std::vector<std::vector<int>> box);
 };
 
 } // namespace PaddleOCR