[Model] Support PP-OCRv4 pipeline (#1913)

* 添加paddleclas模型 * 更新README_CN * 更新README_CN * 更新README * update get_model.sh * update get_models.sh * update paddleseg models * update paddle_seg models * update paddle_seg models * modified test resources * update benchmark_gpu_trt.sh * add paddle detection * add paddledetection to benchmark * modified benchmark cmakelists * update benchmark scripts * modified benchmark function calling * modified paddledetection documents * upadte getmodels.sh * add PaddleDetectonModel * reset examples/paddledetection * resolve conflict * update pybind * resolve conflict * fix bug * delete debug mode * update checkarch log * update trt inputs example * Update README.md * add ppocr_v4 * update ppocr_v4 * update ocr_v4 * update ocr_v4 * update ocr_v4 * update ocr_v4 --------- Co-authored-by: DefTruth <[email protected]>
PaddlePaddle · May 11, 2023 · 643f41e · 643f41e
1 parent e75e766
commit 643f41e
Show file tree

Hide file tree

Showing 10 changed files with 231 additions and 9 deletions.
diff --git a/benchmark/python/benchmark_ppocr.py b/benchmark/python/benchmark_ppocr.py
@@ -317,6 +317,30 @@ def cpu_stat_func(self, q, pid, interval=0.0):
  runtime_option=rec_option)
  model = fd.vision.ocr.PPOCRv3(
  det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+ elif "OCRv4" in args.model_dir:
+ det_option = option
+ if args.backend in ["trt", "paddle_trt"]:
+ det_option.trt_option.set_shape(
+ "x", [1, 3, 64, 64], [1, 3, 640, 640], [1, 3, 960, 960])
+ det_model = fd.vision.ocr.DBDetector(
+ det_model_file, det_params_file, runtime_option=det_option)
+ cls_option = option
+ if args.backend in ["trt", "paddle_trt"]:
+ cls_option.trt_option.set_shape(
+ "x", [1, 3, 48, 10], [10, 3, 48, 320], [64, 3, 48, 1024])
+ cls_model = fd.vision.ocr.Classifier(
+ cls_model_file, cls_params_file, runtime_option=cls_option)
+ rec_option = option
+ if args.backend in ["trt", "paddle_trt"]:
+ rec_option.trt_option.set_shape(
+ "x", [1, 3, 48, 10], [10, 3, 48, 320], [64, 3, 48, 2304])
+ rec_model = fd.vision.ocr.Recognizer(
+ rec_model_file,
+ rec_params_file,
+ rec_label_file,
+ runtime_option=rec_option)
+ model = fd.vision.ocr.PPOCRv4(
+ det_model=det_model, cls_model=cls_model, rec_model=rec_model)
  else:
  raise Exception("model {} not support now in ppocr series".format(
  args.model_dir))

diff --git a/c_api/fastdeploy_capi/vision/types_internal.h b/c_api/fastdeploy_capi/vision/types_internal.h
@@ -32,6 +32,7 @@
 #include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
+#include "fastdeploy/vision/ocr/ppocr/ppocr_v4.h"
 #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
 #include "fastdeploy/vision/segmentation/ppseg/model.h"
 
@@ -187,6 +188,9 @@ DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model);
 // PPOCRv3
 DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model);
 
+// PPOCRv4
+DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv4, ppocrv4_model);
+
 // PPStructureV2Table
 DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPStructureV2Table, ppstructurev2table_model);
 
@@ -400,6 +404,9 @@ DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper
 // PPOCRv3
 DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper);
 
+// PPOCRv4
+DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv4, fd_ppocrv4_wrapper);
+
 // PPStructureV2Table
 DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPStructureV2Table, fd_ppstructurev2_table_wrapper);
 

diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
@@ -59,6 +59,7 @@
 #include "fastdeploy/vision/ocr/ppocr/structurev2_layout.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
+#include "fastdeploy/vision/ocr/ppocr/ppocr_v4.h"
 #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
 #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_layout.h"
 #include "fastdeploy/vision/ocr/ppocr/recognizer.h"

diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc
@@ -672,8 +672,8 @@ std::string OCRResult::Str() {
  out = out + "]";
 
  if (rec_scores.size() > 0) {
- out = out + "rec text: " + text[n] + " rec score:" +
- std::to_string(rec_scores[n]) + " ";
+ out = out + "rec text: " + text[n] +
+ " rec score:" + std::to_string(rec_scores[n]) + " ";
  }
  if (cls_labels.size() > 0) {
  out = out + "cls label: " + std::to_string(cls_labels[n]) +
@@ -713,8 +713,8 @@ std::string OCRResult::Str() {
  cls_scores.size() > 0) {
  std::string out;
  for (int i = 0; i < rec_scores.size(); i++) {
- out = out + "rec text: " + text[i] + " rec score:" +
- std::to_string(rec_scores[i]) + " ";
+ out = out + "rec text: " + text[i] +
+ " rec score:" + std::to_string(rec_scores[i]) + " ";
  out = out + "cls label: " + std::to_string(cls_labels[i]) +
  " cls score: " + std::to_string(cls_scores[i]);
  out = out + "\n";
@@ -733,8 +733,8 @@ std::string OCRResult::Str() {
  cls_scores.size() == 0) {
  std::string out;
  for (int i = 0; i < rec_scores.size(); i++) {
- out = out + "rec text: " + text[i] + " rec score:" +
- std::to_string(rec_scores[i]) + " ";
+ out = out + "rec text: " + text[i] +
+ " rec score:" + std::to_string(rec_scores[i]) + " ";
  out = out + "\n";
  }
  return out;
@@ -781,9 +781,9 @@ std::string HeadPoseResult::Str() {
  std::string out;
 
  out = "HeadPoseResult: [yaw, pitch, roll]\n";
- out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + "pitch: " +
- std::to_string(euler_angles[1]) + "\n" + "roll: " +
- std::to_string(euler_angles[2]) + "\n";
+ out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" +
+ "pitch: " + std::to_string(euler_angles[1]) + "\n" +
+ "roll: " + std::to_string(euler_angles[2]) + "\n";
  return out;
 }
 

diff --git a/fastdeploy/vision/ocr/ocr_pybind.cc b/fastdeploy/vision/ocr/ocr_pybind.cc
@@ -17,13 +17,15 @@
 namespace fastdeploy {
 
 void BindPPOCRModel(pybind11::module& m);
+void BindPPOCRv4(pybind11::module& m);
 void BindPPOCRv3(pybind11::module& m);
 void BindPPOCRv2(pybind11::module& m);
 void BindPPStructureV2Table(pybind11::module& m);
 
 void BindOcr(pybind11::module& m) {
  auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models");
  BindPPOCRModel(ocr_module);
+ BindPPOCRv4(ocr_module);
  BindPPOCRv3(ocr_module);
  BindPPOCRv2(ocr_module);
  BindPPStructureV2Table(ocr_module);

diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc b/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc
@@ -16,6 +16,38 @@
 #include "fastdeploy/pybind/main.h"
 
 namespace fastdeploy {
+void BindPPOCRv4(pybind11::module& m) {
+ // PPOCRv4
+ pybind11::class_<pipeline::PPOCRv4, FastDeployModel>(m, "PPOCRv4")
+
+ .def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
+ fastdeploy::vision::ocr::Classifier*,
+ fastdeploy::vision::ocr::Recognizer*>())
+ .def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
+ fastdeploy::vision::ocr::Recognizer*>())
+ .def_property("cls_batch_size", &pipeline::PPOCRv4::GetClsBatchSize,
+ &pipeline::PPOCRv4::SetClsBatchSize)
+ .def_property("rec_batch_size", &pipeline::PPOCRv4::GetRecBatchSize,
+ &pipeline::PPOCRv4::SetRecBatchSize)
+ .def("clone", [](pipeline::PPOCRv4& self) { return self.Clone(); })
+ .def("predict",
+ [](pipeline::PPOCRv4& self, pybind11::array& data) {
+ auto mat = PyArrayToCvMat(data);
+ vision::OCRResult res;
+ self.Predict(&mat, &res);
+ return res;
+ })
+ .def("batch_predict",
+ [](pipeline::PPOCRv4& self, std::vector<pybind11::array>& data) {
+ std::vector<cv::Mat> images;
+ for (size_t i = 0; i < data.size(); ++i) {
+ images.push_back(PyArrayToCvMat(data[i]));
+ }
+ std::vector<vision::OCRResult> results;
+ self.BatchPredict(images, &results);
+ return results;
+ });
+}
 void BindPPOCRv3(pybind11::module& m) {
  // PPOCRv3
  pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(m, "PPOCRv3")

diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v4.h b/fastdeploy/vision/ocr/ppocr/ppocr_v4.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
+
+namespace fastdeploy {
+/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace.
+ *
+ */
+namespace pipeline {
+/*! @brief PPOCRv4 is used to load PP-OCRv4 series models provided by PaddleOCR.
+ */
+class FASTDEPLOY_DECL PPOCRv4 : public PPOCRv3 {
+ public:
+ /** \brief Set up the detection model path, classification model path and recognition model path respectively.
+ *
+ * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer
+ * \param[in] cls_model Path of classification model, e.g ./ch_ppocr_mobile_v2.0_cls_infer
+ * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer
+ */
+ PPOCRv4(fastdeploy::vision::ocr::DBDetector* det_model,
+ fastdeploy::vision::ocr::Classifier* cls_model,
+ fastdeploy::vision::ocr::Recognizer* rec_model)
+ : PPOCRv3(det_model, cls_model, rec_model) {
+ // The only difference between v2 and v3
+ auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape();
+ preprocess_shape[1] = 48;
+ recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape);
+ }
+ /** \brief Classification model is optional, so this function is set up the detection model path and recognition model path respectively.
+ *
+ * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer
+ * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer
+ */
+ PPOCRv4(fastdeploy::vision::ocr::DBDetector* det_model,
+ fastdeploy::vision::ocr::Recognizer* rec_model)
+ : PPOCRv3(det_model, rec_model) {
+ // The only difference between v2 and v4
+ auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape();
+ preprocess_shape[1] = 48;
+ recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape);
+ }
+
+ /** \brief Clone a new PPOCRv4 with less memory usage when multiple instances of the same model are created
+ *
+ * \return new PPOCRv4* type unique pointer
+ */
+ std::unique_ptr<PPOCRv4> Clone() const {
+ std::unique_ptr<PPOCRv4> clone_model = utils::make_unique<PPOCRv4>(PPOCRv4(*this));
+ clone_model->detector_ = detector_->Clone().release();
+ if (classifier_ != nullptr) {
+ clone_model->classifier_ = classifier_->Clone().release();
+ }
+ clone_model->recognizer_ = recognizer_->Clone().release();
+ return clone_model;
+ }
+};
+
+} // namespace pipeline
+
+namespace application {
+namespace ocrsystem {
+ typedef pipeline::PPOCRv4 PPOCRSystemv4;
+} // namespace ocrsystem
+} // namespace application
+
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h
@@ -92,6 +92,7 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
  int line_size = 1, float font_size = 0.5f,
  std::vector<int> font_color = {255, 255, 255},
  int font_thickness = 1);
+
 /** \brief Show the visualized results with custom labels for detection models
  *
  * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format

diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py
@@ -848,6 +848,79 @@ def postprocessor(self):
  def postprocessor(self, value):
  self._model.postprocessor = value
 
+class PPOCRv4(FastDeployModel):
+ def __init__(self, det_model=None, cls_model=None, rec_model=None):
+ """Consruct a pipeline with text detector, direction classifier and text recognizer models
+
+ :param det_model: (FastDeployModel) The detection model object created by fastdeploy.vision.ocr.DBDetector.
+ :param cls_model: (FastDeployModel) The classification model object created by fastdeploy.vision.ocr.Classifier.
+ :param rec_model: (FastDeployModel) The recognition model object created by fastdeploy.vision.ocr.Recognizer.
+ """
+ assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None."
+ if cls_model is None:
+ self.system_ = C.vision.ocr.PPOCRv4(det_model._model,
+ rec_model._model)
+ else:
+ self.system_ = C.vision.ocr.PPOCRv4(
+ det_model._model, cls_model._model, rec_model._model)
+
+ def clone(self):
+ """Clone PPOCRv4 pipeline object
+ :return: a new PPOCRv4 pipeline object
+ """
+
+ class PPOCRv4Clone(PPOCRv4):
+ def __init__(self, system):
+ self.system_ = system
+
+ clone_model = PPOCRv4Clone(self.system_.clone())
+ return clone_model
+
+ def predict(self, input_image):
+ """Predict an input image
+ :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+ :return: OCRResult
+ """
+ return self.system_.predict(input_image)
+
+ def batch_predict(self, images):
+ """Predict a batch of input image
+ :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+ :return: OCRBatchResult
+ """
+ return self.system_.batch_predict(images)
+
+ @property
+ def cls_batch_size(self):
+ return self.system_.cls_batch_size
+
+ @cls_batch_size.setter
+ def cls_batch_size(self, value):
+ assert isinstance(
+ value,
+ int), "The value to set `cls_batch_size` must be type of int."
+ self.system_.cls_batch_size = value
+
+ @property
+ def rec_batch_size(self):
+ return self.system_.rec_batch_size
+
+ @rec_batch_size.setter
+ def rec_batch_size(self, value):
+ assert isinstance(
+ value,
+ int), "The value to set `rec_batch_size` must be type of int."
+ self.system_.rec_batch_size = value
+
+class PPOCRSystemv4(PPOCRv4):
+ def __init__(self, det_model=None, cls_model=None, rec_model=None):
+ logging.warning(
+ "DEPRECATED: fd.vision.ocr.PPOCRSystemv4 is deprecated, "
+ "please use fd.vision.ocr.PPOCRv4 instead.")
+ super(PPOCRSystemv4, self).__init__(det_model, cls_model, rec_model)
+
+ def predict(self, input_image):
+ return super(PPOCRSystemv4, self).predict(input_image)
 
 class PPOCRv3(FastDeployModel):
  def __init__(self, det_model=None, cls_model=None, rec_model=None):

diff --git a/python/fastdeploy/vision/visualize/__init__.py b/python/fastdeploy/vision/visualize/__init__.py
@@ -63,6 +63,8 @@ def vis_perception(im_data,
  score_threshold, line_size, font_size)
 
 
+
+
 def vis_keypoint_detection(im_data, keypoint_det_result, conf_threshold=0.5):
  """Show the visualized results for keypoint detection models