beta 0.2.0.8

- add NaN check-up
- add quantization support for ScaleAdd Op
- add binary to eltwise optimization
- add console logs for quantization tool
- improve documentation for quantization tool
- replace redundant dimension flags with dimension format
- optimize performance of TensorFlow Lite Quantized Convolution
- fix axis support for ONNX softmax
- fix get performance compile error on Windows
00liujj · Aug 22, 2019 · b995b25 · b995b25
1 parent 1005c13
commit b995b25
Show file tree

Hide file tree

Showing 141 changed files with 1,789 additions and 1,412 deletions.
diff --git a/include/ErrorCode.hpp b/include/ErrorCode.hpp
@@ -13,12 +13,13 @@ namespace MNN {
 enum ErrorCode {
 #ifdef NO_ERROR
 #undef NO_ERROR
-#endif //NO_ERROR
+#endif // NO_ERROR
     NO_ERROR           = 0,
     OUT_OF_MEMORY      = 1,
     NOT_SUPPORT        = 2,
     COMPUTE_SIZE_ERROR = 3,
     NO_EXECUTION       = 4,
+    INVALID_VALUE      = 5,
 
     // User error
     INPUT_DATA_ERROR = 10,
@@ -28,6 +29,6 @@ enum ErrorCode {
     TENSOR_NOT_SUPPORT = 20,
     TENSOR_NEED_DIVIDE = 21,
 };
-}
+} // namespace MNN
 
 #endif /* ErrorCode_h */
diff --git a/include/ImageProcess.hpp b/include/ImageProcess.hpp
@@ -100,6 +100,23 @@ class MNN_PUBLIC ImageProcess {
      */
     ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, Tensor* dest);
 
+    /**
+     * @brief convert source data to given tensor.
+     * @param source    source data.
+     * @param iw        source width.
+     * @param ih        source height.
+     * @param stride    number of elements per row. eg: 100 width RGB contains at least 300 elements.
+     * @param dest      dest data.
+     * @param ow      output width.
+     * @param oh      output height.
+     * @param outputBpp      output bpp, if 0, set the same as config.destFormat.
+     * @param outputStride  output stride, if 0, set as ow * outputBpp.
+     * @param type  only halide_type_of<uint8_t> and halide_type_of<float> are supported.
+     * @return result code.
+     */
+    ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, void* dest, int ow, int oh, int outputBpp = 0,
+                      int outputStride = 0, halide_type_t type = halide_type_of<float>());
+
     /**
      * @brief create tensor with given data.
      * @param w     image width.

diff --git a/include/MNNForwardType.h b/include/MNNForwardType.h
@@ -8,6 +8,7 @@
 
 #ifndef MNNForwardType_h
 #define MNNForwardType_h
+#include <stdint.h>
 
 typedef enum {
     MNN_FORWARD_CPU = 0,
@@ -43,33 +44,24 @@ typedef enum {
 #ifdef __cplusplus
 namespace MNN {
 struct BackendConfig {
-    enum MemoryMode {
-        Memory_Normal = 0,
-        Memory_High,
-        Memory_Low
-    };
-
+    enum MemoryMode { Memory_Normal = 0, Memory_High, Memory_Low };
+
     MemoryMode memory = Memory_Normal;
-
-    enum PowerMode {
-        Power_Normal = 0,
-        Power_High,
-        Power_Low
-    };
-
+
+    enum PowerMode { Power_Normal = 0, Power_High, Power_Low };
+
     PowerMode power = Power_Normal;
-
-    enum PrecisionMode {
-        Precision_Normal = 0,
-        Precision_High,
-        Precision_Low
-    };
-
+
+    enum PrecisionMode { Precision_Normal = 0, Precision_High, Precision_Low };
+
     PrecisionMode precision = Precision_Normal;
-    
+
     /** user defined context */
-    void* sharedContext = nullptr;
-};
+    union {
+        void* sharedContext = nullptr;
+        size_t flags; // Valid for CPU Backend
+    };
 };
+}; // namespace MNN
 #endif
 #endif /* MNNForwardType_h */
diff --git a/include/Tensor.hpp b/include/Tensor.hpp
@@ -44,16 +44,6 @@ class MNN_PUBLIC Tensor {
         HANDLE_STRING = 1
     };
 
-    /** dimension reorder flag */
-    enum DataReorderType {
-        /** default reorder type, do not reorder */
-        NO_REORDER = 0,
-        /** reorder dimension 4 by 4. usually used with NC4HW4 or NHWC4 while data type is float. */
-        REORDER_4 = 1,
-        /** reorder dimension 8 by 8. usually used with NC4HW4 or NHWC4 while data type is uint8 or int8. */
-        REORDER_8
-    };
-
 public:
     /**
      * @brief create a tensor with dimension size and type without acquire memory for data.

diff --git a/project/ios/MNN.xcodeproj/project.pbxproj b/project/ios/MNN.xcodeproj/project.pbxproj
diff --git a/schema/default/CaffeOp.fbs b/schema/default/CaffeOp.fbs
@@ -259,3 +259,9 @@ table Normalize {
   scale:[float];
 }
 
+table EltwiseInt8 {
+    type:EltwiseType;
+    inputQuan0:QuantizedFloatParam;
+    inputQuan1:QuantizedFloatParam;
+    outputQuan:QuantizedFloatParam;
+}
diff --git a/schema/default/MNN.fbs b/schema/default/MNN.fbs
@@ -147,6 +147,7 @@ enum OpType : int {
     DepthwiseConvInt8 = 515,
     PoolInt8 = 516,
     FloatToInt8 = 517,
+    EltwiseInt8 = 518,
 }
 
 table Plugin {
@@ -230,6 +231,7 @@ union OpParameter {
     BatchMatMulParam,
     QuantizedFloatParam,
     DepthSpaceParam, // DepthToSpace and SpaceToDepth using the same parameter
+    EltwiseInt8,
 }
 
 table Op {

diff --git a/source/backend/arm82/Arm82Backend.cpp b/source/backend/arm82/Arm82Backend.cpp
@@ -15,8 +15,10 @@ static const MNNForwardType gForwardType = MNN_FORWARD_USER_1;
 
 Arm82Backend::Arm82Backend(int thread) : Backend(gForwardType) {
     auto creator  = MNNGetExtraBackendCreator(MNN_FORWARD_CPU);
-    thread        = std::min(thread, 32);
-    thread        = std::max(thread, 1);
+    // Only a single thread is supported for now
+    thread = 1;
+//    thread        = std::min(thread, 32);
+//    thread        = std::max(thread, 1);
     mNumberThread = thread;
     MNN_ASSERT(nullptr != creator);
     Backend::Info info;

diff --git a/source/backend/arm82/Arm82Convolution1x1.cpp b/source/backend/arm82/Arm82Convolution1x1.cpp
@@ -8,7 +8,6 @@
 
 #include "Arm82Convolution1x1.hpp"
 #include "Arm82Backend.hpp"
-#include "Concurrency.h"
 #include "MNN_generated.h"
 #include "Macro.h"
 #define SRC_Z_UNIT 4
@@ -152,7 +151,12 @@ ErrorCode Arm82Convolution1x1::onResize(const std::vector<Tensor*>& inputs, cons
     backend()->onReleaseBuffer(mTempDstC4.get(), Backend::DYNAMIC);
     backend()->onReleaseBuffer(mTempDst.get(), Backend::DYNAMIC);
     backend()->onReleaseBuffer(mTempCol.get(), Backend::DYNAMIC);
-
+    mPost = MNNFloat16C8ToC4AddBias;
+    if (mConvOp->common()->relu()) {
+        mPost = MNNFloat16C8ToC4AddBiasRelu;
+    } else if (mConvOp->common()->relu6()) {
+        mPost = MNNFloat16C8ToC4AddBiasRelu6;
+    }
     return NO_ERROR;
 }
 
@@ -173,13 +177,8 @@ ErrorCode Arm82Convolution1x1::onExecute(const std::vector<Tensor*>& inputs, con
     int ocDiv4        = UP_DIV(output->channel(), 4);
     auto srcPlane     = input->width() * input->height();
     int numThread     = std::min(tileCount, (((Arm82Backend*)backend())->numberThread()));
-    auto postFunction = MNNFloat16C8ToC4AddBias;
-    if (mConvOp->common()->relu()) {
-        postFunction = MNNFloat16C8ToC4AddBiasRelu;
-    } else if (mConvOp->common()->relu6()) {
-        postFunction = MNNFloat16C8ToC4AddBiasRelu6;
-    }
-    MNN_CONCURRENCY_BEGIN(tId, numThread) {
+
+    for (int tId=0; tId < numThread; ++tId) {
         auto tempDst    = mTempDst->host<int16_t>() + tId * mTempDst->stride(0);
         auto tempSource = mTempCol->host<int16_t>() + tId * mTempCol->stride(0);
         auto tempDst32  = mTempDstC4->host<int16_t>() + tId * mTempDstC4->stride(0);
@@ -206,12 +205,11 @@ ErrorCode Arm82Convolution1x1::onExecute(const std::vector<Tensor*>& inputs, con
             }
 
             // PostTreat
-            postFunction(tempDst32, tempDst, bias, count, ocUnit);
+            mPost(tempDst32, tempDst, bias, count, ocUnit);
             auto dstStart = dstOrigin + start * SRC_Z_UNIT;
             MNNFloat16ToFloat32C4(dstStart, tempDst32, count, ocDiv4, plane * SRC_Z_UNIT * sizeof(float));
         }
     }
-    MNN_CONCURRENCY_END();
 
     return NO_ERROR;
 }

diff --git a/source/backend/arm82/Arm82Convolution1x1.hpp b/source/backend/arm82/Arm82Convolution1x1.hpp
@@ -33,6 +33,7 @@ class Arm82Convolution1x1 : public Execution {
 
     CPUConvolution::Im2ColParameter *mIm2ColParamter;
     const Convolution2D *mConvOp;
+    void (*mPost)(int16_t* dst, const int16_t* src, const int16_t* bias, size_t size, size_t ocUnit);
 };
 } // namespace MNN
 

diff --git a/source/backend/arm82/CMakeLists.txt b/source/backend/arm82/CMakeLists.txt
@@ -7,14 +7,7 @@ include_directories("../../source/backend/cpu")
 set(CMAKE_C_STANDARD 99)
 set(CMAKE_CXX_STANDARD 11)
 enable_language(ASM)
-option(MNN_OPENMP "Enable Multiple Thread Linux|Android" ON)
 if(SYSTEM.Android)
-    if(MNN_OPENMP)
-        FIND_PACKAGE(OpenMP REQUIRED)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
-    endif()
     if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv7")
         file(GLOB SRCS_ASM "asm/arm32/*")
         add_definitions(-mfloat-abi=softfp -mfpu=neon)