Firmware and SDK release 30 Jun 2023
mmajchrzycki committed Jun 30, 2023
1 parent 244c4ae commit a162de6
Showing 34 changed files with 1,581 additions and 201 deletions.
5 changes: 5 additions & 0 deletions edge-impulse-sdk/classifier/ei_model_types.h
@@ -62,6 +62,10 @@
#define EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI 5
#define EI_CLASSIFIER_LAST_LAYER_YOLOV7 6

#define EI_CLASSIFIER_IMAGE_SCALING_NONE 0
#define EI_CLASSIFIER_IMAGE_SCALING_0_255 1
#define EI_CLASSIFIER_IMAGE_SCALING_TORCH 2

struct ei_impulse;

typedef struct {
@@ -89,6 +93,7 @@ typedef struct {
typedef struct {
EI_IMPULSE_ERROR (*infer_fn)(const ei_impulse *impulse, ei::matrix_t *fmatrix, ei_impulse_result_t *result, void *config, bool debug);
void *config;
int image_scaling;
} ei_learning_block_t;

typedef struct {
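
The new EI_CLASSIFIER_IMAGE_SCALING_* constants and the image_scaling field let each learning block declare how its image input was normalized during training. Below is a minimal sketch of how a generated block definition might populate the field; the infer_fn and config names are placeholders for generated code and are not taken from this commit.

    // Hypothetical learning-block definition using the new image_scaling field;
    // run_nn_inference and block_config are illustrative names only.
    const ei_learning_block_t image_block = {
        &run_nn_inference,                  // engine-specific inference entry point
        (void *)&block_config,              // engine-specific block configuration
        EI_CLASSIFIER_IMAGE_SCALING_TORCH   // input was normalized with Torch mean/std
    };
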
7 changes: 1 addition & 6 deletions edge-impulse-sdk/classifier/ei_performance_calibration.h
@@ -38,11 +38,6 @@ class RecognizeEvents {
uint32_t sample_length,
float sample_interval_ms)
{
if ((void *)this == NULL) {
ei_printf(MEM_ERROR);
return;
}

this->_score_array = nullptr;
this->_running_sum = nullptr;
this->_detection_threshold = config->detection_threshold;
@@ -123,7 +118,7 @@ class RecognizeEvents {
uint32_t current_top_index = 0;

/* Check pointers */
if ((void *)this == NULL || this->_score_array == NULL || this->_running_sum == NULL) {
if (this->_score_array == NULL || this->_running_sum == NULL) {
return EI_PC_RET_MEMORY_ERROR;
}

37 changes: 37 additions & 0 deletions edge-impulse-sdk/classifier/ei_quantize.h
@@ -0,0 +1,37 @@
/* Edge Impulse inferencing library
* Copyright (c) 2022 EdgeImpulse Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef __EI_QUANTIZE__H__
#define __EI_QUANTIZE__H__

#include <algorithm>
#include <cmath>

static int32_t pre_cast_quantize(float value, float scale, int32_t zero_point, bool is_signed) {

int32_t max_value = is_signed ? 127 : 255;
int32_t min_value = is_signed ? -128 : 0;
// Saturate/clip any overflows post scaling
return std::min( std::max( static_cast<int32_t>(round(value / scale)) + zero_point, min_value), max_value);
}

#endif //!__EI_QUANTIZE__H__
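
As a quick illustration of pre_cast_quantize with commonly used parameters (the scale and zero point below are assumptions for the example): a normalized pixel of 0.5, quantized with scale 1/255 and zero point -128, maps to 0, and any out-of-range result is clipped to the int8 limits.

    // Example call, assuming an int8 tensor with scale 1/255 and zero point -128.
    #include "edge-impulse-sdk/classifier/ei_quantize.h"

    int32_t q = pre_cast_quantize(0.5f, 0.003921568859368563f, -128, true);
    // round(0.5 * 255) + (-128) = 128 - 128 = 0; values outside [-128, 127] are clipped
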
61 changes: 59 additions & 2 deletions edge-impulse-sdk/classifier/ei_run_classifier.h
@@ -97,6 +97,50 @@ static RecognizeEvents *avg_scores = NULL;
/* These functions (up to Public functions section) are not exposed to end-user,
therefore changes are allowed. */

#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
static const float torch_mean[] = { 0.485, 0.456, 0.406 };
static const float torch_std[] = { 0.229, 0.224, 0.225 };

static EI_IMPULSE_ERROR scale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) {
if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
// @todo; could we write some faster vector math here?
for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] - torch_mean[0]) / torch_std[0];
fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] - torch_mean[1]) / torch_std[1];
fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] - torch_mean[2]) / torch_std[2];
}
}
else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) {
int scale_res = numpy::scale(fmatrix, 255.0f);
if (scale_res != EIDSP_OK) {
ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
return EI_IMPULSE_DSP_ERROR;
}
}

return EI_IMPULSE_OK;
}

static EI_IMPULSE_ERROR unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) {
if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
// @todo; could we write some faster vector math here?
for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] * torch_std[0]) + torch_mean[0];
fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] * torch_std[1]) + torch_mean[1];
fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] * torch_std[2]) + torch_mean[2];
}
}
else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) {
int scale_res = numpy::scale(fmatrix, 1 / 255.0f);
if (scale_res != EIDSP_OK) {
ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
return EI_IMPULSE_DSP_ERROR;
}
}
return EI_IMPULSE_OK;
}
#endif

/**
* @brief Do inferencing over the processed feature matrix
*
@@ -116,10 +160,25 @@ extern "C" EI_IMPULSE_ERROR run_inference(
for (size_t ix = 0; ix < impulse->learning_blocks_size; ix++) {
ei_learning_block_t block = impulse->learning_blocks[ix];

#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
EI_IMPULSE_ERROR scale_res = scale_fmatrix(&block, fmatrix);
if (scale_res != EI_IMPULSE_OK) {
return scale_res;
}
#endif

EI_IMPULSE_ERROR res = block.infer_fn(impulse, fmatrix, result, block.config, debug);
if (res != EI_IMPULSE_OK) {
return res;
}

#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
// undo scaling
scale_res = unscale_fmatrix(&block, fmatrix);
if (scale_res != EI_IMPULSE_OK) {
return scale_res;
}
#endif
}

if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) {
@@ -449,11 +508,9 @@ extern "C" EI_IMPULSE_ERROR run_classifier_image_quantized(
ei_impulse_result_t *result,
bool debug = false)
{

memset(result, 0, sizeof(ei_impulse_result_t));

return run_nn_inference_image_quantized(impulse, signal, result, impulse->learning_blocks[0].config, debug);

}

#endif // #if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
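
The scale_fmatrix/unscale_fmatrix pair exists because several learning blocks can share the same feature matrix: each block's scaling is applied before its infer_fn and undone afterwards, so the next block sees the original values. A small self-contained sketch of that round trip for one Torch-normalized channel, using the red-channel constants from torch_mean/torch_std above:

    // Round-trip sketch: Torch-style normalization and its inverse restore the
    // original value up to floating-point error, which is why run_inference can
    // safely reuse the same feature matrix for the next learning block.
    #include <cstdio>

    int main() {
        const float mean = 0.485f, stddev = 0.229f;   // red-channel constants
        float x = 0.6f;                               // normalized pixel in [0, 1]
        float scaled   = (x - mean) / stddev;         // what the model consumes
        float restored = (scaled * stddev) + mean;    // undone after inference
        printf("%.6f -> %.6f -> %.6f\n", x, scaled, restored);
        return 0;
    }
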
52 changes: 43 additions & 9 deletions edge-impulse-sdk/classifier/ei_run_dsp.h
@@ -1262,7 +1262,8 @@ __attribute__((unused)) int extract_drpai_features_quantized(signal_t *signal, m

#if (EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI)

__attribute__((unused)) int extract_image_features_quantized(signal_t *signal, matrix_i8_t *output_matrix, void *config_ptr, float scale, float zero_point, const float frequency) {
__attribute__((unused)) int extract_image_features_quantized(signal_t *signal, matrix_i8_t *output_matrix, void *config_ptr, float scale, float zero_point, const float frequency,
int image_scaling) {
ei_dsp_config_image_t config = *((ei_dsp_config_image_t*)config_ptr);

int16_t channel_count = strcmp(config.channels, "Grayscale") == 0 ? 1 : 3;
@@ -1273,6 +1274,9 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
const int32_t iGreenToGray = (int32_t)(0.587f * 65536.0f);
const int32_t iBlueToGray = (int32_t)(0.114f * 65536.0f);

static const float torch_mean[] = { 0.485, 0.456, 0.406 };
static const float torch_std[] = { 0.229, 0.224, 0.225 };

#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE)
const size_t page_size = EI_DSP_IMAGE_BUFFER_STATIC_SIZE;
#else
@@ -1299,7 +1303,7 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m

if (channel_count == 3) {
// fast code path
if (scale == 0.003921568859368563f && zero_point == -128) {
if (scale == 0.003921568859368563f && zero_point == -128 && image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) {
int32_t r = static_cast<int32_t>(pixel >> 16 & 0xff);
int32_t g = static_cast<int32_t>(pixel >> 8 & 0xff);
int32_t b = static_cast<int32_t>(pixel & 0xff);
@@ -1310,9 +1314,24 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
}
// slow code path
else {
float r = static_cast<float>(pixel >> 16 & 0xff) / 255.0f;
float g = static_cast<float>(pixel >> 8 & 0xff) / 255.0f;
float b = static_cast<float>(pixel & 0xff) / 255.0f;
float r = static_cast<float>(pixel >> 16 & 0xff);
float g = static_cast<float>(pixel >> 8 & 0xff);
float b = static_cast<float>(pixel & 0xff);

if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) {
r /= 255.0f;
g /= 255.0f;
b /= 255.0f;
}
else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
r /= 255.0f;
g /= 255.0f;
b /= 255.0f;

r = (r - torch_mean[0]) / torch_std[0];
g = (g - torch_mean[1]) / torch_std[1];
b = (b - torch_mean[2]) / torch_std[2];
}

output_matrix->buffer[output_ix++] = static_cast<int8_t>(round(r / scale) + zero_point);
output_matrix->buffer[output_ix++] = static_cast<int8_t>(round(g / scale) + zero_point);
@@ -1321,7 +1340,7 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
}
else {
// fast code path
if (scale == 0.003921568859368563f && zero_point == -128) {
if (scale == 0.003921568859368563f && zero_point == -128 && image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) {
int32_t r = static_cast<int32_t>(pixel >> 16 & 0xff);
int32_t g = static_cast<int32_t>(pixel >> 8 & 0xff);
int32_t b = static_cast<int32_t>(pixel & 0xff);
@@ -1337,9 +1356,24 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
}
// slow code path
else {
float r = static_cast<float>(pixel >> 16 & 0xff) / 255.0f;
float g = static_cast<float>(pixel >> 8 & 0xff) / 255.0f;
float b = static_cast<float>(pixel & 0xff) / 255.0f;
float r = static_cast<float>(pixel >> 16 & 0xff);
float g = static_cast<float>(pixel >> 8 & 0xff);
float b = static_cast<float>(pixel & 0xff);

if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) {
r /= 255.0f;
g /= 255.0f;
b /= 255.0f;
}
else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
r /= 255.0f;
g /= 255.0f;
b /= 255.0f;

r = (r - torch_mean[0]) / torch_std[0];
g = (g - torch_mean[1]) / torch_std[1];
b = (b - torch_mean[2]) / torch_std[2];
}

// ITU-R 601-2 luma transform
// see: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert
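
For the quantized image path, the Torch branch first maps the raw 8-bit pixel into [0, 1], normalizes it with the per-channel mean and standard deviation, and only then applies the model's quantization parameters. A worked example with assumed quantization parameters (scale 1/255, zero point -128; real models carry their own values):

    // Illustrative arithmetic for the Torch slow path; scale and zero_point are
    // assumptions for the example, not values taken from a specific model.
    #include <cmath>
    #include <cstdio>

    int main() {
        const float scale = 0.003921568859368563f;  // assumed: 1/255
        const int zero_point = -128;                // assumed
        const float mean_r = 0.485f, std_r = 0.229f;

        float r = 128 / 255.0f;                     // raw red value 128 -> 0.50196
        r = (r - mean_r) / std_r;                   // Torch normalization -> 0.07406
        int8_t q = (int8_t)(std::round(r / scale) + zero_point);
        printf("quantized R = %d\n", q);            // round(18.89) - 128 = -109
        return 0;
    }
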
46 changes: 44 additions & 2 deletions edge-impulse-sdk/classifier/inferencing_engines/akida.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include <iomanip>
#include <limits>
#include <math.h>
#include <algorithm>
#include "pybind11/embed.h"
#include "pybind11/numpy.h"
#include "pybind11/stl.h"
@@ -71,6 +72,9 @@ static bool akida_initialized = false;
static std::vector<size_t> input_shape;
static tflite::RuntimeShape softmax_shape;
static tflite::SoftmaxParams dummy_params;
static int model_input_bits = 0;
static float scale;
static int down_scale;

bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
{
@@ -133,6 +137,32 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
// extend input by (N, ...) - hardcoded to (1, ...)
input_shape.insert(input_shape.begin(), (size_t)1);

// get model input_bits
std::vector<py::object> layers = model.attr("layers").cast<std::vector<py::object>>();
auto input_layer = layers[0];
model_input_bits = input_layer.attr("input_bits").cast<int>();
if((model_input_bits != 8) && (model_input_bits != 4)) {
ei_printf("ERR: Unsupported input_bits. Expected 4 or 8 got %d\n", model_input_bits);
return false;
}

// initialize scale coefficients
if(model_input_bits == 8) {
scale = 255;
down_scale = 1;
}
else if(model_input_bits == 4) {
// these values are recommended by BrainChip
scale = 15;
down_scale = 16;
}

if(debug) {
ei_printf("INFO: Model input_bits: %d\n", model_input_bits);
ei_printf("INFO: Scale: %f\n", scale);
ei_printf("INFO: Down scale: %d\n", down_scale);
}

#if (defined(EI_CLASSIFIER_USE_AKIDA_HARDWARE) && (EI_CLASSIFIER_USE_AKIDA_HARDWARE == 1))
// get list of available devices
py::list devices = akida.attr("devices")();
@@ -238,19 +268,31 @@ EI_IMPULSE_ERROR run_nn_inference(
* For images BW shape is (width, height, 1)
* For Audio shape is (width, height, 1) - spectrogram
* TODO: test with other ML models/data types
* For details see:
* https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html#direct-access
*/
auto r = input_data.mutable_unchecked<4>();
float temp;
for (py::ssize_t x = 0; x < r.shape(1); x++) {
for (py::ssize_t y = 0; y < r.shape(2); y++) {
for(py::ssize_t z = 0; z < r.shape(3); z++) {
r(0, x, y, z) = (uint8_t)(fmatrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z] * 255.0);
temp = (fmatrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z] * scale);
temp = std::max(0.0f, std::min(temp, 255.0f));
r(0, x, y, z) = (uint8_t)(temp / down_scale);
}
}
}

// Run inference on AKD1000
uint64_t ctx_start_us = ei_read_timer_us();
py::array_t<float> potentials = model_predict(input_data);
py::array_t<float> potentials;
try {
potentials = model_predict(input_data);
}
catch (py::error_already_set &e) {
ei_printf("ERR: Inference error:\n%s\n", e.what());
return EI_IMPULSE_AKIDA_ERROR;
}
// TODO: 'forward' is returning int8 or int32, but EI SDK supports int8 or float32 only
// py::array_t<float> potentials = model_forward(input_data);
uint64_t ctx_end_us = ei_read_timer_us();
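
The input-conversion loop in run_nn_inference now scales and clips each normalized feature according to the model's input_bits before handing it to Akida. A standalone sketch of that per-value conversion, assuming the feature matrix holds values in [0, 1] (with 8-bit inputs this reproduces the previous multiply-by-255 behaviour; the 4-bit scale/down_scale pair is the BrainChip-recommended one set in init_akida):

    // Per-value input conversion as performed in the loop above; `scale` and
    // `down_scale` come from the model's input_bits (8-bit: 255 / 1, 4-bit: 15 / 16).
    #include <algorithm>
    #include <cstdint>

    static uint8_t to_akida_input(float feature, float scale, int down_scale) {
        float temp = feature * scale;                   // e.g. 0.73 * 255 = 186.15
        temp = std::max(0.0f, std::min(temp, 255.0f));  // clip to the uint8 range
        return (uint8_t)(temp / down_scale);            // truncates toward zero
    }
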
(Diffs for the remaining changed files are not shown here.)
