load files from model

ggerganov · katsu560 · May 19, 2024 · May 19, 2024 · May 19, 2024 · May 31, 2024
commit 2c3603e402e6b5f4cf3024b38024628df2ddbc53
diff --git a/examples/yolo/yolo-image.cpp b/examples/yolo/yolo-image.cpp
@@ -88,6 +88,31 @@ bool load_image(const char *fname, yolo_image & img)
  return true;
 }
 
+// Decode an encoded image held in memory into `img`.
+//
+//   buffer - pointer to the encoded image bytes (handed to stb_image for decoding)
+//   len    - number of bytes available at `buffer`
+//   img    - receives the width, height, channel count (always 3) and the pixel
+//            data, scaled from 0..255 to 0..1 and rearranged from interleaved
+//            (channel fastest) to planar (one full plane per channel) order
+//
+// Returns false if the input is empty or cannot be decoded; `img` is left
+// untouched in that case.
+bool load_image_from_memory(const char *buffer, int len, yolo_image & img)
+{
+    if (buffer == nullptr || len <= 0) {
+        return false;
+    }
+    int w, h, c;
+    // the last argument asks the decoder for exactly 3 channels per pixel
+    uint8_t * data = stbi_load_from_memory(reinterpret_cast<const uint8_t *>(buffer), len, &w, &h, &c, 3);
+    if (!data) {
+        return false;
+    }
+    c = 3; // the decoded buffer has 3 channels whatever the decoder reported in `c`
+    img.w = w;
+    img.h = h;
+    img.c = c;
+    // do the size and index arithmetic in size_t so large images cannot overflow int
+    const size_t width    = static_cast<size_t>(w);
+    const size_t height   = static_cast<size_t>(h);
+    const size_t channels = static_cast<size_t>(c);
+    const size_t plane    = width * height;
+    img.data.resize(plane * channels);
+    for (size_t k = 0; k < channels; ++k) {
+        for (size_t j = 0; j < height; ++j) {
+            for (size_t i = 0; i < width; ++i) {
+                const size_t dst_index = i + width*j + plane*k;
+                const size_t src_index = k + channels*i + channels*width*j;
+                img.data[dst_index] = static_cast<float>(data[src_index])/255.;
+            }
+        }
+    }
+    stbi_image_free(data);
+    return true;
+}
+
 static yolo_image resize_image(const yolo_image & im, int w, int h)
 {
  yolo_image resized(w, h, im.c);
@@ -207,4 +232,4 @@ void draw_label(yolo_image & im, int row, int col, const yolo_image & label, con
  }
  }
  }
-}
+}
diff --git a/examples/yolo/yolo-image.h b/examples/yolo/yolo-image.h
@@ -32,6 +32,7 @@ struct yolo_image {
 };
 
 bool load_image(const char *fname, yolo_image & img);
+bool load_image_from_memory(const char *buffer, int len, yolo_image & img);
 void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
 yolo_image letterbox_image(const yolo_image & im, int w, int h);
 bool save_image(const yolo_image & im, const char *name, int quality);

diff --git a/examples/yolo/yolov3-tiny.cpp b/examples/yolo/yolov3-tiny.cpp
@@ -30,6 +30,7 @@ struct yolo_model {
  int height = 416;
  std::vector<conv2d_layer> conv2d_layers;
  struct ggml_context * ctx;
+ struct gguf_context * ctx_gguf;
 };
 
 struct yolo_layer {
@@ -71,6 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
  fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
  return false;
  }
+ model.ctx_gguf = ctx;
  model.width = 416;
  model.height = 416;
  model.conv2d_layers.resize(13);
@@ -100,6 +102,47 @@ static bool load_model(const std::string & fname, yolo_model & model) {
  return true;
 }
 
+// istream from memory
+#include <streambuf>
+#include <istream>
+
+// Read-only std::streambuf over a caller-owned memory range, so that a
+// std::istream can read data that is already in memory.
+//
+// The bytes are not copied: only the two pointers are stored, so the range must
+// stay valid for as long as the membuf (and any stream using it) is in use.
+struct membuf : std::streambuf {
+    // View over the half-open range [begin, end).
+    membuf(const char * begin, const char * end) {
+        // std::streambuf only accepts non-const pointers; the data is never written
+        char * b = const_cast<char *>(begin);
+        char * e = const_cast<char *>(end);
+        this->begin = b;
+        this->end   = e;
+        this->setg(b, b, e);
+    }
+
+    // View over `size` bytes starting at `base`.
+    membuf(const char * base, size_t size) {
+        // must start from the `base` argument: the `begin` member is not set yet
+        char * b = const_cast<char *>(base);
+        this->begin = b;
+        this->end   = b + size;
+        this->setg(b, b, this->end);
+    }
+
+    // Move the read position by `off` relative to `dir`.
+    // Returns the new absolute position, or pos_type(off_type(-1)) when the
+    // target lies outside the buffer (the read position is then left unchanged).
+    // `which` is ignored because this buffer only has a get area.
+    virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which = std::ios_base::in) override {
+        (void) which;
+
+        const off_type length = end - begin;
+        off_type target;
+        if (dir == std::ios_base::cur) {
+            target = (gptr() - eback()) + off;
+        } else if (dir == std::ios_base::end) {
+            target = length + off;
+        } else if (dir == std::ios_base::beg) {
+            target = off;
+        } else {
+            return pos_type(off_type(-1));
+        }
+
+        if (target < 0 || target > length) {
+            return pos_type(off_type(-1));
+        }
+
+        setg(begin, begin + target, end);
+        return pos_type(target);
+    }
+
+    // Move the read position to the absolute position `pos` (same failure value as seekoff).
+    virtual pos_type seekpos(std::streampos pos, std::ios_base::openmode mode) override {
+        return seekoff(off_type(pos), std::ios_base::beg, mode);
+    }
+
+    char * begin; // first byte of the viewed range
+    char * end;   // one past the last byte of the viewed range
+};
+
+
 static bool load_labels(const char * filename, std::vector<std::string> & labels)
 {
  std::ifstream file_in(filename);
@@ -114,6 +157,32 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
  return true;
 }
 
+// Read class labels (one per line) from a file embedded in the GGUF model.
+//
+// `filename` must appear in the "embedded_files" array of the model and must
+// also be the name of a tensor whose data holds the file contents.
+//
+// Returns false when the file is not embedded or the model data is not
+// available, leaving `labels` untouched. On success the lines are appended to
+// `labels`, and the function asserts that `labels` then holds exactly 80 entries.
+static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
+{
+    if (gguf_find_key_array(ctx, "embedded_files", filename) == -1) {
+        return false;
+    }
+    const int tensor = gguf_find_tensor(ctx, filename);
+    if (tensor == -1) {
+        return false;
+    }
+    const char * data = (const char *)gguf_get_data(ctx);
+    if (data == nullptr) {
+        return false;
+    }
+    const size_t offset = gguf_get_tensor_offset(ctx, tensor);
+    // NOTE(review): this is the tensor's element count; it equals the byte length
+    // only if embedded files are stored with a 1-byte element type - confirm
+    const size_t len = gguf_get_tensor_size(ctx, tensor);
+    membuf buf(data + offset, data + offset + len);
+    std::istream file_in(&buf);
+    if (!file_in) {
+        return false;
+    }
+    std::string line;
+    while (std::getline(file_in, line)) {
+        labels.push_back(line);
+    }
+    GGML_ASSERT(labels.size() == 80);
+    return true;
+}
+
 static bool load_alphabet(std::vector<yolo_image> & alphabet)
 {
  alphabet.resize(8 * 128);
@@ -130,6 +199,35 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
  return true;
 }
 
+// Load the glyph images used for drawing labels from files embedded in the GGUF model.
+//
+// For every size index j in [0, 8) and character code i in [32, 127) the file
+// "data/labels/<i>_<j>.png" is looked up in the "embedded_files" array and in
+// the tensor list, decoded, and stored at alphabet[j*128 + i].
+//
+// Returns false (after printing the reason to stderr) as soon as one glyph is
+// missing or cannot be decoded; `alphabet` may then be partially filled.
+static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
+{
+    alphabet.resize(8 * 128);
+    // the data pointer does not depend on the glyph, so fetch and check it once
+    const char * data = (const char *)gguf_get_data(ctx);
+    if (data == nullptr) {
+        fprintf(stderr, "Cannot load alphabet: model data is not available\n");
+        return false;
+    }
+    for (int j = 0; j < 8; j++) {
+        for (int i = 32; i < 127; i++) {
+            char fname[256];
+            snprintf(fname, sizeof(fname), "data/labels/%d_%d.png", i, j);
+            if (gguf_find_key_array(ctx, "embedded_files", fname) == -1) {
+                fprintf(stderr, "Cannot find '%s' in embedded_files\n", fname);
+                return false;
+            }
+            const int tensor = gguf_find_tensor(ctx, fname);
+            if (tensor == -1) {
+                fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
+                return false;
+            }
+            const size_t offset = gguf_get_tensor_offset(ctx, tensor);
+            const size_t len = gguf_get_tensor_size(ctx, tensor);
+            // load_image_from_memory takes an int length: reject sizes that do not survive the conversion
+            const int len_i = static_cast<int>(len);
+            if (len_i < 0 || static_cast<size_t>(len_i) != len) {
+                fprintf(stderr, "Cannot load '%s'\n", fname);
+                return false;
+            }
+            if (!load_image_from_memory(data + offset, len_i, alphabet[j*128 + i])) {
+                fprintf(stderr, "Cannot load '%s'\n", fname);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const conv2d_layer & layer)
 {
  struct ggml_tensor * result = ggml_conv_2d(ctx, layer.weights, input, 1, 1, layer.padding, layer.padding, 1, 1);
@@ -503,14 +601,20 @@ int main(int argc, char *argv[])
  return 1;
  }
  std::vector<std::string> labels;
- if (!load_labels("data/coco.names", labels)) {
- fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
- return 1;
+ if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
+ fprintf(stderr, "%s: failed to load labels from 'data/coco.names' in model\n", __func__);
+ if (!load_labels("data/coco.names", labels)) {
+ fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
+ return 1;
+ }
  }
  std::vector<yolo_image> alphabet;
- if (!load_alphabet(alphabet)) {
- fprintf(stderr, "%s: failed to load alphabet\n", __func__);
- return 1;
+ if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
+ fprintf(stderr, "%s: failed to load alphabet from model\n", __func__);
+ if (!load_alphabet(alphabet)) {
+ fprintf(stderr, "%s: failed to load alphabet\n", __func__);
+ return 1;
+ }
  }
  const int64_t t_start_ms = ggml_time_ms();
  detect(img, model, params.thresh, labels, alphabet);

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
@@ -2305,6 +2305,7 @@ extern "C" {
 
  GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
  GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
+ GGML_API int gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val);
  GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
 
  GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
@@ -2333,6 +2334,7 @@ extern "C" {
  GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
  GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
  GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
+ GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int i);
 
  // removes key if it exists
  GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);

diff --git a/src/ggml.c b/src/ggml.c
@@ -21562,6 +21562,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
  ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
 
+ // set tensor size
+ size_t size = 1;
+ for (uint32_t j = 0; j < info->n_dims; ++j) {
+ size *= info->ne[j];
+ }
+ info->size = size;
+
  // TODO: return an error instead of crashing with GGML_ASSERT
  gguf_tensor_info_sanitize(info);
 
@@ -21784,6 +21791,37 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
  return keyfound;
 }
 
+// Search the string array stored under `key` for an element equal to `val`.
+//
+// Returns the index of the first matching element, or -1 if `key` does not
+// exist, is not an array of strings, or has no element equal to `val`.
+int gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val) {
+    const int key_id = gguf_find_key(ctx, key);
+    if (key_id == -1) {
+        return -1;
+    }
+
+    const struct gguf_kv * kv = &ctx->kv[key_id];
+
+    // the elements are read as gguf_str below, so any other array type must be rejected
+    if (kv->type != GGUF_TYPE_ARRAY || kv->value.arr.type != GGUF_TYPE_STRING) {
+        return -1;
+    }
+
+    const struct gguf_str * strs = (const struct gguf_str *) kv->value.arr.data;
+    const int n = gguf_get_arr_n(ctx, key_id);
+
+    for (int i = 0; i < n; ++i) {
+        if (strcmp(val, strs[i].data) == 0) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
 const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
  GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  return ctx->kv[key_id].key.data;
@@ -21920,17 +21958,25 @@ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
 }
 
+// Returns the `offset` field recorded for tensor `i`; GGML_ASSERTs that `i` is a valid tensor index.
 size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
+ GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
  return ctx->infos[i].offset;
 }
 
+// Returns the name string recorded for tensor `i`; GGML_ASSERTs that `i` is a valid tensor index.
+// The pointer refers to storage inside `ctx` (no copy is made here).
 char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
+ GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
  return ctx->infos[i].name.data;
 }
 
+// Returns the `type` field recorded for tensor `i`; GGML_ASSERTs that `i` is a valid tensor index.
 enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) {
+ GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
  return ctx->infos[i].type;
 }
 
+// Returns the `size` field recorded for tensor `i`; GGML_ASSERTs that `i` is a valid tensor index.
+// NOTE(review): gguf_init_from_file fills this field with the product of the tensor's
+// dimensions, i.e. an element count rather than a byte count - confirm callers expect that.
+size_t gguf_get_tensor_size(const struct gguf_context * ctx, int i) {
+ GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
+ return ctx->infos[i].size;
+}
+
 // returns the index
 static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
  const int idx = gguf_find_key(ctx, key);
@@ -22242,7 +22288,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
  gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
 
  switch (kv->type) {
- case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
+ case GGUF_TYPE_UINT8: gguf_bwrite_el (buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
  case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
  case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
  case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;