Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embed yolo files #831

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions examples/yolo/gguf-addfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
# gguf-addfile.py srcfile dstfile addfiles ...

from __future__ import annotations

import logging
import argparse
import os
import sys
from pathlib import Path
#from typing import Any
from typing import Any, Literal, NamedTuple, TypeVar, Union

import numpy as np
import numpy.typing as npt

# Necessary to load the local gguf package: when a 'gguf-py' directory exists
# two directories above this script's directory, put it first on sys.path so
# it wins over any installed gguf. Set NO_LOCAL_GGUF to disable this.
# The inserted path is the same directory whose existence is checked; the
# previous code checked one path but inserted a different one.
_local_gguf = Path(__file__).parent.parent.parent / 'gguf-py'
if "NO_LOCAL_GGUF" not in os.environ and _local_gguf.exists():
    sys.path.insert(0, str(_local_gguf))

from gguf import GGUFReader, GGUFWriter, ReaderField, GGMLQuantizationType, GGUFEndian, GGUFValueType, Keys # noqa: E402

logger = logging.getLogger("gguf-addfile")


def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
    """Return ``(host_endian, file_endian)`` as ``'LITTLE'`` / ``'BIG'`` strings.

    The host byte order is taken from ``sys.byteorder``. A reader whose
    ``byte_order`` is ``'S'`` is treated as holding a file with the opposite
    byte order; any other value means the file matches the host.
    """
    # sys.byteorder replaces the former np.uint32(1).newbyteorder("<") probe:
    # the scalar/array newbyteorder method no longer exists in NumPy 2.x.
    host_endian = 'LITTLE' if sys.byteorder == 'little' else 'BIG'
    if reader.byte_order == 'S':
        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
    else:
        file_endian = host_endian
    return (host_endian, file_endian)


def get_byteorder(reader: GGUFReader) -> GGUFEndian:
    """Return the byte order of the file behind *reader* as a ``GGUFEndian``.

    The host byte order is taken from ``sys.byteorder``. When
    ``reader.byte_order`` is ``"S"`` the opposite of the host order is
    returned; otherwise the host order is returned.
    """
    # sys.byteorder replaces the former np.uint32(1).newbyteorder("<") probe:
    # the scalar/array newbyteorder method no longer exists in NumPy 2.x.
    if sys.byteorder == 'little':
        host_endian = GGUFEndian.LITTLE
        swapped_endian = GGUFEndian.BIG
    else:
        # Sorry PDP or other weird systems that don't use BE or LE.
        host_endian = GGUFEndian.BIG
        swapped_endian = GGUFEndian.LITTLE

    return swapped_endian if reader.byte_order == "S" else host_endian


def decode_field(field: ReaderField) -> Any:
    """Convert a reader field into a plain Python value.

    Returns ``None`` when *field* is falsy or carries no type information.
    Otherwise the first entry of ``field.types`` selects the decoding:

    * ARRAY whose last type is STRING: a list with each part referenced by
      ``field.data`` decoded as UTF-8.
    * any other ARRAY: one flat list built from ``tolist()`` of each part
      referenced by ``field.data``.
    * STRING: the last part decoded as UTF-8.
    * anything else: the first element of the last part.
    """
    if not field or not field.types:
        return None

    parts = field.parts
    kind = field.types[0]

    if kind == GGUFValueType.ARRAY:
        if field.types[-1] == GGUFValueType.STRING:
            return [str(bytes(parts[i]), encoding='utf8') for i in field.data]
        flattened = []
        for i in field.data:
            flattened.extend(parts[i].tolist())
        return flattened

    if kind == GGUFValueType.STRING:
        return str(bytes(parts[-1]), encoding='utf8')
    return parts[-1][0]


def get_field_data(reader: GGUFReader, key: str) -> Any:
    """Look up *key* via ``reader.get_field`` and return its decoded value.

    The lookup result is passed straight to ``decode_field``.
    """
    return decode_field(reader.get_field(key))


def copy_with_filename(reader: GGUFReader, writer: GGUFWriter, new_metadata: dict[str, str], filename: list[str]) -> None:
    """Copy *reader* into *writer* and append the given files as extra tensors.

    Steps, in order:

    1. Every key/value field of *reader* is copied, except the architecture
       key and ``GGUF.*`` fields. Values present in *new_metadata* override
       the originals; consumed entries are deleted from *new_metadata*, so the
       caller's dict is mutated.
    2. The list of embedded paths is stored under ``Keys.EMBEDDED_FILES``.
    3. Tensor infos are written for the original tensors, then one
       one-dimensional ``I8`` tensor info per embedded file, named by its path.
    4. Header, key/value data and tensor infos are flushed, followed by the
       original tensor data and then the raw bytes of each embedded file.

    Args:
        reader: source GGUF file.
        writer: destination writer; it is always closed before returning.
        new_metadata: replacement values keyed by field name.
        filename: paths of the files to embed.

    Raises:
        OSError: if one of the files in *filename* cannot be read. This is
            raised before anything is handed to *writer*.
    """
    # Read every embedded file exactly once, up front. The size recorded in
    # the tensor info and the bytes written later then always agree, and an
    # unreadable file fails before any output is produced. Contents are held
    # in memory until they are written.
    embedded: list[tuple[str, bytes]] = []
    for path in filename:
        with open(path, "rb") as f:
            embedded.append((path, f.read()))

    try:
        for field in reader.fields.values():
            # Suppress virtual fields and fields written by GGUFWriter
            if field.name == Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
                logger.debug(f'Suppressing {field.name}')
                continue

            # Skip old chat templates if we have new ones
            if field.name.startswith(Keys.Tokenizer.CHAT_TEMPLATE) and Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
                logger.debug(f'Skipping {field.name}')
                continue

            old_val = decode_field(field)
            val = new_metadata.get(field.name, old_val)

            if field.name in new_metadata:
                logger.debug(f'Modifying {field.name}: "{old_val}" -> "{val}"')
                del new_metadata[field.name]
            elif val is not None:
                logger.debug(f'Copying {field.name}')

            if val is not None:
                writer.add_key(field.name)
                writer.add_val(val, field.types[0])

        if Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
            logger.debug('Adding chat template(s)')
            writer.add_chat_template(new_metadata[Keys.Tokenizer.CHAT_TEMPLATE])
            del new_metadata[Keys.Tokenizer.CHAT_TEMPLATE]

        # add filenames to kv
        writer.add_array(Keys.EMBEDDED_FILES, filename)

        for tensor in reader.tensors:
            # Dimensions are written in reverse order, so flip them first
            shape = np.flipud(tensor.shape)
            writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)

        # add file info as tensor_info
        for path, data in embedded:
            logger.debug(f'Adding {path}')
            data_len = len(data)
            # NOTE(review): np.float16 does not match the I8 raw dtype;
            # presumably the writer uses raw_dtype when one is given - confirm.
            writer.add_tensor_info(path, [data_len], np.float16, data_len, GGMLQuantizationType.I8)

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_ti_data_to_file()

        for tensor in reader.tensors:
            writer.write_tensor_data(tensor.data)

        # write file body as tensor data
        for path, data in embedded:
            logger.debug(f'Adding {path}')
            # write data with padding
            writer.write_data(data)
    finally:
        # Release the output file even when copying fails part-way.
        writer.close()


def main() -> None:
    """Command-line entry point: copy a GGUF file and embed extra files in it.

    Parses ``input``, ``output`` and one or more ``addfiles`` (plus
    ``--verbose``), opens the input with ``GGUFReader``, creates a
    ``GGUFWriter`` with the same architecture and byte order, carries over a
    custom alignment if the input defines one, and hands everything to
    ``copy_with_filename``. With no arguments at all the help text is shown.
    """
    cli = argparse.ArgumentParser(description="Add files to GGUF file metadata")
    cli.add_argument("input", type=str, help="GGUF format model input filename")
    cli.add_argument("output", type=str, help="GGUF format model output filename")
    cli.add_argument("addfiles", type=str, nargs='+', help="add filenames ...")
    cli.add_argument("--verbose", action="store_true", help="Increase output verbosity")

    # An empty command line is turned into "--help".
    argv = None if len(sys.argv) > 1 else ["--help"]
    args = cli.parse_args(argv)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    logger.info(f'* Loading: {args.input}')
    reader = GGUFReader(args.input, 'r')
    arch = get_field_data(reader, Keys.General.ARCHITECTURE)
    endianess = get_byteorder(reader)

    logger.info(f'* Writing: {args.output}')
    writer = GGUFWriter(args.output, arch=arch, endianess=endianess)

    alignment = get_field_data(reader, Keys.General.ALIGNMENT)
    if alignment is not None:
        logger.debug(f'Setting custom alignment: {alignment}')
        writer.data_alignment = alignment

    logger.info(f'* Adding: {args.addfiles}')
    filename = list(args.addfiles)
    for path in filename:
        logger.info(f'* Adding: {path}')

    copy_with_filename(reader, writer, {}, filename)


if __name__ == '__main__':
main()
27 changes: 26 additions & 1 deletion examples/yolo/yolo-image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ bool load_image(const char *fname, yolo_image & img)
return true;
}

// Decode an encoded image held in memory into `img`.
//
// `buffer`/`len` describe the encoded bytes. The decoder is asked for 3
// channels, and the interleaved 8-bit result is rearranged into one plane per
// channel with each sample divided by 255. Returns false when decoding fails,
// in which case `img` is left untouched.
bool load_image_from_memory(const char *buffer, int len, yolo_image & img)
{
    int width, height, channels;
    uint8_t * pixels = stbi_load_from_memory((uint8_t *)buffer, len, &width, &height, &channels, 3);
    if (pixels == nullptr) {
        return false;
    }

    // 3 channels were requested, so that is what `pixels` holds regardless of
    // the channel count reported for the source image.
    channels = 3;
    img.w = width;
    img.h = height;
    img.c = channels;
    img.data.resize(width*height*channels);

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            for (int ch = 0; ch < channels; ++ch) {
                const int planar      = x + width*y + width*height*ch;
                const int interleaved = ch + channels*x + channels*width*y;
                img.data[planar] = (float)pixels[interleaved]/255.;
            }
        }
    }

    stbi_image_free(pixels);
    return true;
}

static yolo_image resize_image(const yolo_image & im, int w, int h)
{
yolo_image resized(w, h, im.c);
Expand Down Expand Up @@ -207,4 +232,4 @@ void draw_label(yolo_image & im, int row, int col, const yolo_image & label, con
}
}
}
}
}
1 change: 1 addition & 0 deletions examples/yolo/yolo-image.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct yolo_image {
};

bool load_image(const char *fname, yolo_image & img);
bool load_image_from_memory(const char *buffer, int len, yolo_image & img);
void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
yolo_image letterbox_image(const yolo_image & im, int w, int h);
bool save_image(const yolo_image & im, const char *name, int quality);
Expand Down
114 changes: 108 additions & 6 deletions examples/yolo/yolov3-tiny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct yolo_model {
int height = 416;
std::vector<conv2d_layer> conv2d_layers;
struct ggml_context * ctx;
struct gguf_context * ctx_gguf;
};

struct yolo_layer {
Expand Down Expand Up @@ -71,6 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
return false;
}
model.ctx_gguf = ctx;
model.width = 416;
model.height = 416;
model.conv2d_layers.resize(13);
Expand Down Expand Up @@ -100,6 +102,47 @@ static bool load_model(const std::string & fname, yolo_model & model) {
return true;
}

// istream from memory
#include <streambuf>
#include <istream>

// Read-only stream buffer over an existing block of memory, so that a
// std::istream can parse bytes that are already loaded. The buffer stores
// pointers only: it neither copies nor frees the memory, which must stay
// valid for as long as the membuf is in use.
struct membuf : std::streambuf {
    // View the half-open range [begin, end).
    membuf(const char * begin, const char * end) {
        // setg() takes non-const pointers; the data is only ever read here.
        char * b(const_cast<char *>(begin));
        char * e(const_cast<char *>(end));
        this->begin = b;
        this->end = e;
        this->setg(b, b, e);
    }

    // View `size` bytes starting at `base`.
    membuf(const char * base, size_t size) {
        // Cast the `base` parameter; the previous code cast the still
        // uninitialised `begin` member here.
        char * b(const_cast<char *>(base));
        this->begin = b;
        this->end = b + size;
        this->setg(b, b, this->end);
    }

    // Move the read position by `off` relative to `dir`. Returns the new
    // absolute position, or pos_type(off_type(-1)) when the target lies
    // outside [begin, end]; the read position is then left unchanged.
    virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which = std::ios_base::in) override {
        (void) which; // only an input sequence exists

        off_type origin = 0;
        if (dir == std::ios_base::cur) {
            origin = gptr() - begin;
        } else if (dir == std::ios_base::end) {
            origin = end - begin;
        } else if (dir != std::ios_base::beg) {
            return pos_type(off_type(-1));
        }

        const off_type target = origin + off;
        if (target < 0 || target > off_type(end - begin)) {
            return pos_type(off_type(-1));
        }

        setg(begin, begin + target, end);
        return pos_type(target);
    }

    // Absolute seek, expressed as a seek from the beginning.
    virtual pos_type seekpos(std::streampos pos, std::ios_base::openmode mode) override {
        return seekoff(pos - pos_type(off_type(0)), std::ios_base::beg, mode);
    }

    char * begin; // first byte of the viewed memory
    char * end;   // one past the last byte of the viewed memory
};


static bool load_labels(const char * filename, std::vector<std::string> & labels)
{
std::ifstream file_in(filename);
Expand All @@ -114,6 +157,31 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
return true;
}

static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
{
int key_id = gguf_find_key_array(ctx, "embedded_files", filename);
if (key_id == -1) {
return false;
}
char *data = NULL;
size_t size = 0;
int tensor = gguf_find_and_get_tensor(ctx, filename, &data, &size);
if (tensor == -1) {
return false;
}
membuf buf(data, data + size);
std::istream file_in(&buf);
if (!file_in) {
return false;
}
std::string line;
while (std::getline(file_in, line)) {
labels.push_back(line);
}
GGML_ASSERT(labels.size() == 80);
return true;
}

static bool load_alphabet(std::vector<yolo_image> & alphabet)
{
alphabet.resize(8 * 128);
Expand All @@ -130,6 +198,34 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
return true;
}

static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
{
alphabet.resize(8 * 128);
for (int j = 0; j < 8; j++) {
for (int i = 32; i < 127; i++) {
char fname[256];
sprintf(fname, "data/labels/%d_%d.png", i, j);
int key_id = gguf_find_key_array(ctx, "embedded_files", fname);
if (key_id == -1) {
fprintf(stderr, "Cannot find '%s' in embedded_files\n", fname);
return false;
}
char *data = NULL;
size_t size = 0;
int tensor = gguf_find_and_get_tensor(ctx, fname, &data, &size);
if (tensor == -1) {
fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
return false;
}
if (!load_image_from_memory(data, size, alphabet[j*128 + i])) {
fprintf(stderr, "Cannot load '%s'\n", fname);
return false;
}
}
}
return true;
}

static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const conv2d_layer & layer)
{
struct ggml_tensor * result = ggml_conv_2d(ctx, layer.weights, input, 1, 1, layer.padding, layer.padding, 1, 1);
Expand Down Expand Up @@ -503,14 +599,20 @@ int main(int argc, char *argv[])
return 1;
}
std::vector<std::string> labels;
if (!load_labels("data/coco.names", labels)) {
fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
return 1;
if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
fprintf(stderr, "%s: failed to load labels from 'data/coco.names' in model\n", __func__);
if (!load_labels("data/coco.names", labels)) {
fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
return 1;
}
}
std::vector<yolo_image> alphabet;
if (!load_alphabet(alphabet)) {
fprintf(stderr, "%s: failed to load alphabet\n", __func__);
return 1;
if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
fprintf(stderr, "%s: failed to load alphabet from model\n", __func__);
if (!load_alphabet(alphabet)) {
fprintf(stderr, "%s: failed to load alphabet\n", __func__);
return 1;
}
}
const int64_t t_start_ms = ggml_time_ms();
detect(img, model, params.thresh, labels, alphabet);
Expand Down
Loading
Loading