Skip to content

Commit

Permalink
ggml : cgraph export/import/eval example + GPU support (#108)
Browse files Browse the repository at this point in the history
* ggml : cgraph export brainstorming

* mnist : code style

* mnist : minor

* ggml : initial cgraph export

* ggml : initial graph import (wip)

* ggml : import op args correctly

* ggml : add ggml_get_tensor_by_name()

* mnist : add compute graph evaluation on CPU example

* ggml : add ggml_tensor_overhead()

* ggml : rename new functions to ggml_cgraph_...

* mnist : add Metal inference skeleton (WIP)

* mnist : working on the Metal pipeline (WIP)

* mnist : prepare the Metal encoder (WIP)

* mnist : first Metal kernel for F32 ADD

* mnist : looks like MTLHeap does not work

* mnist : initial full pass of MNIST on the GPU (not verified)

* mnist : minor cleanup

* mnist : full GPU inference works

* mnist : use custom soft_max kernel since MPSMatrixSoftMax is bugged

* mnist : use constant for soft_max instead of hardcoded 10

* mnist : check multiple predictions (Metal)

* mnist : minor

* ggml : move cgraph import / export to ggml

* mnist : remove common dependencies

* mnist : fix soft_max threadgroup size

* mnist : init no_alloc member

* ggml : improve "get tensor" API
  • Loading branch information
ggerganov committed May 29, 2023
1 parent db5eef1 commit 3b697a2
Show file tree
Hide file tree
Showing 8 changed files with 1,297 additions and 11 deletions.
26 changes: 26 additions & 0 deletions examples/mnist/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,29 @@ set(TEST_TARGET mnist)
add_executable(${TEST_TARGET} main.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common)

#
# mnist-cpu
#

set(TEST_TARGET mnist-cpu)
add_executable(${TEST_TARGET} main-cpu.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)

if (APPLE)
    #
    # mnist-mtl (GPU inference via Metal - Apple platforms only)
    #

    # system frameworks required by the Metal backend
    find_library(FOUNDATION_LIBRARY         Foundation              REQUIRED)
    find_library(METAL_FRAMEWORK            Metal                   REQUIRED)
    find_library(METALKIT_FRAMEWORK         MetalKit                REQUIRED)
    find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)

    set(TEST_TARGET mnist-mtl)
    add_executable(${TEST_TARGET} main-mtl.cpp main-mtl.h main-mtl.m)
    target_link_libraries(${TEST_TARGET} PRIVATE
        ggml
        ${FOUNDATION_LIBRARY}
        ${METAL_FRAMEWORK}
        ${METALKIT_FRAMEWORK}
        ${METALPERFORMANCE_FRAMEWORK}
    )
endif()
116 changes: 116 additions & 0 deletions examples/mnist/main-cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Use a pre-generated MNIST compute graph for inference on the CPU
//
// You can generate a compute graph using the "mnist" tool:
//
// $ ./bin/mnist ./models/mnist/ggml-model-f32.bin ../examples/mnist/models/mnist/t10k-images.idx3-ubyte
//
// This command creates the "mnist.ggml" file, which contains the generated compute graph.
// Now, you can re-use the compute graph with the "mnist-cpu" tool:
//
// $ ./bin/mnist-cpu ./models/mnist/mnist.ggml ../examples/mnist/models/mnist/t10k-images.idx3-ubyte
//

#include "ggml/ggml.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <vector>

// evaluate the MNIST compute graph on the CPU
//
// - fname_cgraph: path to the exported compute graph (see the "mnist" tool)
// - n_threads:    number of threads to use
// - digit:        784 (28x28) pixel values
//
// returns the 0 - 9 prediction (index of the highest output probability)
int mnist_eval(
        const char * fname_cgraph,
        const int n_threads,
        std::vector<float> digit
        ) {
    // load the compute graph
    struct ggml_context * ctx_data = NULL;
    struct ggml_context * ctx_eval = NULL;

    struct ggml_cgraph gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
    gfi.n_threads = n_threads;

    // work buffer needed during ggml_graph_compute() to allocate a work tensor
    // note: RAII vector instead of the original static malloc() that was never freed
    std::vector<uint8_t> buf(gfi.work_size); // TODO: the required size is a guess

    struct ggml_init_params params = {
        .mem_size   = buf.size(),
        .mem_buffer = buf.data(),
        .no_alloc   = false,
    };

    struct ggml_context * ctx0 = ggml_init(params);

    // copy the input pixels into the graph's "input" tensor
    struct ggml_tensor * input = ggml_graph_get_tensor(&gfi, "input");
    memcpy(input->data, digit.data(), ggml_nbytes(input));

    ggml_graph_compute(ctx0, &gfi);

    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(&gfi, "probs"));

    // the prediction is the class with the highest probability
    constexpr int n_classes = 10; // digits 0 - 9
    const int prediction = std::max_element(probs_data, probs_data + n_classes) - probs_data;

    ggml_free(ctx0);
    ggml_free(ctx_data);
    ggml_free(ctx_eval);

    return prediction;
}

// entry point: picks a random digit from the MNIST test set, renders it
// in ASCII, and prints the prediction from the pre-generated compute graph
int main(int argc, char ** argv) {
    srand(time(NULL));
    ggml_time_init();

    if (argc != 3) {
        fprintf(stderr, "Usage: %s models/mnist/mnist.ggml models/mnist/t10k-images.idx3-ubyte\n", argv[0]);
        exit(1); // a usage error is a failure - do not exit with 0
    }

    uint8_t buf[784];
    std::vector<float> digit;

    // read a random digit from the test set
    {
        std::ifstream fin(argv[2], std::ios::binary);
        if (!fin) {
            fprintf(stderr, "%s: failed to open '%s'\n", __func__, argv[2]);
            return 1;
        }

        // seek to a random digit: 16-byte header + 28*28 * (random 0 - 9999)
        fin.seekg(16 + 784 * (rand() % 10000));
        fin.read((char *) buf, sizeof(buf));

        // make sure all 784 bytes were actually read (truncated/corrupt file)
        if (!fin) {
            fprintf(stderr, "%s: failed to read a digit from '%s'\n", __func__, argv[2]);
            return 1;
        }
    }

    // render the digit in ASCII and convert the pixels to float input
    {
        digit.resize(sizeof(buf));

        for (int row = 0; row < 28; row++) {
            for (int col = 0; col < 28; col++) {
                fprintf(stderr, "%c ", (float)buf[row*28 + col] > 230 ? '*' : '_');
                digit[row*28 + col] = ((float)buf[row*28 + col]);
            }

            fprintf(stderr, "\n");
        }

        fprintf(stderr, "\n");
    }

    const int prediction = mnist_eval(argv[1], 1, digit);

    fprintf(stdout, "%s: predicted digit is %d\n", __func__, prediction);

    return 0;
}
129 changes: 129 additions & 0 deletions examples/mnist/main-mtl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Use a pre-generated MNIST compute graph for inference on the M1 GPU via MPS
//
// You can generate a compute graph using the "mnist" tool:
//
// $ ./bin/mnist ./models/mnist/ggml-model-f32.bin ../examples/mnist/models/mnist/t10k-images.idx3-ubyte
//
// This command creates the "mnist.ggml" file, which contains the generated compute graph.
// Now, you can re-use the compute graph on the GPU with the "mnist-mtl" tool:
//
// $ ./bin/mnist-mtl ./models/mnist/mnist.ggml ../examples/mnist/models/mnist/t10k-images.idx3-ubyte
//

#include "ggml/ggml.h"

#include "main-mtl.h"

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <fstream>
#include <vector>

// evaluate the MNIST compute graph on the GPU via Metal
//
// - fname_cgraph: path to the exported compute graph (see the "mnist" tool)
// - n_threads:    number of threads (CPU-side graph setup)
// - digit:        784 (28x28) pixel values
//
// returns the 0 - 9 prediction
int mnist_eval(
        const char * fname_cgraph,
        const int n_threads,
        std::vector<float> digit
        ) {
    // load the compute graph
    struct ggml_context * ctx_data = NULL;
    struct ggml_context * ctx_eval = NULL;

    struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
    gf.n_threads = n_threads;

    // work buffer needed during ggml_graph_compute() to allocate a work tensor
    // note: RAII vector instead of the original static malloc() that was never freed
    std::vector<uint8_t> buf(gf.work_size); // TODO: the required size is a guess

    struct ggml_init_params params = {
        .mem_size   = buf.size(),
        .mem_buffer = buf.data(),
        .no_alloc   = false,
    };

    struct ggml_context * ctx_work = ggml_init(params);

    // this allocates all Metal resources and memory buffers
    auto ctx_mtl = mnist_mtl_init(ctx_data, ctx_eval, ctx_work, &gf);

    int prediction = -1;

    // NOTE(review): leftover harness from testing multiple predictions on the
    // GPU - with a single iteration only the real digit (i == 0) is evaluated;
    // odd iterations would feed an all-zero input instead
    for (int i = 0; i < 1; ++i) {
        struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "input");

        if (i % 2 == 0) {
            memcpy(input->data, digit.data(), ggml_nbytes(input));
        } else {
            memset(input->data, 0, ggml_nbytes(input));
        }

        // the actual inference happens here
        prediction = mnist_mtl_eval(ctx_mtl, &gf);
    }

    mnist_mtl_free(ctx_mtl);

    ggml_free(ctx_work);
    ggml_free(ctx_data);
    ggml_free(ctx_eval);

    return prediction;
}

// entry point: picks a random digit from the MNIST test set, renders it
// in ASCII, and prints the prediction computed on the GPU via Metal
int main(int argc, char ** argv) {
    srand(time(NULL));
    ggml_time_init();

    if (argc != 3) {
        fprintf(stderr, "Usage: %s models/mnist/mnist.ggml models/mnist/t10k-images.idx3-ubyte\n", argv[0]);
        exit(1); // a usage error is a failure - do not exit with 0
    }

    uint8_t buf[784];
    std::vector<float> digit;

    // read a random digit from the test set
    {
        std::ifstream fin(argv[2], std::ios::binary);
        if (!fin) {
            fprintf(stderr, "%s: failed to open '%s'\n", __func__, argv[2]);
            return 1;
        }

        // seek to a random digit: 16-byte header + 28*28 * (random 0 - 9999)
        fin.seekg(16 + 784 * (rand() % 10000));
        fin.read((char *) buf, sizeof(buf));

        // make sure all 784 bytes were actually read (truncated/corrupt file)
        if (!fin) {
            fprintf(stderr, "%s: failed to read a digit from '%s'\n", __func__, argv[2]);
            return 1;
        }
    }

    // render the digit in ASCII and convert the pixels to float input
    {
        digit.resize(sizeof(buf));

        for (int row = 0; row < 28; row++) {
            for (int col = 0; col < 28; col++) {
                fprintf(stderr, "%c ", (float)buf[row*28 + col] > 230 ? '*' : '_');
                digit[row*28 + col] = ((float)buf[row*28 + col]);
            }

            fprintf(stderr, "\n");
        }

        fprintf(stderr, "\n");
    }

    const int prediction = mnist_eval(argv[1], 1, digit);

    fprintf(stdout, "%s: predicted digit is %d\n", __func__, prediction);

    return 0;
}
26 changes: 26 additions & 0 deletions examples/mnist/main-mtl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#pragma once

// C interface for running the exported MNIST compute graph on the GPU via
// Metal (implementation lives in main-mtl.m; used by main-mtl.cpp)

struct ggml_context;
struct ggml_cgraph;

#ifdef __cplusplus
extern "C" {
#endif

// opaque handle to the Metal state created by mnist_mtl_init()
struct ggml_mtl_context;

// initialize the Metal backend for the given imported graph
// - ctx_data: context holding the graph's data tensors (model weights)
// - ctx_eval: context holding the graph's eval (intermediate) tensors
// - ctx_work: context providing scratch memory for graph computation
// - gf:       the imported compute graph to run
// the returned context must be released with mnist_mtl_free()
struct ggml_mtl_context * mnist_mtl_init(
            struct ggml_context * ctx_data,
            struct ggml_context * ctx_eval,
            struct ggml_context * ctx_work,
            struct ggml_cgraph * gf);

// release all resources owned by the Metal context
void mnist_mtl_free(struct ggml_mtl_context * ctx);

// run one inference pass of the graph on the GPU;
// returns the 0 - 9 digit prediction (as used by main-mtl.cpp)
int mnist_mtl_eval(
            struct ggml_mtl_context * ctx,
            struct ggml_cgraph * gf);

#ifdef __cplusplus
}
#endif
Loading

0 comments on commit 3b697a2

Please sign in to comment.