rewrite: no longer consider backward compatibility; plan and make_plan
mqy committed Jul 3, 2023
1 parent 7d2e391 commit e052bc4
Showing 8 changed files with 408 additions and 169 deletions.
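
Every hunk below applies the same mechanical change: the per-graph n_threads field is removed from ggml_cgraph, and each one-shot ggml_graph_compute(ctx, &gf) call becomes an explicit two-step sequence that plans the computation first. As a reading aid, here is the new calling convention factored into a helper; the wrapper function and its name are illustrative only, while the plan API and the allocate/compute/free pattern are taken directly from the hunks below.

    #include <stdlib.h>
    #include "ggml.h" // assumes the ggml.h from this revision, which declares the plan API

    // Illustrative helper wrapping the pattern repeated at every call site in
    // this commit: make a plan for the graph, allocate the plan's work buffer
    // if it needs one, run the computation, then release the buffer.
    static void graph_compute_helper(struct ggml_cgraph * gf, int n_threads) {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, n_threads);
        if (plan.work_size > 0) {
            plan.work_data = malloc(plan.work_size);
            GGML_ASSERT(plan.work_data);
        }
        ggml_graph_compute(&plan, gf);
        if (plan.work_data) {
            free(plan.work_data);
        }
    }

The design consequence is that the caller now owns the work buffer's lifetime instead of ggml allocating working memory internally from its context, which is why every call site below brackets the compute call with malloc and free.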
41 changes: 36 additions & 5 deletions examples/baby-llama/baby-llama.cpp
@@ -1586,7 +1586,6 @@ int main(int argc, char ** argv) {
         int n_past = 0;
 
         ggml_cgraph gf = {};
-        gf.n_threads = 1;
 
         get_example_targets_batch(ctx0, 64*ex+0, tokens_input, targets);
 
@@ -1595,7 +1594,18 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * e = square_error_loss(ctx0, targets, logits);
 
         ggml_build_forward_expand(&gf, e);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         float error_before_opt = ggml_get_f32_1d(e, 0);
 
@@ -1611,7 +1621,18 @@ int main(int argc, char ** argv) {
         ggml_opt(ctx0, opt_params_lbfgs, e);
         //
         ggml_build_forward_expand(&gf, e);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         float error_after_opt = ggml_get_f32_1d(e, 0);
 
@@ -1659,13 +1680,23 @@ int main(int argc, char ** argv) {
         struct ggml_context * ctx0 = ggml_init(params);
 
         ggml_cgraph gf = {};
-        gf.n_threads = 1;
 
         int n_past = 0;
         struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, &gf, tokens_input, sample_ctx, n_past);
 
         ggml_build_forward_expand(&gf, logits);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
         struct ggml_tensor * probs        = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
46 changes: 37 additions & 9 deletions examples/benchmark/benchmark-matmult.cpp
@@ -159,13 +159,22 @@ int main(int argc, char ** argv) {
    // printf("Creating compute graph\n");
    struct ggml_cgraph gf = ggml_build_forward(m11xm2);
 
-    gf.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf.n_threads);
+    printf("n_threads=%i\n", benchmark_params.n_threads);
 
    TENSOR_DUMP(m11);
    TENSOR_DUMP(m2);
 
-    ggml_graph_compute(ctx, &gf);
+    {
+        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
+        if (plan.work_size > 0) {
+            plan.work_data = malloc(plan.work_size);
+            GGML_ASSERT(plan.work_data);
+        }
+        ggml_graph_compute(&plan, &gf);
+        if (plan.work_data) {
+            free(plan.work_data);
+        }
+    }
 
    TENSOR_DUMP(gf.nodes[0]);
 
@@ -187,7 +196,6 @@ int main(int argc, char ** argv) {
 
    // printf("Creating compute graph\n");
    struct ggml_cgraph gf31 = ggml_build_forward(q31);
-    gf31.n_threads=benchmark_params.n_threads;
 
    // Set up a second graph computation to make sure we override the CPU cache lines
    // printf("Creating new tensor q12 & Running quantize\n");
@@ -199,8 +207,7 @@ int main(int argc, char ** argv) {
 
    //printf("Creating compute graph\n");
    struct ggml_cgraph gf32 = ggml_build_forward(q32);
-    gf32.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf31.n_threads);
+    printf("n_threads=%i\n", benchmark_params.n_threads);
 
    const int dimx = sizex;
    const int dimy = sizey;
@@ -221,14 +228,25 @@ int main(int argc, char ** argv) {
 
        long long int start = ggml_time_us();
        //printf("Running ggml_graph_compute\n");
-        ggml_graph_compute(ctx, &gf31);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf31);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
        long long int stop = ggml_time_us();
        long long int usec = stop-start;
        double gflops = (double)(flops_per_matrix)/usec/1000.0;
        gflops_sum += gflops;
        printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%10.2f\n",
            i,
-            gf31.n_threads,
+            benchmark_params.n_threads,
            sizex, sizey, sizez, flops_per_matrix,
            usec,gflops);
 
@@ -253,7 +271,17 @@ int main(int argc, char ** argv) {
        }
 
        // Running a different graph computation to make sure we override the CPU cache lines
-        ggml_graph_compute(ctx, &gf32);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf32);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
    }
    printf("\n");
    printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations));
41 changes: 34 additions & 7 deletions examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -3215,9 +3215,6 @@ int main(int argc, char ** argv) {
        struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data;
        struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data;
 
-        // ggml_cgraph gf = {};
-        gf->n_threads = params.n_threads;
-        gb->n_threads = params.n_threads;
 
        get_example_targets_batch(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs);
 
@@ -3246,7 +3243,17 @@ int main(int argc, char ** argv) {
            *gb = ggml_build_backward(ctx0, gf, true);
        }
 
-        ggml_graph_compute(ctx0, gf);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
        size_t used_mem_before_opt = ggml_used_mem(ctx0);
 
@@ -3270,7 +3277,17 @@ int main(int argc, char ** argv) {
        model.train_samples += n_batch;
        model.train_tokens += n_batch * n_tokens;
 
-        ggml_graph_compute(ctx0, gf);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
        float error_after_opt = ggml_get_f32_1d(loss, 0);
 
@@ -3352,13 +3369,23 @@ int main(int argc, char ** argv) {
        struct ggml_context * ctx0 = ggml_init(cparams);
 
        ggml_cgraph gf = {};
-        gf.n_threads = params.n_threads;
 
        int n_past = 0;
        struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, &gf, tokens_input, sample_ctx, n_past);
 
        ggml_build_forward_expand(&gf, logits);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
        //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
        //struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
(Diffs for the remaining 5 changed files are not shown.)
