Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ggml : change ggml_graph_compute() API to not require context #1999

Merged
merged 20 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
minor: rename ctx as plan; const
  • Loading branch information
mqy committed Jul 6, 2023
commit a37de23953ed794e1f8b100156b31f909c245edb
50 changes: 25 additions & 25 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -15941,13 +15941,13 @@ void clear_numa_thread_affinity(void) {}
#endif

struct ggml_compute_state_shared {
struct ggml_cgraph * cgraph;
struct ggml_graph_compute_plan * cgraph_ctx;
const struct ggml_cgraph * cgraph;
const struct ggml_graph_compute_plan * plan;

int64_t perf_node_start_cycles;
int64_t perf_node_start_time_us;

int n_threads;
const int n_threads;

// synchronization primitives
atomic_int n_active; // num active threads
Expand All @@ -15971,10 +15971,10 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const

static thread_ret_t ggml_graph_compute_thread(void * data) {
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
struct ggml_cgraph * cgraph = state->shared->cgraph;
const struct ggml_cgraph * cgraph = state->shared->cgraph;

struct ggml_graph_compute_plan * ctx = state->shared->cgraph_ctx;
const int *n_tasks_arr = ctx->n_tasks;
const struct ggml_graph_compute_plan * plan = state->shared->plan;
const int *n_tasks_arr = plan->n_tasks;
mqy marked this conversation as resolved.
Show resolved Hide resolved

const int n_threads = state->shared->n_threads;
set_numa_thread_affinity(state->ith, n_threads);
Expand All @@ -15989,8 +15989,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
/*.type =*/ GGML_TASK_FINALIZE,
/*.ith =*/ 0,
/*.nth =*/ 0,
/*.wsize =*/ ctx->work_size,
/*.wdata =*/ ctx->work_data,
/*.wsize =*/ plan->work_size,
/*.wdata =*/ plan->work_data,
};

if (node_n != -1) {
Expand Down Expand Up @@ -16059,8 +16059,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
/*.type =*/ GGML_TASK_COMPUTE,
/*.ith =*/ state->ith,
/*.nth =*/ n_tasks,
/*.wsize =*/ ctx->work_size,
/*.wdata =*/ ctx->work_data,
/*.wsize =*/ plan->work_size,
/*.wdata =*/ plan->work_data,
};

if (state->ith < n_tasks) {
Expand All @@ -16077,9 +16077,9 @@ struct ggml_graph_compute_plan ggml_graph_compute_make_plan(struct ggml_cgraph *
n_threads = GGML_DEFAULT_N_THREADS;
}

struct ggml_graph_compute_plan ctx;
memset(&ctx, 0, sizeof(struct ggml_graph_compute_plan));
int * n_tasks = ctx.n_tasks;
struct ggml_graph_compute_plan plan;
memset(&plan, 0, sizeof(struct ggml_graph_compute_plan));
int * n_tasks = plan.n_tasks;
size_t work_size = 0;

// initialize tasks + work buffer
Expand Down Expand Up @@ -16403,35 +16403,35 @@ struct ggml_graph_compute_plan ggml_graph_compute_make_plan(struct ggml_cgraph *
work_size += CACHE_LINE_SIZE*(n_threads - 1);
}

ctx.n_threads = n_threads;
ctx.work_size = work_size;
ctx.work_data = NULL;
plan.n_threads = n_threads;
plan.work_size = work_size;
plan.work_data = NULL;

return ctx;
return plan;
}

void ggml_graph_compute(struct ggml_graph_compute_plan * ctx, struct ggml_cgraph * cgraph) {
void ggml_graph_compute(struct ggml_graph_compute_plan * plan, struct ggml_cgraph * cgraph) {
{
GGML_ASSERT(ctx);
GGML_ASSERT(ctx->n_threads > 0);
GGML_ASSERT(plan);
GGML_ASSERT(plan->n_threads > 0);

if (ctx->work_size > 0) {
GGML_ASSERT(ctx->work_data);
if (plan->work_size > 0) {
GGML_ASSERT(plan->work_data);
}

for (int i = 0; i < cgraph->n_nodes; ++i) {
if (cgraph->nodes[i]->op != GGML_OP_NONE) {
GGML_ASSERT(ctx->n_tasks[i] > 0);
GGML_ASSERT(plan->n_tasks[i] > 0);
}
}

}

const int n_threads = ctx->n_threads;
const int n_threads = plan->n_threads;

struct ggml_compute_state_shared state_shared = {
/*.cgraph =*/ cgraph,
/*.cgraph_ctx =*/ ctx,
/*.plan =*/ plan,
/*.perf_node_start_cycles =*/ 0,
/*.perf_node_start_time_us =*/ 0,
/*.n_threads =*/ n_threads,
Expand Down
14 changes: 7 additions & 7 deletions ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@
// ggml_set_f32(b, 4.0f);
//
// const int n_threads = 1;
// struct ggml_graph_compute_plan ctx = ggml_graph_compute_make_plan(&gf, n_threads);
// if (ctx.work_size > 0) {
// ctx.work_data = malloc(ctx.work_size);
// GGML_ASSERT(ctx.work_data);
// struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
// if (plan.work_size > 0) {
// plan.work_data = malloc(plan.work_size);
// GGML_ASSERT(plan.work_data);
// }
// ggml_graph_compute(&ctx, &gf);
// if (ctx.work_data) {
// free(ctx.work_data);
// ggml_graph_compute(&plan, &gf);
// if (plan.work_data) {
// free(plan.work_data);
// }
//
// printf("f = %f\n", ggml_get_f32_1d(f, 0));
Expand Down