Skip to content

Commit

Permalink
breaking change: deprecate GGML_TASK_INIT and GGML_TASK_FINALIZE.
Browse files Browse the repository at this point in the history
Will not be scheduled unless explicitly enabled.
  • Loading branch information
mqy committed Jun 29, 2023
1 parent 96a712c commit 13c8d87
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 9 deletions.
61 changes: 52 additions & 9 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -3846,6 +3846,40 @@ static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");

// WARN:
// Misconfiguration can lead to problems that are hard to reason about:
// * At best it crashes or produces nonsense.
// * At worst the output is subtly wrong and hard to perceive.
//
// An op has to enable INIT or FINALIZE when any of its branches needs that pass.
// Take care with compile options (e.g., GGML_USE_xxx).
//
// Per-op lookup tables: true iff the op requires the INIT / FINALIZE task
// pass in addition to the COMPUTE pass. Zero-initialized here and populated
// once by ggml_setup_op_has_task_pass(); any op left false is scheduled for
// the COMPUTE pass only.
static bool GGML_OP_HAS_INIT [GGML_OP_COUNT] = { 0 };
static bool GGML_OP_HAS_FINALIZE[GGML_OP_COUNT] = { 0 };
// One-time setup (called from ggml_init()): flag every op that needs the
// INIT and/or FINALIZE pass.
static void ggml_setup_op_has_task_pass(void) {
{ // INIT
// short alias for the INIT table to keep the entries readable
bool * I = GGML_OP_HAS_INIT;

I[GGML_OP_ACC ] = true;
I[GGML_OP_MUL_MAT ] = true;
I[GGML_OP_OUT_PROD ] = true;
I[GGML_OP_SET ] = true;
I[GGML_OP_GET_ROWS_BACK ] = true;
I[GGML_OP_DIAG_MASK_INF ] = true;
I[GGML_OP_DIAG_MASK_ZERO ] = true;
I[GGML_OP_CONV_1D_S1_PH ] = true;
I[GGML_OP_CONV_1D_S2_PH ] = true;
I[GGML_OP_CONV_2D_SK_P0 ] = true;
I[GGML_OP_FLASH_ATTN_BACK ] = true;
I[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
}

{ // FINALIZE
// short alias for the FINALIZE table; currently only one op opts in
bool * F = GGML_OP_HAS_FINALIZE;

F[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
}
}

//
// ggml context
//
Expand Down Expand Up @@ -4267,6 +4301,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
ggml_cl_init();
#endif

ggml_setup_op_has_task_pass();

is_first_call = false;
}

Expand Down Expand Up @@ -16791,9 +16827,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
if (node_n != -1) {
/* FINALIZE */
struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
params.nth = node->n_tasks;
ggml_compute_forward(&params, node);
ggml_graph_compute_perf_stats_node(node, state->shared);
if (GGML_OP_HAS_FINALIZE[node->op]) {
params.nth = node->n_tasks;
ggml_compute_forward(&params, node);
ggml_graph_compute_perf_stats_node(node, state->shared);
}
}

// distribute new work or execute it direct if 1T
Expand All @@ -16805,20 +16843,25 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
state->shared->perf_node_start_cycles = ggml_perf_cycles();
state->shared->perf_node_start_time_us = ggml_perf_time_us();

params.nth = node->n_tasks;

/* INIT */
params.type = GGML_TASK_INIT;
params.nth = node->n_tasks;
ggml_compute_forward(&params, node);
if (GGML_OP_HAS_INIT[node->op]) {
params.type = GGML_TASK_INIT;
ggml_compute_forward(&params, node);
}

if (node->n_tasks == 1) {
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
// they do something more efficient than spinning (?)
params.type = GGML_TASK_COMPUTE;
ggml_compute_forward(&params, node);

params.type = GGML_TASK_FINALIZE;
ggml_compute_forward(&params, node);
ggml_graph_compute_perf_stats_node(node, state->shared);
if (GGML_OP_HAS_FINALIZE[node->op]) {
params.type = GGML_TASK_FINALIZE;
ggml_compute_forward(&params, node);
ggml_graph_compute_perf_stats_node(node, state->shared);
}
} else {
break;
}
Expand Down
3 changes: 3 additions & 0 deletions ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,9 @@ extern "C" {


// compute types

// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
// This behavior was changed in https://github.com/ggerganov/llama.cpp/pull/1995.
enum ggml_task_type {
GGML_TASK_INIT = 0,
GGML_TASK_COMPUTE,
Expand Down

0 comments on commit 13c8d87

Please sign in to comment.