breaking change: deprecate GGML_TASK_INIT and GGML_TASK_FINALIZE #1995

Merged · 1 commit · Jul 1, 2023
breaking change: deprecate GGML_TASK_INIT and GGML_TASK_FINALIZE.
These passes will not be scheduled unless explicitly enabled.
mqy committed Jun 29, 2023
commit 13c8d8711181a693837d683c060a91d5e30bb39c
61 changes: 52 additions & 9 deletions ggml.c
@@ -3846,6 +3846,40 @@ static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
 
+// WARN:
+// Misconfiguration can lead to problems that are hard to reason about:
+// * At best it crashes or produces nonsense output.
+// * At worst the output is subtly wrong and hard to perceive.
+//
+// An op has to enable INIT or FINALIZE when any of its branches needs that pass.
+// Take care with compile options (e.g., GGML_USE_xxx).
+static bool GGML_OP_HAS_INIT    [GGML_OP_COUNT] = { 0 };
+static bool GGML_OP_HAS_FINALIZE[GGML_OP_COUNT] = { 0 };
+static void ggml_setup_op_has_task_pass(void) {
+    { // INIT
+        bool * I = GGML_OP_HAS_INIT;
+
+        I[GGML_OP_ACC               ] = true;
+        I[GGML_OP_MUL_MAT           ] = true;
+        I[GGML_OP_OUT_PROD          ] = true;
+        I[GGML_OP_SET               ] = true;
+        I[GGML_OP_GET_ROWS_BACK     ] = true;
+        I[GGML_OP_DIAG_MASK_INF     ] = true;
+        I[GGML_OP_DIAG_MASK_ZERO    ] = true;
+        I[GGML_OP_CONV_1D_S1_PH     ] = true;
+        I[GGML_OP_CONV_1D_S2_PH     ] = true;
+        I[GGML_OP_CONV_2D_SK_P0     ] = true;
+        I[GGML_OP_FLASH_ATTN_BACK   ] = true;
+        I[GGML_OP_CROSS_ENTROPY_LOSS] = true;
+    }
+
+    { // FINALIZE
+        bool * F = GGML_OP_HAS_FINALIZE;
+
+        F[GGML_OP_CROSS_ENTROPY_LOSS] = true;
+    }
+}
+
 //
 // ggml context
 //
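This table-driven opt-in is the heart of the change: an op participates in a pass only if its entry is set during setup. A minimal self-contained sketch of the same pattern (the op enum and all names here are illustrative, not ggml's):

    #include <stdbool.h>
    #include <stdio.h>

    // illustrative op enum, not ggml's real one
    enum my_op { MY_OP_ADD = 0, MY_OP_MUL_MAT, MY_OP_COUNT };

    static bool MY_OP_HAS_INIT[MY_OP_COUNT] = { 0 };

    static void my_setup_op_has_task_pass(void) {
        // only ops that actually need an INIT pass opt in;
        // every other entry stays false and the pass is skipped
        MY_OP_HAS_INIT[MY_OP_MUL_MAT] = true;
    }

    int main(void) {
        my_setup_op_has_task_pass();
        printf("ADD needs INIT: %d\n", MY_OP_HAS_INIT[MY_OP_ADD]);         // 0
        printf("MUL_MAT needs INIT: %d\n", MY_OP_HAS_INIT[MY_OP_MUL_MAT]); // 1
        return 0;
    }

Keeping all entries in one setup function is what the warning above is about: an op whose INIT-dependent branch is left out of the table silently skips initialization.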
@@ -4267,6 +4301,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         ggml_cl_init();
 #endif
 
+        ggml_setup_op_has_task_pass();
+
         is_first_call = false;
     }
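The setup runs once, from the first ggml_init call, guarded by is_first_call. A hedged sketch of that first-call pattern, assuming initialization is not invoked concurrently, as in the surrounding code (names illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    static void my_setup_tables(void) { printf("tables initialized\n"); }

    void my_init(void) {
        // run one-time setup on the first call only; assumes my_init
        // is not entered from multiple threads at once
        static bool is_first_call = true;
        if (is_first_call) {
            my_setup_tables();
            is_first_call = false;
        }
    }

    int main(void) {
        my_init(); // prints once
        my_init(); // no-op
        return 0;
    }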

@@ -16791,9 +16827,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         if (node_n != -1) {
             /* FINALIZE */
             struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
-            params.nth = node->n_tasks;
-            ggml_compute_forward(&params, node);
-            ggml_graph_compute_perf_stats_node(node, state->shared);
+            if (GGML_OP_HAS_FINALIZE[node->op]) {
+                params.nth = node->n_tasks;
+                ggml_compute_forward(&params, node);
+                ggml_graph_compute_perf_stats_node(node, state->shared);
+            }
         }
 
         // distribute new work or execute it direct if 1T
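This hunk is the consumer side: previously every node paid for a FINALIZE dispatch, now the scheduler consults the table first. A small sketch of the check-before-dispatch idea (op names and the designated initializer are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    enum my_op { MY_OP_ADD = 0, MY_OP_LOSS, MY_OP_COUNT };
    static const bool MY_OP_HAS_FINALIZE[MY_OP_COUNT] = { [MY_OP_LOSS] = true };

    static void run_finalize(enum my_op op) { printf("finalize op %d\n", op); }

    // consult the table before dispatching, so ops that never
    // registered a FINALIZE pass cost nothing here
    static void maybe_finalize(enum my_op op) {
        if (MY_OP_HAS_FINALIZE[op]) {
            run_finalize(op);
        }
    }

    int main(void) {
        maybe_finalize(MY_OP_ADD);  // skipped
        maybe_finalize(MY_OP_LOSS); // runs
        return 0;
    }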
@@ -16805,20 +16843,25 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             state->shared->perf_node_start_cycles  = ggml_perf_cycles();
             state->shared->perf_node_start_time_us = ggml_perf_time_us();
 
+            params.nth = node->n_tasks;
+
             /* INIT */
-            params.type = GGML_TASK_INIT;
-            params.nth  = node->n_tasks;
-            ggml_compute_forward(&params, node);
+            if (GGML_OP_HAS_INIT[node->op]) {
+                params.type = GGML_TASK_INIT;
+                ggml_compute_forward(&params, node);
+            }
 
             if (node->n_tasks == 1) {
                 // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
                 //       they do something more efficient than spinning (?)
                 params.type = GGML_TASK_COMPUTE;
                 ggml_compute_forward(&params, node);
 
-                params.type = GGML_TASK_FINALIZE;
-                ggml_compute_forward(&params, node);
-                ggml_graph_compute_perf_stats_node(node, state->shared);
+                if (GGML_OP_HAS_FINALIZE[node->op]) {
+                    params.type = GGML_TASK_FINALIZE;
+                    ggml_compute_forward(&params, node);
+                    ggml_graph_compute_perf_stats_node(node, state->shared);
+                }
             } else {
                 break;
             }
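Putting the two hunks together, the per-node sequence on the single-threaded path becomes: INIT if enabled, COMPUTE always, FINALIZE if enabled. A compact single-threaded sketch of that lifecycle (all names illustrative; the real scheduler additionally coordinates worker threads):

    #include <stdbool.h>
    #include <stdio.h>

    enum my_task { MY_TASK_INIT, MY_TASK_COMPUTE, MY_TASK_FINALIZE };
    enum my_op   { MY_OP_ADD = 0, MY_OP_MUL_MAT, MY_OP_COUNT };

    static const bool HAS_INIT    [MY_OP_COUNT] = { [MY_OP_MUL_MAT] = true };
    static const bool HAS_FINALIZE[MY_OP_COUNT] = { 0 };

    static void forward(enum my_task task, enum my_op op) {
        printf("op %d: task %d\n", op, task);
    }

    // COMPUTE always runs; INIT/FINALIZE run only for ops that opted in
    static void compute_node(enum my_op op) {
        if (HAS_INIT[op])     forward(MY_TASK_INIT, op);
        forward(MY_TASK_COMPUTE, op);
        if (HAS_FINALIZE[op]) forward(MY_TASK_FINALIZE, op);
    }

    int main(void) {
        compute_node(MY_OP_ADD);     // COMPUTE only
        compute_node(MY_OP_MUL_MAT); // INIT, then COMPUTE
        return 0;
    }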
3 changes: 3 additions & 0 deletions ggml.h
@@ -444,6 +444,9 @@ extern "C" {
 
 
     // compute types
+
+    // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
+    // This behavior changed in https://github.com/ggerganov/llama.cpp/pull/1995.
     enum ggml_task_type {
         GGML_TASK_INIT = 0,
         GGML_TASK_COMPUTE,
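For op implementers, the note above means a pass-aware compute function keeps its usual switch over the task type, but the INIT and FINALIZE branches are only reached if the op registered them in the tables. A hedged sketch of such a body (the enum and work items here are illustrative, not ggml's API):

    #include <stdio.h>

    enum my_task { MY_TASK_INIT, MY_TASK_COMPUTE, MY_TASK_FINALIZE };

    // each pass does its own slice of work; after this change the INIT and
    // FINALIZE cases run only for ops listed in the has-init/has-finalize tables
    static void my_op_forward(enum my_task type) {
        switch (type) {
            case MY_TASK_INIT:     printf("zero scratch, init accumulators\n"); break;
            case MY_TASK_COMPUTE:  printf("main per-thread computation\n");     break;
            case MY_TASK_FINALIZE: printf("reduce partial results\n");          break;
        }
    }

    int main(void) {
        my_op_forward(MY_TASK_INIT);
        my_op_forward(MY_TASK_COMPUTE);
        my_op_forward(MY_TASK_FINALIZE);
        return 0;
    }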