From 10fb1850abc101f9d378c317b8dc010b140dcd55 Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Sun, 2 Jul 2023 16:30:46 +0530 Subject: [PATCH 01/11] mechanism to abort ggml_graph_compute --- include/ggml/ggml.h | 1 + src/ggml.c | 29 ++++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 459913222..134cd867f 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -1271,6 +1271,7 @@ extern "C" { GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep); GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph); + GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)()); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); diff --git a/src/ggml.c b/src/ggml.c index 92faf03f7..8f7b9888f 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef GGML_USE_METAL #include @@ -16654,6 +16655,7 @@ typedef pthread_t ggml_thread_t; #define ggml_thread_create pthread_create #define ggml_thread_join pthread_join +#define ggml_thread_kill pthread_kill #else @@ -16749,6 +16751,8 @@ struct ggml_compute_state_shared { // synchronization primitives atomic_int n_active; // num active threads atomic_int node_n; // active graph node + + bool (*abort_callback)(); // abort ggml_graph_compute when true }; struct ggml_compute_state { @@ -16776,6 +16780,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { int node_n = -1; while (true) { + if (state->ith == 0 && state->shared->abort_callback()) { + return 0; + } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { // all other threads are finished and spinning // do finalize and init here so we don't have synchronize again @@ -16793,6 
+16800,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { params.nth = node->n_tasks; ggml_compute_forward(&params, node); ggml_graph_compute_perf_stats_node(node, state->shared); + } // distribute new work or execute it direct if 1T @@ -16821,6 +16829,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { } else { break; } + + if (state->shared->abort_callback()) { + break; + } } atomic_store(&state->shared->n_active, n_threads); @@ -16856,7 +16868,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { return 0; } +bool always_false() { return false; } void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { + ggml_graph_compute_with_abort(ctx, cgraph, always_false); +} + +void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void)) { const int n_threads = cgraph->n_threads; struct ggml_compute_state_shared state_shared = { @@ -16866,6 +16883,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) /*.n_threads =*/ n_threads, /*.n_active =*/ n_threads, /*.node_n =*/ -1, + /*.abort_callback =*/ abort_callback, }; struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads); @@ -17240,11 +17258,16 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) // don't leave affinity set on the main thread clear_numa_thread_affinity(); - // join thread pool + // join or kill thread pool if (n_threads > 1) { for (int j = 1; j < n_threads; j++) { - const int rc = ggml_thread_join(workers[j].thrd, NULL); - GGML_ASSERT(rc == 0); + if (abort_callback()) { + const int rc = ggml_thread_kill(workers[j].thrd, SIGKILL); + GGML_ASSERT(rc == 0); + } else { + const int rc = ggml_thread_join(workers[j].thrd, NULL); + GGML_ASSERT(rc == 0); + } } } From 8bff0bdb50de6612eca6006665b38e77de931984 Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Sun, 2 Jul 2023 19:15:06 +0530 Subject: [PATCH 
02/11] use pthread_cancel --- src/ggml.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ggml.c b/src/ggml.c index 8f7b9888f..427388f70 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16655,7 +16655,7 @@ typedef pthread_t ggml_thread_t; #define ggml_thread_create pthread_create #define ggml_thread_join pthread_join -#define ggml_thread_kill pthread_kill +#define ggml_thread_cancel pthread_cancel #else @@ -16781,7 +16781,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { while (true) { if (state->ith == 0 && state->shared->abort_callback()) { - return 0; + return GGML_EXIT_ABORTED; } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { // all other threads are finished and spinning @@ -16865,7 +16865,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { } } - return 0; + return GGML_EXIT_SUCCESS; } bool always_false() { return false; } @@ -17253,7 +17253,7 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph const int64_t perf_start_time_us = ggml_perf_time_us(); // this is a work thread too - ggml_graph_compute_thread(&workers[0]); + int compute_status = ggml_graph_compute_thread(&workers[0]); // don't leave affinity set on the main thread clear_numa_thread_affinity(); @@ -17261,10 +17261,10 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph // join or kill thread pool if (n_threads > 1) { for (int j = 1; j < n_threads; j++) { - if (abort_callback()) { - const int rc = ggml_thread_kill(workers[j].thrd, SIGKILL); + if (compute_status == GGML_EXIT_ABORTED) { + const int rc = ggml_thread_cancel(workers[j].thrd); GGML_ASSERT(rc == 0); - } else { + } else if (compute_status == GGML_EXIT_SUCCESS) { const int rc = ggml_thread_join(workers[j].thrd, NULL); GGML_ASSERT(rc == 0); } From 293d605beb03035ab2fabe08fe370ac53e0ae70f Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Sun, 2 Jul 2023 19:20:46 +0530 Subject: [PATCH 03/11] forgot to commit 
ggml.h --- include/ggml/ggml.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 134cd867f..9e486dd0f 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -201,6 +201,9 @@ #define GGML_MAX_NAME 48 #define GGML_DEFAULT_N_THREADS 4 +#define GGML_EXIT_SUCCESS 0 +#define GGML_EXIT_ABORTED 1 + #define GGML_ASSERT(x) \ do { \ if (!(x)) { \ From 21bdeead09139bc0918ad5bed93b24cdb7a7e372 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 3 Jul 2023 20:21:26 +0530 Subject: [PATCH 04/11] static always_false() Co-authored-by: Georgi Gerganov --- src/ggml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ggml.c b/src/ggml.c index 427388f70..4da7f393a 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16868,7 +16868,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { return GGML_EXIT_SUCCESS; } -bool always_false() { return false; } +static bool always_false() { return false; } + void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { ggml_graph_compute_with_abort(ctx, cgraph, always_false); } From f132e16fc9ceb9bce1fb5a14147dd1f0793b4f80 Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Mon, 3 Jul 2023 20:31:07 +0530 Subject: [PATCH 05/11] accept callback data --- include/ggml/ggml.h | 2 +- src/ggml.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 9e486dd0f..d088c8459 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -1274,7 +1274,7 @@ extern "C" { GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep); GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph); - GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)()); + GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, 
bool (*abort_callback)(), void * abort_callback_data); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); diff --git a/src/ggml.c b/src/ggml.c index 4da7f393a..f22f0501f 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16752,7 +16752,8 @@ struct ggml_compute_state_shared { atomic_int n_active; // num active threads atomic_int node_n; // active graph node - bool (*abort_callback)(); // abort ggml_graph_compute when true + bool (*abort_callback)(void * data); // abort ggml_graph_compute when true + void * abort_callback_data; }; struct ggml_compute_state { @@ -16780,7 +16781,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { int node_n = -1; while (true) { - if (state->ith == 0 && state->shared->abort_callback()) { + if (state->ith == 0 && state->shared->abort_callback(state->shared->abort_callback_data)) { return GGML_EXIT_ABORTED; } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { @@ -16830,7 +16831,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { break; } - if (state->shared->abort_callback()) { + if (state->shared->abort_callback(state->shared->abort_callback_data)) { break; } } @@ -16868,13 +16869,13 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { return GGML_EXIT_SUCCESS; } -static bool always_false() { return false; } +static bool always_false(void * data) { return false; } void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { - ggml_graph_compute_with_abort(ctx, cgraph, always_false); + ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL); } -void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void)) { +void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void*), void *abort_callback_data) { const int n_threads = cgraph->n_threads; struct 
ggml_compute_state_shared state_shared = { @@ -16885,6 +16886,7 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph /*.n_active =*/ n_threads, /*.node_n =*/ -1, /*.abort_callback =*/ abort_callback, + /*.abort_callback_data =*/ abort_callback_data, }; struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads); From f9432d76fac318437d0701e65f77dc9b6d8dc3bd Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Mon, 3 Jul 2023 20:36:02 +0530 Subject: [PATCH 06/11] proper function prototype --- include/ggml/ggml.h | 2 +- src/ggml.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index d088c8459..e66fb090b 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -1274,7 +1274,7 @@ extern "C" { GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep); GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph); - GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(), void * abort_callback_data); + GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); diff --git a/src/ggml.c b/src/ggml.c index f22f0501f..5d8661376 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16875,7 +16875,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL); } -void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void*), void *abort_callback_data) { +void ggml_graph_compute_with_abort(struct ggml_context * 
ctx, struct ggml_cgraph * cgraph, + bool (*abort_callback)(void * data), void *abort_callback_data) { const int n_threads = cgraph->n_threads; struct ggml_compute_state_shared state_shared = { From 4218d926aa9b407d7fef6489c50ceabb86119889 Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Mon, 3 Jul 2023 20:46:13 +0530 Subject: [PATCH 07/11] return exit status --- include/ggml/ggml.h | 2 +- src/ggml.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 135c4f02b..aa61d93bd 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -1303,7 +1303,7 @@ extern "C" { GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep); GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph); - GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data); + GGML_API int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); diff --git a/src/ggml.c b/src/ggml.c index 0acbe3a2f..218c7433f 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16443,7 +16443,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL); } -void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, +int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void *abort_callback_data) { const int n_threads = cgraph->n_threads; @@ -16861,6 +16861,8 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct 
ggml_cgraph (double) perf_time_us_cur / 1000.0, (double) cgraph->perf_time_us / 1000.0 / cgraph->perf_runs); } + + return compute_status; } void ggml_graph_reset(struct ggml_cgraph * cgraph) { From fcd5b1708c6dce498ce63403d2c2b09e6bf6b64b Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Wed, 5 Jul 2023 12:34:01 +0530 Subject: [PATCH 08/11] remove pthread_cancel and join every thread --- src/ggml.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/ggml.c b/src/ggml.c index 218c7433f..1130d1e1a 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -16216,7 +16216,6 @@ typedef pthread_t ggml_thread_t; #define ggml_thread_create pthread_create #define ggml_thread_join pthread_join -#define ggml_thread_cancel pthread_cancel #else @@ -16343,7 +16342,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { int node_n = -1; while (true) { - if (state->ith == 0 && state->shared->abort_callback(state->shared->abort_callback_data)) { + if (state->shared->abort_callback(state->shared->abort_callback_data)) { return GGML_EXIT_ABORTED; } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { @@ -16835,13 +16834,8 @@ int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph // join or kill thread pool if (n_threads > 1) { for (int j = 1; j < n_threads; j++) { - if (compute_status == GGML_EXIT_ABORTED) { - const int rc = ggml_thread_cancel(workers[j].thrd); - GGML_ASSERT(rc == 0); - } else if (compute_status == GGML_EXIT_SUCCESS) { - const int rc = ggml_thread_join(workers[j].thrd, NULL); - GGML_ASSERT(rc == 0); - } + const int rc = ggml_thread_join(workers[j].thrd, NULL); + GGML_ASSERT(rc == 0); } } From 47de3d6ee9cba2b9809cccaee6719d70b61ec46c Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Tue, 11 Jul 2023 23:39:32 +0530 Subject: [PATCH 09/11] put abort_callback onto cplan --- include/ggml/ggml.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 
502a39a89..8fe05d3a5 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -447,6 +447,10 @@ extern "C" { // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes int n_tasks[GGML_MAX_NODES]; + + // abort ggml_graph_compute when true + bool (*abort_callback)(void * data); + void * abort_callback_data; }; // computation graph @@ -1308,7 +1312,7 @@ extern "C" { // ggml_graph_plan() has to be called before ggml_graph_compute() // when plan.work_size > 0, caller must allocate memory for plan.work_data GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); - GGML_API void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); + GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // same as ggml_graph_compute() but the work data is allocated as a part of the context From 118809b3d88f14bf1daeade4433d1c3d7cbe19c3 Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Tue, 11 Jul 2023 23:43:35 +0530 Subject: [PATCH 10/11] cplan abort_callback in ggml.c --- src/ggml.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ggml.c b/src/ggml.c index 8dac60f7d..33fd2e6e8 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -15981,7 +15981,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { int node_n = -1; while (true) { - if (state->shared->abort_callback(state->shared->abort_callback_data)) { + if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { return GGML_EXIT_ABORTED; } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { @@ -16038,7 +16038,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { break; } - if (state->shared->abort_callback(state->shared->abort_callback_data)) { + if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { break; } } @@ -16415,7 +16415,7 @@ struct ggml_cplan 
ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { return cplan; } -void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { +int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { { GGML_ASSERT(cplan); GGML_ASSERT(cplan->n_threads > 0); From 26b90d06eae80ef295be14071be714ac8e39ee2f Mon Sep 17 00:00:00 2001 From: CCLDArjun Date: Wed, 12 Jul 2023 00:39:42 +0530 Subject: [PATCH 11/11] make sure all threads abort --- src/ggml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ggml.c b/src/ggml.c index 33fd2e6e8..149e069f0 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -15982,6 +15982,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { while (true) { if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { + state->shared->node_n += 1; return GGML_EXIT_ABORTED; } if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {