From 10fb1850abc101f9d378c317b8dc010b140dcd55 Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Sun, 2 Jul 2023 16:30:46 +0530
Subject: [PATCH 01/11] mechanism to abort ggml_graph_compute

---
 include/ggml/ggml.h |  1 +
 src/ggml.c          | 29 ++++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 459913222..134cd867f 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -1271,6 +1271,7 @@ extern "C" {
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
     GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)());
     GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
diff --git a/src/ggml.c b/src/ggml.c
index 92faf03f7..8f7b9888f 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -25,6 +25,7 @@
 #include <float.h>
 #include <limits.h>
 #include <stdarg.h>
+#include <signal.h>
 
 #ifdef GGML_USE_METAL
 #include <unistd.h>
@@ -16654,6 +16655,7 @@ typedef pthread_t ggml_thread_t;
 
 #define ggml_thread_create pthread_create
 #define ggml_thread_join   pthread_join
+#define ggml_thread_kill   pthread_kill
 
 #else
 
@@ -16749,6 +16751,8 @@ struct ggml_compute_state_shared {
     // synchronization primitives
     atomic_int n_active; // num active threads
     atomic_int node_n;   // active graph node
+
+    bool (*abort_callback)(); // abort ggml_graph_compute when true
 };
 
 struct ggml_compute_state {
@@ -16776,6 +16780,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     int node_n = -1;
 
     while (true) {
+        if (state->ith == 0 && state->shared->abort_callback()) {
+            return 0;
+        }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
@@ -16793,6 +16800,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 params.nth = node->n_tasks;
                 ggml_compute_forward(&params, node);
                 ggml_graph_compute_perf_stats_node(node, state->shared);
+
             }
 
             // distribute new work or execute it direct if 1T
@@ -16821,6 +16829,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 } else {
                     break;
                 }
+
+                if (state->shared->abort_callback()) {
+                    break;
+                }
             }
 
             atomic_store(&state->shared->n_active, n_threads);
@@ -16856,7 +16868,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     return 0;
 }
 
+bool always_false() { return false; }
 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
+    ggml_graph_compute_with_abort(ctx, cgraph, always_false);
+}
+
+void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void)) {
     const int n_threads = cgraph->n_threads;
 
     struct ggml_compute_state_shared state_shared = {
@@ -16866,6 +16883,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         /*.n_threads               =*/ n_threads,
         /*.n_active                =*/ n_threads,
         /*.node_n                  =*/ -1,
+        /*.abort_callback          =*/ abort_callback,
     };
     struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads);
 
@@ -17240,11 +17258,16 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
 
-    // join thread pool
+    // join or kill thread pool
     if (n_threads > 1) {
         for (int j = 1; j < n_threads; j++) {
-            const int rc = ggml_thread_join(workers[j].thrd, NULL);
-            GGML_ASSERT(rc == 0);
+            if (abort_callback()) {
+                const int rc = ggml_thread_kill(workers[j].thrd, SIGKILL);
+                GGML_ASSERT(rc == 0);
+            } else {
+                const int rc = ggml_thread_join(workers[j].thrd, NULL);
+                GGML_ASSERT(rc == 0);
+            }
         }
     }
 

From 8bff0bdb50de6612eca6006665b38e77de931984 Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Sun, 2 Jul 2023 19:15:06 +0530
Subject: [PATCH 02/11] use pthread_cancel

---
 src/ggml.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/ggml.c b/src/ggml.c
index 8f7b9888f..427388f70 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16655,7 +16655,7 @@ typedef pthread_t ggml_thread_t;
 
 #define ggml_thread_create pthread_create
 #define ggml_thread_join   pthread_join
-#define ggml_thread_kill   pthread_kill
+#define ggml_thread_cancel   pthread_cancel
 
 #else
 
@@ -16781,7 +16781,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
     while (true) {
         if (state->ith == 0 && state->shared->abort_callback()) {
-            return 0;
+            return GGML_EXIT_ABORTED;
         }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
             // all other threads are finished and spinning
@@ -16865,7 +16865,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return 0;
+    return GGML_EXIT_SUCCESS;
 }
 
 bool always_false() { return false; }
@@ -17253,7 +17253,7 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph
     const int64_t perf_start_time_us = ggml_perf_time_us();
 
     // this is a work thread too
-    ggml_graph_compute_thread(&workers[0]);
+    int compute_status = ggml_graph_compute_thread(&workers[0]);
 
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
@@ -17261,10 +17261,10 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph
     // join or kill thread pool
     if (n_threads > 1) {
         for (int j = 1; j < n_threads; j++) {
-            if (abort_callback()) {
-                const int rc = ggml_thread_kill(workers[j].thrd, SIGKILL);
+            if (compute_status == GGML_EXIT_ABORTED) {
+                const int rc = ggml_thread_cancel(workers[j].thrd);
                 GGML_ASSERT(rc == 0);
-            } else {
+            } else if (compute_status == GGML_EXIT_SUCCESS) {
                 const int rc = ggml_thread_join(workers[j].thrd, NULL);
                 GGML_ASSERT(rc == 0);
             }

From 293d605beb03035ab2fabe08fe370ac53e0ae70f Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Sun, 2 Jul 2023 19:20:46 +0530
Subject: [PATCH 03/11] add GGML_EXIT_SUCCESS and GGML_EXIT_ABORTED to ggml.h

---
 include/ggml/ggml.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 134cd867f..9e486dd0f 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -201,6 +201,9 @@
 #define GGML_MAX_NAME          48
 #define GGML_DEFAULT_N_THREADS 4
 
+#define GGML_EXIT_SUCCESS 0
+#define GGML_EXIT_ABORTED 1
+
 #define GGML_ASSERT(x) \
     do { \
         if (!(x)) { \

From 21bdeead09139bc0918ad5bed93b24cdb7a7e372 Mon Sep 17 00:00:00 2001
From: Arjun <ccldarjun@icloud.com>
Date: Mon, 3 Jul 2023 20:21:26 +0530
Subject: [PATCH 04/11] static always_false()

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 src/ggml.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ggml.c b/src/ggml.c
index 427388f70..4da7f393a 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16868,7 +16868,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     return GGML_EXIT_SUCCESS;
 }
 
-bool always_false() { return false; }
+static bool always_false() { return false; }
+
 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
     ggml_graph_compute_with_abort(ctx, cgraph, always_false);
 }

From f132e16fc9ceb9bce1fb5a14147dd1f0793b4f80 Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Mon, 3 Jul 2023 20:31:07 +0530
Subject: [PATCH 05/11] accept callback data

---
 include/ggml/ggml.h |  2 +-
 src/ggml.c          | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 9e486dd0f..d088c8459 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -1274,7 +1274,7 @@ extern "C" {
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
     GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)());
+    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(), void * abort_callback_data);
     GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
diff --git a/src/ggml.c b/src/ggml.c
index 4da7f393a..f22f0501f 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16752,7 +16752,8 @@ struct ggml_compute_state_shared {
     atomic_int n_active; // num active threads
     atomic_int node_n;   // active graph node
 
-    bool (*abort_callback)(); // abort ggml_graph_compute when true
+    bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+    void * abort_callback_data;
 };
 
 struct ggml_compute_state {
@@ -16780,7 +16781,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     int node_n = -1;
 
     while (true) {
-        if (state->ith == 0 && state->shared->abort_callback()) {
+        if (state->ith == 0 && state->shared->abort_callback(state->shared->abort_callback_data)) {
             return GGML_EXIT_ABORTED;
         }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
@@ -16830,7 +16831,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     break;
                 }
 
-                if (state->shared->abort_callback()) {
+                if (state->shared->abort_callback(state->shared->abort_callback_data)) {
                     break;
                 }
             }
@@ -16868,13 +16869,13 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     return GGML_EXIT_SUCCESS;
 }
 
-static bool always_false() { return false; }
+static bool always_false(void * data) { return false; }
 
 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
-    ggml_graph_compute_with_abort(ctx, cgraph, always_false);
+    ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL);
 }
 
-void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void)) {
+void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void*), void *abort_callback_data) {
     const int n_threads = cgraph->n_threads;
 
     struct ggml_compute_state_shared state_shared = {
@@ -16885,6 +16886,7 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph
         /*.n_active                =*/ n_threads,
         /*.node_n                  =*/ -1,
         /*.abort_callback          =*/ abort_callback,
+        /*.abort_callback_data     =*/ abort_callback_data,
     };
     struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads);
 

From f9432d76fac318437d0701e65f77dc9b6d8dc3bd Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Mon, 3 Jul 2023 20:36:02 +0530
Subject: [PATCH 06/11] proper function prototype

---
 include/ggml/ggml.h | 2 +-
 src/ggml.c          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index d088c8459..e66fb090b 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -1274,7 +1274,7 @@ extern "C" {
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
     GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(), void * abort_callback_data);
+    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data);
     GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
diff --git a/src/ggml.c b/src/ggml.c
index f22f0501f..5d8661376 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16875,7 +16875,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL);
 }
 
-void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void*), void *abort_callback_data) {
+void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph,
+        bool (*abort_callback)(void * data), void *abort_callback_data) {
     const int n_threads = cgraph->n_threads;
 
     struct ggml_compute_state_shared state_shared = {

From 4218d926aa9b407d7fef6489c50ceabb86119889 Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Mon, 3 Jul 2023 20:46:13 +0530
Subject: [PATCH 07/11] return exit status

---
 include/ggml/ggml.h | 2 +-
 src/ggml.c          | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 135c4f02b..aa61d93bd 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -1303,7 +1303,7 @@ extern "C" {
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
     GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data);
+    GGML_API int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool (*abort_callback)(void * data), void * abort_callback_data);
     GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
diff --git a/src/ggml.c b/src/ggml.c
index 0acbe3a2f..218c7433f 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16443,7 +16443,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     ggml_graph_compute_with_abort(ctx, cgraph, always_false, NULL);
 }
 
-void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph,
+int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph * cgraph,
         bool (*abort_callback)(void * data), void *abort_callback_data) {
     const int n_threads = cgraph->n_threads;
 
@@ -16861,6 +16861,8 @@ void ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph
                 (double) perf_time_us_cur     / 1000.0,
                 (double) cgraph->perf_time_us / 1000.0 / cgraph->perf_runs);
     }
+
+    return compute_status;
 }
 
 void ggml_graph_reset(struct ggml_cgraph * cgraph) {

From fcd5b1708c6dce498ce63403d2c2b09e6bf6b64b Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Wed, 5 Jul 2023 12:34:01 +0530
Subject: [PATCH 08/11] remove pthread_cancel and join every thread

---
 src/ggml.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/ggml.c b/src/ggml.c
index 218c7433f..1130d1e1a 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -16216,7 +16216,6 @@ typedef pthread_t ggml_thread_t;
 
 #define ggml_thread_create pthread_create
 #define ggml_thread_join   pthread_join
-#define ggml_thread_cancel   pthread_cancel
 
 #else
 
@@ -16343,7 +16342,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     int node_n = -1;
 
     while (true) {
-        if (state->ith == 0 && state->shared->abort_callback(state->shared->abort_callback_data)) {
+        if (state->shared->abort_callback(state->shared->abort_callback_data)) {
             return GGML_EXIT_ABORTED;
         }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
@@ -16835,13 +16834,8 @@ int ggml_graph_compute_with_abort(struct ggml_context * ctx, struct ggml_cgraph
     // join or kill thread pool
     if (n_threads > 1) {
         for (int j = 1; j < n_threads; j++) {
-            if (compute_status == GGML_EXIT_ABORTED) {
-                const int rc = ggml_thread_cancel(workers[j].thrd);
-                GGML_ASSERT(rc == 0);
-            } else if (compute_status == GGML_EXIT_SUCCESS) {
-                const int rc = ggml_thread_join(workers[j].thrd, NULL);
-                GGML_ASSERT(rc == 0);
-            }
+            const int rc = ggml_thread_join(workers[j].thrd, NULL);
+            GGML_ASSERT(rc == 0);
         }
     }
 

From 47de3d6ee9cba2b9809cccaee6719d70b61ec46c Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Tue, 11 Jul 2023 23:39:32 +0530
Subject: [PATCH 09/11] put abort_callback onto cplan

---
 include/ggml/ggml.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 502a39a89..8fe05d3a5 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -447,6 +447,10 @@ extern "C" {
 
         // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
         int n_tasks[GGML_MAX_NODES];
+
+        // optional (may be NULL): called during ggml_graph_compute; the computation is aborted when it returns true
+        bool (*abort_callback)(void * data);
+        void * abort_callback_data;
     };
 
     // computation graph
@@ -1308,7 +1312,7 @@ extern "C" {
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
-    GGML_API              void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+    GGML_API               int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
     GGML_API              void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
     // same as ggml_graph_compute() but the work data is allocated as a part of the context

From 118809b3d88f14bf1daeade4433d1c3d7cbe19c3 Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Tue, 11 Jul 2023 23:43:35 +0530
Subject: [PATCH 10/11] cplan abort_callback in ggml.c

---
 src/ggml.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ggml.c b/src/ggml.c
index 8dac60f7d..33fd2e6e8 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -15981,7 +15981,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     int node_n = -1;
 
     while (true) {
-        if (state->shared->abort_callback(state->shared->abort_callback_data)) {
+        if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
             return GGML_EXIT_ABORTED;
         }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
@@ -16038,7 +16038,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     break;
                 }
 
-                if (state->shared->abort_callback(state->shared->abort_callback_data)) {
+                if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
                     break;
                 }
             }
@@ -16415,7 +16415,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
     return cplan;
 }
 
-void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
+int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     {
         GGML_ASSERT(cplan);
         GGML_ASSERT(cplan->n_threads > 0);

From 26b90d06eae80ef295be14071be714ac8e39ee2f Mon Sep 17 00:00:00 2001
From: CCLDArjun <ccldarjun@icloud.com>
Date: Wed, 12 Jul 2023 00:39:42 +0530
Subject: [PATCH 11/11] make sure all threads abort

---
 src/ggml.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ggml.c b/src/ggml.c
index 33fd2e6e8..149e069f0 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -15982,6 +15982,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
+            state->shared->node_n += 1;
             return GGML_EXIT_ABORTED;
         }
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {