-
Notifications
You must be signed in to change notification settings - Fork 9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Numa #1556
Numa #1556
Changes from 1 commit
6fc5f17
0d23f8c
9d058c2
2c1b5ae
8502d51
bf83dcb
b71dfe6
adaad10
c31d51d
2f5bb46
4b94582
90a0e65
d0e3596
67ba34e
8f98035
0fe4b00
875a1e1
4a555b4
81a40e9
9aec2b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -505,7 +505,7 @@ struct ggml_numa_nodes ggml_numa = { | |
|
||
void ggml_numa_init(void) | ||
{ | ||
if (ggml_numa.n_nodes > 0) return; | ||
if (ggml_numa.n_nodes > 0) { return; } | ||
#ifdef __linux__ | ||
struct stat st; | ||
char path[256]; | ||
|
@@ -514,17 +514,21 @@ void ggml_numa_init(void) | |
while (true) { | ||
rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", ggml_numa.n_nodes); | ||
GGML_ASSERT(rv > 0 && (unsigned)rv < sizeof(path)); | ||
if (stat(path, &st) != 0) break; | ||
if (stat(path, &st) != 0) { break; } | ||
++ggml_numa.n_nodes; | ||
} | ||
// enumerate CPUs | ||
while (true) { | ||
rv = snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%u", ggml_numa.total_cpus); | ||
GGML_ASSERT(rv > 0 && (unsigned)rv < sizeof(path)); | ||
if (stat(path, &st) != 0) break; | ||
if (stat(path, &st) != 0) { break; } | ||
++ggml_numa.total_cpus; | ||
} | ||
GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", ggml_numa.n_nodes, ggml_numa.total_cpus); | ||
if (ggml_numa.n_nodes < 1 || ggml_numa.total_cpus < 1) { | ||
ggml_numa.n_nodes = 0; | ||
return; | ||
} | ||
ggml_numa.nodes = calloc(ggml_numa.n_nodes, sizeof(struct ggml_numa_node)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid allocs or figure out a way to release the memory at the end of the program. |
||
GGML_ASSERT(ggml_numa.nodes != NULL); | ||
for (uint32_t n = 0; n < ggml_numa.n_nodes; ++n) { | ||
|
@@ -14058,34 +14062,34 @@ typedef pthread_t ggml_thread_t; | |
#ifdef __linux__ | ||
void set_numa_thread_affinity(int thread_n, int n_threads) | ||
{ | ||
if (!ggml_is_numa()) return; | ||
if (!ggml_is_numa()) { return; } | ||
// run thread on node_num thread_n / (threads per node) | ||
int node_num = thread_n / (n_threads / ggml_numa.n_nodes); | ||
struct ggml_numa_node *node = &ggml_numa.nodes[node_num]; | ||
size_t setsize = CPU_ALLOC_SIZE(ggml_numa.total_cpus); | ||
cpu_set_t *cpus = CPU_ALLOC(ggml_numa.total_cpus); | ||
CPU_ZERO_S(setsize, cpus); | ||
for (size_t i=0; i < node->n_cpus; ++i) { | ||
for (size_t i = 0; i < node->n_cpus; ++i) { | ||
CPU_SET_S(node->cpus[i], setsize, cpus); | ||
} | ||
int rv; | ||
if ((rv = pthread_setaffinity_np(pthread_self(), setsize, cpus))) { | ||
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus); | ||
if (rv) { | ||
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", | ||
strerror(rv)); | ||
} | ||
CPU_FREE(cpus); | ||
} | ||
void clear_numa_thread_affinity(void) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function does not seem to be used There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The workers don't need it because they terminate. In the first attempt I wasn't setting affinity for the main thread, which seems to be fine because then it runs on the last remaining core. When I tried setting it performance was degraded if it stayed set after eval, so I wrote that to clear it when the worker threads were joined. That solved the performance degradation, but setting it and clearing it seemed to perform the same as not setting it to begin with, so then I didn't use it. |
||
{ | ||
if (!ggml_is_numa()) return; | ||
if (!ggml_is_numa()) { return; } | ||
size_t setsize = CPU_ALLOC_SIZE(ggml_numa.total_cpus); | ||
cpu_set_t *cpus = CPU_ALLOC(ggml_numa.total_cpus); | ||
CPU_ZERO_S(setsize, cpus); | ||
for (unsigned i=0; i < ggml_numa.total_cpus; ++i) { | ||
for (unsigned i = 0; i < ggml_numa.total_cpus; ++i) { | ||
CPU_SET_S(i, setsize, cpus); | ||
} | ||
int rv; | ||
if((rv = pthread_setaffinity_np(pthread_self(), setsize, cpus))) { | ||
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus); | ||
if (rv) { | ||
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", | ||
strerror(rv)); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: Call to 'calloc' has an allocation size of 0 bytes [clang-analyzer-optin.portability.UnixAPI]
Additional context
ggml.c:507: Assuming field 'n_nodes' is <= 0
ggml.c:507: Taking false branch
ggml.c:513: Loop condition is true. Entering loop body
ggml.c:515: Assuming 'rv' is > 0
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:515: Left side of '&&' is true
ggml.c:515: Assuming the condition is true
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:515: Taking false branch
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:515: Loop condition is false. Exiting loop
ggml.h:203: expanded from macro 'GGML_ASSERT'
do { \ ^
ggml.c:516: Assuming the condition is true
ggml.c:516: Taking true branch
ggml.c:516: Execution continues on line 521
ggml.c:520: Loop condition is true. Entering loop body
ggml.c:522: Assuming 'rv' is > 0
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:522: Left side of '&&' is true
ggml.c:522: Assuming the condition is true
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:522: Taking false branch
ggml.h:204: expanded from macro 'GGML_ASSERT'
if (!(x)) { \ ^
ggml.c:522: Loop condition is false. Exiting loop
ggml.h:203: expanded from macro 'GGML_ASSERT'
do { \ ^
ggml.c:523: Assuming the condition is true
ggml.c:523: Taking true branch
ggml.c:523: Execution continues on line 528
ggml.c:527: Call to 'calloc' has an allocation size of 0 bytes