Numa #1556

Merged 20 commits on Jun 26, 2023
Changes from 1 commit
llama : allow to initialize backend with NUMA support
ggerganov committed Jun 26, 2023
commit 0fe4b00de249194c134b72fd7a89c0550c4e84b7
2 changes: 1 addition & 1 deletion examples/embedding/embedding.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend();
+    llama_init_backend(params.numa);
 
     llama_model * model;
     llama_context * ctx;
6 changes: 1 addition & 5 deletions examples/main/main.cpp
@@ -5,7 +5,6 @@
 
 #include "common.h"
 #include "llama.h"
-#include "ggml.h"
 #include "build-info.h"
 
 #include <cassert>
@@ -106,10 +105,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend();
-    if (params.numa) {
-        ggml_numa_init();
-    }
+    llama_init_backend(params.numa);
 
     llama_model * model;
     llama_context * ctx;
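With this commit the conditional ggml_numa_init() call in main.cpp moves behind llama_init_backend(), so the examples no longer need to include ggml.h. A minimal caller after this change might look like the sketch below; it is illustrative only — the flag parsing here is made up, while the real examples read params.numa from gpt_params via --numa.

#include "llama.h"

#include <string>

int main(int argc, char ** argv) {
    // Hypothetical flag parsing; the real examples populate gpt_params::numa.
    const bool use_numa = argc > 1 && std::string(argv[1]) == "--numa";

    // One call now covers the timing / F16 table setup and, when requested,
    // the ggml NUMA initialization that used to be a separate call.
    llama_init_backend(use_numa);

    // ... load a model and run inference as the examples do ...

    return 0;
}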
2 changes: 1 addition & 1 deletion examples/perplexity/perplexity.cpp
@@ -147,7 +147,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend();
+    llama_init_backend(params.numa);
 
     llama_model * model;
     llama_context * ctx;
2 changes: 1 addition & 1 deletion examples/quantize/quantize.cpp
@@ -180,7 +180,7 @@ int main(int argc, char ** argv) {
         usage(argv[0]);
     }
 
-    llama_init_backend();
+    llama_init_backend(false);
 
     // parse command line arguments
    const std::string fname_inp = argv[arg_idx];
2 changes: 1 addition & 1 deletion examples/simple/simple.cpp
@@ -66,7 +66,7 @@ int main(int argc, char ** argv)
     // Init LLM :
     //---------------------------------
 
-    llama_init_backend();
+    llama_init_backend(params.numa);
 
     llama_model * model;
     llama_context * ctx;
26 changes: 18 additions & 8 deletions ggml.c
@@ -3879,14 +3879,12 @@ struct ggml_context_container {
 #define GGML_NUMA_MAX_NODES 8
 #define GGML_NUMA_MAX_CPUS 512
 
-struct ggml_numa_node
-{
+struct ggml_numa_node {
     uint32_t cpus[GGML_NUMA_MAX_CPUS]; // hardware threads on this node
     uint32_t n_cpus;
 };
 
-struct ggml_numa_nodes
-{
+struct ggml_numa_nodes {
     struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
     uint32_t n_nodes;
     uint32_t total_cpus; // hardware threads on system
@@ -3923,32 +3921,41 @@ inline static void ggml_critical_section_end(void) {
     atomic_fetch_sub(&g_state_barrier, 1);
 }
 
-void ggml_numa_init(void)
-{
-    if (g_state.numa.n_nodes > 0) { return; }
+void ggml_numa_init(void) {
+    if (g_state.numa.n_nodes > 0) {
+        fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
+
+        return;
+    }
+
 #ifdef __linux__
     struct stat st;
     char path[256];
     int rv;
+
     // enumerate nodes
     while (g_state.numa.n_nodes < GGML_NUMA_MAX_NODES) {
         rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes);
         GGML_ASSERT(rv > 0 && (unsigned)rv < sizeof(path));
         if (stat(path, &st) != 0) { break; }
         ++g_state.numa.n_nodes;
     }
+
     // enumerate CPUs
     while (g_state.numa.total_cpus < GGML_NUMA_MAX_CPUS) {
         rv = snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%u", g_state.numa.total_cpus);
         GGML_ASSERT(rv > 0 && (unsigned)rv < sizeof(path));
         if (stat(path, &st) != 0) { break; }
         ++g_state.numa.total_cpus;
     }
+
     GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", g_state.numa.n_nodes, g_state.numa.total_cpus);
+
     if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1) {
         g_state.numa.n_nodes = 0;
         return;
     }
+
     for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) {
         struct ggml_numa_node * node = &g_state.numa.nodes[n];
         GGML_PRINT_DEBUG("CPUs on node %u:", n);
@@ -3963,6 +3970,7 @@ void ggml_numa_init(void)
         }
         GGML_PRINT_DEBUG("\n");
     }
+
     if (ggml_is_numa()) {
         FILE *fptr = fopen("/proc/sys/kernel/numa_balancing", "r");
         if (fptr != NULL) {
@@ -3978,7 +3986,9 @@
 #endif
 }
 
-bool ggml_is_numa(void) { return g_state.numa.n_nodes > 1; }
+bool ggml_is_numa(void) {
+    return g_state.numa.n_nodes > 1;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 
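The third hunk above is cut off by the diff view right after the fopen of /proc/sys/kernel/numa_balancing. As far as the visible lines show, the intent is to warn when the kernel's automatic NUMA balancing is enabled, since page migration can work against threads pinned to specific nodes. The following is a standalone sketch of that idea, not the PR's exact code; the message text and buffer handling are assumptions.

// Sketch only: warn if automatic NUMA balancing is enabled on Linux.
#include <stdio.h>
#include <string.h>

static void warn_if_numa_balancing(void) {
    FILE * fptr = fopen("/proc/sys/kernel/numa_balancing", "r");
    if (fptr == NULL) {
        return; // file absent (non-Linux or older kernel): nothing to check
    }

    char buf[16];
    // The file contains "0" when balancing is disabled; anything else warns.
    if (fgets(buf, sizeof(buf), fptr) != NULL && strncmp(buf, "0", 1) != 0) {
        fprintf(stderr, "warning: /proc/sys/kernel/numa_balancing is enabled; "
                        "this may hurt NUMA-pinned performance\n");
    }
    fclose(fptr);
}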
6 changes: 5 additions & 1 deletion llama.cpp
@@ -977,7 +977,7 @@ bool llama_mlock_supported() {
     return llama_mlock::SUPPORTED;
 }
 
-void llama_init_backend() {
+void llama_init_backend(bool numa) {
     ggml_time_init();
 
     // needed to initialize f16 tables
@@ -986,6 +986,10 @@ void llama_init_backend() {
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
+
+    if (numa) {
+        ggml_numa_init();
+    }
 }
 
 int64_t llama_time_us() {
3 changes: 2 additions & 1 deletion llama.h
@@ -140,8 +140,9 @@ extern "C" {
 
     // TODO: not great API - very likely to change
     // Initialize the llama + ggml backend
+    // If numa is true, use NUMA optimizations
     // Call once at the start of the program
-    LLAMA_API void llama_init_backend();
+    LLAMA_API void llama_init_backend(bool numa);
 
     LLAMA_API int64_t llama_time_us();
 
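The header comment spells out the contract: pass true to enable the NUMA optimizations, and call the function once at program start. For downstream callers the adaptation is mechanical; the snippet below is illustrative only and not part of the PR.

#include "llama.h"

// Call once, early in main(). Passing false keeps the previous behavior;
// passing true additionally runs ggml_numa_init() inside the backend setup.
void init_backend_for_app(bool want_numa) {
    llama_init_backend(want_numa);
}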