
Numa #1556

Merged: 20 commits, Jun 26, 2023
Changes from 1 commit
Merge branch 'master' into HEAD
ggerganov committed Jun 19, 2023
commit 90a0e65c6716cd8aed66162557057d20e4e7b6bc
3 changes: 3 additions & 0 deletions examples/common.cpp
@@ -345,6 +345,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
            params.mem_test = true;
        } else if (arg == "--numa") {
            params.numa = true;
        } else if (arg == "--export") {
            params.export_cgraph = true;
        } else if (arg == "--verbose-prompt") {
            params.verbose_prompt = true;
        } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -491,6 +493,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, "  --numa                attempt optimizations that help on some NUMA systems\n");
    fprintf(stderr, "                        if run without this previously, it is recommended to drop the system page cache before using this\n");
    fprintf(stderr, "                        see https://github.com/ggerganov/llama.cpp/issues/1437\n");
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
    fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
    fprintf(stderr, "                        number of layers to store in VRAM\n");
    fprintf(stderr, "  -ts SPLIT --tensor-split SPLIT\n");
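As a stand-alone illustration of the parsing pattern in this hunk, the sketch below mirrors the else-if chain with just the two flags touched here. It is a hypothetical toy program, not part of the diff; the real logic lives in gpt_params_parse.

// Minimal sketch of the flag-parsing pattern above (illustrative only;
// the real implementation is gpt_params_parse in examples/common.cpp).
#include <stdio.h>
#include <string.h>

struct gpt_params {
    bool numa          = false; // attempt NUMA optimizations
    bool export_cgraph = false; // export the computation graph
};

int main(int argc, char ** argv) {
    gpt_params params;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--numa") == 0) {
            params.numa = true;
        } else if (strcmp(argv[i], "--export") == 0) {
            params.export_cgraph = true;
        } else {
            fprintf(stderr, "unknown argument: %s\n", argv[i]);
            return 1;
        }
    }
    printf("numa=%d export_cgraph=%d\n", params.numa, params.export_cgraph);
    return 0;
}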
1 change: 1 addition & 0 deletions examples/common.h
@@ -76,6 +76,7 @@ struct gpt_params {
    bool use_mlock      = false; // use mlock to keep model in memory
    bool mem_test       = false; // compute maximum memory usage
    bool numa           = false; // attempt optimizations that help on some NUMA systems
    bool export_cgraph  = false; // export the computation graph
    bool verbose_prompt = false; // print prompt tokens before generation
};

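For context, the sketch below shows one way a driver could act on export_cgraph after creating a context. llama_eval_export and its "llama.ggml" output path are assumptions about the API this merge brings in from master; the actual wiring is in the examples, not in this diff.

#include "llama.h"
#include "common.h"

// Hedged sketch, not part of this diff: export the compute graph and signal
// the caller to exit. llama_eval_export(ctx, fname) is assumed to be the
// entry point added on master for --export; verify against llama.h.
static bool maybe_export_cgraph(llama_context * ctx, const gpt_params & params) {
    if (!params.export_cgraph) {
        return false; // nothing to do, continue normal generation
    }
    llama_eval_export(ctx, "llama.ggml"); // assumed: writes the graph to disk
    return true; // caller should free the context and exit
}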
20 changes: 0 additions & 20 deletions ggml.c
@@ -3824,26 +3824,6 @@ struct ggml_context_container {
    struct ggml_context context;
};

//
// compute types
//

enum ggml_task_type {
    GGML_TASK_INIT = 0,
    GGML_TASK_COMPUTE,
    GGML_TASK_FINALIZE,
};

struct ggml_compute_params {
    enum ggml_task_type type;

    int ith, nth;

    // work buffer for all threads
    size_t wsize;
    void * wdata;
};

//
// NUMA support
//
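The removed block describes ggml's three-phase task model (INIT, COMPUTE, FINALIZE) and the per-thread view (ith of nth) each worker receives. The toy sketch below shows how that split typically partitions rows across threads; it is illustrative only, not ggml's actual scheduler.

// Toy illustration of the removed task model: GGML_TASK_COMPUTE work is
// split across nth threads by giving thread ith a contiguous block of rows.
// This mirrors the pattern ggml kernels use, but is not ggml's scheduler.
#include <stdio.h>

enum ggml_task_type {
    GGML_TASK_INIT = 0,
    GGML_TASK_COMPUTE,
    GGML_TASK_FINALIZE,
};

struct ggml_compute_params {
    enum ggml_task_type type;
    int ith, nth; // this thread's index and the total thread count
};

static void compute_rows(const struct ggml_compute_params * params, int nrows) {
    if (params->type != GGML_TASK_COMPUTE) {
        return; // INIT/FINALIZE phases typically run on a single thread
    }
    const int dr  = (nrows + params->nth - 1) / params->nth; // rows per thread
    const int ir0 = dr * params->ith;                        // first row
    const int ir1 = ir0 + dr < nrows ? ir0 + dr : nrows;     // one past last
    for (int ir = ir0; ir < ir1; ir++) {
        printf("thread %d handles row %d\n", params->ith, ir);
    }
}

int main(void) {
    struct ggml_compute_params p = { GGML_TASK_COMPUTE, 1, 4 };
    compute_rows(&p, 10); // thread 1 of 4 handles rows 3..5
    return 0;
}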
You are viewing a condensed version of this merge commit.