
Numa #1556

Merged: 20 commits, Jun 26, 2023
Changes from 1 commit
Merge branch 'master' into HEAD
ggerganov committed Jun 19, 2023
commit 90a0e65c6716cd8aed66162557057d20e4e7b6bc
3 changes: 3 additions & 0 deletions examples/common.cpp
@@ -345,6 +345,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
            params.mem_test = true;
        } else if (arg == "--numa") {
            params.numa = true;
        } else if (arg == "--export") {
            params.export_cgraph = true;
        } else if (arg == "--verbose-prompt") {
            params.verbose_prompt = true;
        } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -491,6 +493,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, "  --numa                attempt optimizations that help on some NUMA systems\n");
    fprintf(stderr, "                        if run without this previously, it is recommended to drop the system page cache before using this\n");
    fprintf(stderr, "                        see https://github.com/ggerganov/llama.cpp/issues/1437\n");
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
    fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
    fprintf(stderr, "                        number of layers to store in VRAM\n");
    fprintf(stderr, "  -ts SPLIT --tensor-split SPLIT\n");
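As a stand-alone illustration of the parsing pattern in this hunk, the sketch below mirrors the else-if chain with just the two flags touched here. It is a hypothetical toy program, not part of the diff; the real logic lives in gpt_params_parse.

// Minimal sketch of the flag-parsing pattern above (illustrative only;
// the real implementation is gpt_params_parse in examples/common.cpp).
#include <stdio.h>
#include <string.h>

struct gpt_params {
    bool numa          = false; // attempt NUMA optimizations
    bool export_cgraph = false; // export the computation graph
};

int main(int argc, char ** argv) {
    gpt_params params;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--numa") == 0) {
            params.numa = true;
        } else if (strcmp(argv[i], "--export") == 0) {
            params.export_cgraph = true;
        } else {
            fprintf(stderr, "unknown argument: %s\n", argv[i]);
            return 1;
        }
    }
    printf("numa=%d export_cgraph=%d\n", params.numa, params.export_cgraph);
    return 0;
}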
1 change: 1 addition & 0 deletions examples/common.h
@@ -76,6 +76,7 @@ struct gpt_params {
    bool use_mlock      = false; // use mlock to keep model in memory
    bool mem_test       = false; // compute maximum memory usage
    bool numa           = false; // attempt optimizations that help on some NUMA systems
    bool export_cgraph  = false; // export the computation graph
    bool verbose_prompt = false; // print prompt tokens before generation
};

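For context, the sketch below shows one way a driver could act on export_cgraph after creating a context. llama_eval_export and its "llama.ggml" output path are assumptions about the API this merge brings in from master; the actual wiring is in the examples, not in this diff.

#include "llama.h"
#include "common.h"

// Hedged sketch, not part of this diff: export the compute graph and signal
// the caller to exit. llama_eval_export(ctx, fname) is assumed to be the
// entry point added on master for --export; verify against llama.h.
static bool maybe_export_cgraph(llama_context * ctx, const gpt_params & params) {
    if (!params.export_cgraph) {
        return false; // nothing to do, continue normal generation
    }
    llama_eval_export(ctx, "llama.ggml"); // assumed: writes the graph to disk
    return true; // caller should free the context and exit
}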
20 changes: 0 additions & 20 deletions ggml.c
@@ -3824,26 +3824,6 @@ struct ggml_context_container {
    struct ggml_context context;
};

//
// compute types
//

enum ggml_task_type {
    GGML_TASK_INIT = 0,
    GGML_TASK_COMPUTE,
    GGML_TASK_FINALIZE,
};

struct ggml_compute_params {
    enum ggml_task_type type;

    int ith, nth;

    // work buffer for all threads
    size_t wsize;
    void * wdata;
};

//
// NUMA support
//
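The removed block describes ggml's three-phase task model (INIT, COMPUTE, FINALIZE) and the per-thread view (ith of nth) each worker receives. The toy sketch below shows how that split typically partitions rows across threads; it is illustrative only, not ggml's actual scheduler.

// Toy illustration of the removed task model: GGML_TASK_COMPUTE work is
// split across nth threads by giving thread ith a contiguous block of rows.
// This mirrors the pattern ggml kernels use, but is not ggml's scheduler.
#include <stdio.h>

enum ggml_task_type {
    GGML_TASK_INIT = 0,
    GGML_TASK_COMPUTE,
    GGML_TASK_FINALIZE,
};

struct ggml_compute_params {
    enum ggml_task_type type;
    int ith, nth; // this thread's index and the total thread count
};

static void compute_rows(const struct ggml_compute_params * params, int nrows) {
    if (params->type != GGML_TASK_COMPUTE) {
        return; // INIT/FINALIZE phases typically run on a single thread
    }
    const int dr  = (nrows + params->nth - 1) / params->nth; // rows per thread
    const int ir0 = dr * params->ith;                        // first row
    const int ir1 = ir0 + dr < nrows ? ir0 + dr : nrows;     // one past last
    for (int ir = ir0; ir < ir1; ir++) {
        printf("thread %d handles row %d\n", params->ith, ir);
    }
}

int main(void) {
    struct ggml_compute_params p = { GGML_TASK_COMPUTE, 1, 4 };
    compute_rows(&p, 10); // thread 1 of 4 handles rows 3..5
    return 0;
}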
You are viewing a condensed version of this merge commit.