Skip to content

Commit

Permalink
llama : avoid ggml include in llama-util.h
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Jun 26, 2023
1 parent 0fe4b00 commit a38f4a2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
8 changes: 3 additions & 5 deletions llama-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <vector>
#include <stdexcept>

#include "ggml.h"

#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
Expand Down Expand Up @@ -174,12 +172,12 @@ struct llama_mmap {
#ifdef _POSIX_MAPPED_FILES
static constexpr bool SUPPORTED = true;

llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
size = file->size;
int fd = fileno(file->fp);
int flags = MAP_SHARED;
// prefetch/readahead impairs performance on NUMA systems
if (ggml_is_numa()) { prefetch = 0; }
if (numa) { prefetch = 0; }
#ifdef __linux__
if (prefetch) { flags |= MAP_POPULATE; }
#endif
Expand All @@ -195,7 +193,7 @@ struct llama_mmap {
strerror(errno));
}
}
if (ggml_is_numa()) {
if (numa) {
// advise the kernel not to use readahead
// (because the next page might not belong on the same node)
if (madvise(addr, file->size, MADV_RANDOM)) {
Expand Down
4 changes: 2 additions & 2 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ struct llama_model_loader {
}

if (use_mmap) {
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
if (lmlock) {
lmlock->init(mapping->addr);
}
Expand Down Expand Up @@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const

// maybe this should be in llama_model_loader
if (model_loader->use_mmap) {
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
}
}

Expand Down

0 comments on commit a38f4a2

Please sign in to comment.