Skip to content

Commit

Permalink
llama : avoid ggml include in llama-util.h
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Jun 26, 2023
1 parent 0fe4b00 commit a38f4a2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
8 changes: 3 additions & 5 deletions llama-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <vector>
#include <stdexcept>

#include "ggml.h"

#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
Expand Down Expand Up @@ -174,12 +172,12 @@ struct llama_mmap {
#ifdef _POSIX_MAPPED_FILES
static constexpr bool SUPPORTED = true;

llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
size = file->size;
int fd = fileno(file->fp);
int flags = MAP_SHARED;
// prefetch/readahead impairs performance on NUMA systems
if (ggml_is_numa()) { prefetch = 0; }
if (numa) { prefetch = 0; }
#ifdef __linux__
if (prefetch) { flags |= MAP_POPULATE; }
#endif
Expand All @@ -195,7 +193,7 @@ struct llama_mmap {
strerror(errno));
}
}
if (ggml_is_numa()) {
if (numa) {
// advise the kernel not to use readahead
// (because the next page might not belong on the same node)
if (madvise(addr, file->size, MADV_RANDOM)) {
Expand Down
4 changes: 2 additions & 2 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ struct llama_model_loader {
}

if (use_mmap) {
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
if (lmlock) {
lmlock->init(mapping->addr);
}
Expand Down Expand Up @@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const

// maybe this should be in llama_model_loader
if (model_loader->use_mmap) {
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
}
}

Expand Down

0 comments on commit a38f4a2

Please sign in to comment.