Numa #1556 (merged 20 commits on Jun 26, 2023)
Changes from 1 commit
llama : avoid ggml include in llama-util.h
ggerganov committed Jun 26, 2023
commit 875a1e111eaa9db3fd51be8c3b3288291ec2f1d2
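In short, the commit removes llama-util.h's dependency on ggml.h by turning the NUMA check into a constructor argument: llama_mmap gains a trailing bool numa parameter, and the call sites in llama.cpp pass ggml_is_numa() themselves. A minimal caller-side sketch under the new signature (the helper function below is illustrative, not part of the PR):

```cpp
// Sketch only: how a caller outside llama-util.h selects NUMA behavior after this change.
// ggml_is_numa() is consulted in llama.cpp, which already includes ggml.h;
// llama-util.h itself no longer needs the include.
// map_model_file is an illustrative helper, not a function from the PR.
#include <memory>

#include "ggml.h"        // ggml_is_numa()
#include "llama-util.h"  // llama_file, llama_mmap

static std::unique_ptr<llama_mmap> map_model_file(llama_file & file) {
    // full prefetch unless the machine has multiple NUMA nodes
    return std::make_unique<llama_mmap>(&file, /* prefetch */ (size_t) -1, ggml_is_numa());
}
```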
18 changes: 10 additions & 8 deletions llama-util.h
@@ -16,8 +16,6 @@
 #include <vector>
 #include <stdexcept>
 
-#include "ggml.h"
-
 #ifdef __has_include
 #if __has_include(<unistd.h>)
 #include <unistd.h>
@@ -174,12 +172,12 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
-        if (ggml_is_numa()) { prefetch = 0; }
+        if (numa) { prefetch = 0; }
Review comment on the line "if (numa) { prefetch = 0; }":

Yes, again: I want to understand why prefetch hurts performance on a NUMA node (with paged memory or pinned memory).
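The rationale is given by the comments in this diff: with readahead/prefetch the kernel faults pages in ahead of the thread that will actually use them, and under first-touch placement those pages can land on a different NUMA node than the eventual consumer, turning later reads into remote-memory accesses. Disabling prefetch and advising MADV_RANDOM keeps page placement with the thread that first touches the data. A minimal, Linux-only sketch of the two strategies the constructor chooses between (illustrative, not the PR's code):

```cpp
// Illustrative sketch (assumes Linux): prefetch the whole mapping on single-node machines,
// map lazily with readahead disabled on NUMA machines.
#include <sys/mman.h>
#include <cstddef>

static void * map_file(int fd, size_t size, bool numa) {
    int flags = MAP_SHARED;
    if (!numa) {
        flags |= MAP_POPULATE;            // eager prefetch is fine with a single memory node
    }
    void * addr = mmap(nullptr, size, PROT_READ, flags, fd, 0);
    if (addr == MAP_FAILED) {
        return nullptr;
    }
    if (numa) {
        // keep the kernel from reading ahead: the next page may not belong on this node
        madvise(addr, size, MADV_RANDOM);
    }
    return addr;
}
```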

 #ifdef __linux__
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
@@ -195,7 +193,7 @@ struct llama_mmap {
                         strerror(errno));
             }
         }
-        if (ggml_is_numa()) {
+        if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
             if (madvise(addr, file->size, MADV_RANDOM)) {
@@ -211,7 +209,9 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true) {
+    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
+        (void) numa;
+
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
@@ -256,8 +256,10 @@ struct llama_mmap {
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file *, bool prefetch = true) {
-        (void)prefetch;
+    llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
+        (void) prefetch;
+        (void) numa;
+
         throw std::runtime_error(std::string("mmap not supported"));
     }
 #endif
4 changes: 2 additions & 2 deletions llama.cpp
@@ -774,7 +774,7 @@ struct llama_model_loader {
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
+            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
 
     // maybe this should in llama_model_loader
     if (model_loader->use_mmap) {
-        model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
+        model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
     }
 }
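For completeness, the flag the llama.cpp call sites pass comes from ggml_is_numa(), which this PR introduces on the ggml side. As a rough, hypothetical sketch only (not the actual ggml implementation), a Linux check of this kind can count the NUMA node directories the kernel exposes in sysfs:

```cpp
// Hypothetical stand-in for a NUMA-detection check; not the code from ggml.
// Counts /sys/devices/system/node/node<N> entries and reports true for more than one node.
#include <ctype.h>
#include <dirent.h>
#include <string.h>

static bool has_multiple_numa_nodes(void) {
    DIR * dir = opendir("/sys/devices/system/node");
    if (dir == nullptr) {
        return false;  // no NUMA topology exposed -> treat as a single node
    }
    unsigned nodes = 0;
    for (struct dirent * entry = readdir(dir); entry != nullptr; entry = readdir(dir)) {
        if (strncmp(entry->d_name, "node", 4) == 0 && isdigit((unsigned char) entry->d_name[4])) {
            nodes++;
        }
    }
    closedir(dir);
    return nodes > 1;
}
```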