Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] remove global variables #7710

Merged
merged 10 commits into from
Jun 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
replace global variables with context[2/2]
  • Loading branch information
airMeng committed Jun 13, 2024
commit d0186d381c90c1c6f781e376f4507481b08076e9
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,8 @@ if (LLAMA_SYCL)
endif()

set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
Expand Down
457 changes: 209 additions & 248 deletions ggml-sycl.cpp

Large diffs are not rendered by default.

8 changes: 1 addition & 7 deletions ggml-sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "ggml.h"
#include "ggml-backend.h"
#include "ggml-sycl/presets.hpp"

#ifdef __cplusplus
extern "C" {
Expand All @@ -30,13 +31,6 @@ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);

// TODO: these are temporary
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();

// SYCL doesn't support registering host memory, keep here for reference
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
Expand Down
4 changes: 0 additions & 4 deletions ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@

#include "common.hpp"

int get_main_device() {
return g_main_device;
}

int get_current_device_id() {
return dpct::dev_mgr::instance().current_device_id();
}
Expand Down
3 changes: 1 addition & 2 deletions ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ typedef sycl::float2 dfloat2;

static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};

static int g_all_sycl_device_count = -1;
static bool g_ggml_backend_sycl_buffer_type_initialized = false;

static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
Expand All @@ -145,8 +146,6 @@ static void* g_scratch_buffer = nullptr;
static size_t g_scratch_size = 0; // disabled by default
static size_t g_scratch_offset = 0;

int get_main_device();

[[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
"current GPU architecture.\n";
Expand Down
2 changes: 1 addition & 1 deletion ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2866,7 +2866,7 @@ namespace dpct
/// when usm is used and dimension is greater than 1.
template <size_t D = Dimension>
typename std::enable_if<D != 1, dpct_accessor_t>::type
get_access(sycl::handler &cgh) {
get_access([[maybe_unused]] sycl::handler &cgh) {
return dpct_accessor_t((T *)_device_ptr, _range);
}

Expand Down
4 changes: 2 additions & 2 deletions ggml-sycl/presets.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#ifndef GGML_SYCL_PRESETS_HPP
#define GGML_SYCL_PRESETS_HPP

#define GGML_SYCL_MAX_DEVICES 48
#define GGML_SYCL_MAX_STREAMS 8
#define GGML_SYCL_MAX_STREAMS 8
#define GGML_SYCL_MAX_BUFFERS 256
#define GGML_SYCL_MAX_DEVICES 48
#define GGML_SYCL_NAME "SYCL"

#define WARP_SIZE 32
Expand Down
13 changes: 1 addition & 12 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6612,16 +6612,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
}
#endif

#ifdef GGML_USE_SYCL
if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
ggml_backend_sycl_set_single_device_mode(params.main_gpu);
//SYCL use device index (0, 1, 2) directly, user inputs device id, then convert to device index.
params.main_gpu = ggml_backend_sycl_get_device_index(params.main_gpu);
} else {
ggml_backend_sycl_set_mul_device_mode();
}
#endif

if (!llm_load_tensors(
ml, model, params.n_gpu_layers, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
params.progress_callback, params.progress_callback_user_data
Expand Down Expand Up @@ -16223,8 +16213,7 @@ struct llama_context * llama_new_context_with_model(
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
if (backend == nullptr) {
int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
llama_free(ctx);
return nullptr;
}
Expand Down