[SYCL] remove global variables #7710

Merged · 10 commits · Jun 15, 2024
use macro for group_size and remove cuda-related
airMeng committed Jun 15, 2024
commit 28eaafc16617079ad439fdd4d5b020797976f028
14 changes: 6 additions & 8 deletions ggml-sycl.cpp
```diff
@@ -6220,7 +6220,7 @@ static void norm_f32_sycl(const float *x, float *dst, const int ncols,
             });
     } else {
         // FIXME: 1024 from cuda
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
         const sycl::range<3> block_dims(1, 1, work_group_size);
         /*
         DPCT1049:17: The work-group size passed to the SYCL kernel may exceed
```
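GROUP_SIZE is still the CUDA-inherited 1024 (see the FIXME), and the DPCT1049 notes in these hunks warn that a fixed work-group size may exceed what the device allows. A minimal sketch of clamping it at runtime, assuming the kernel's queue is available; the helper name is illustrative, not part of this PR:

```cpp
#include <sycl/sycl.hpp>
#include <algorithm>

// Illustrative helper (not in this PR): cap a requested work-group size at
// the device limit instead of trusting the CUDA-inherited GROUP_SIZE.
static int clamped_work_group_size(sycl::queue & q, int requested) {
    const size_t max_wg =
        q.get_device().get_info<sycl::info::device::max_work_group_size>();
    return std::min(requested, static_cast<int>(max_wg));
}

// Possible use at the call sites above:
//   const int work_group_size = clamped_work_group_size(*stream, GROUP_SIZE);
```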
```diff
@@ -6266,7 +6266,7 @@ static void group_norm_f32_sycl(const float *x, float *dst,
                 });
         });
     } else {
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
         const sycl::range<3> block_dims(1, 1, work_group_size);
         /*
         DPCT1049:18: The work-group size passed to the SYCL kernel may exceed
```
```diff
@@ -6355,7 +6355,7 @@ static void rms_norm_f32_sycl(const float *x, float *dst, const int ncols,
                 });
         });
     } else {
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
         const sycl::range<3> block_dims(1, 1, work_group_size);
         /*
         DPCT1049:19: The work-group size passed to the SYCL kernel may exceed
```
```diff
@@ -9115,8 +9115,6 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
     const sycl::range<3> block_nums(1, nrows, 1);
     const size_t shared_mem = ncols_pad * sizeof(int);

-    // GGML_ASSERT(shared_mem <= ggml_cuda_info().devices[ggml_cuda_get_device()].smpb);
-
     if (order == GGML_SORT_ORDER_ASC) {
         stream->submit([&](sycl::handler &cgh) {
             sycl::local_accessor<uint8_t, 1> dpct_local_acc_ct1(
```
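The deleted line was a leftover CUDA check that the sort's shared memory fits the device. The SYCL counterpart of that limit is info::device::local_mem_size; a hedged sketch of restoring the check inside argsort_f32_i32_sycl, reusing the function's own `stream` and `shared_mem` (not part of this PR):

```cpp
// Sketch only: SYCL analogue of the removed CUDA assert, verifying that the
// requested local ("shared") memory fits the device before submitting.
const size_t local_mem_limit =
    stream->get_device().get_info<sycl::info::device::local_mem_size>();
GGML_ASSERT(shared_mem <= local_mem_limit);
```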
```diff
@@ -9189,7 +9187,7 @@ static void soft_max_f32_sycl(const float * x, const float * mask,
                               const int nrows_y, const float scale, const float max_bias,
                               queue_ptr stream) {
     int nth = WARP_SIZE;
-    int max_block_size = 1024;
+    int max_block_size = GROUP_SIZE;
     while (nth < ncols_x && nth < max_block_size) nth *= 2;
     if (nth>max_block_size) nth = max_block_size;
```
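The unchanged lines around the new GROUP_SIZE cap pick the block size by doubling from the warp size until it covers ncols_x or hits the cap. The same logic isolated, with a worked example:

```cpp
// Block-size selection as in soft_max_f32_sycl, extracted for clarity.
// Example: ncols_x = 300, warp_size = 32, max_block_size = 1024 -> 512
// (32 -> 64 -> 128 -> 256 -> 512; 512 >= 300 stops the doubling).
static int pick_block_size(int ncols_x, int warp_size, int max_block_size) {
    int nth = warp_size;
    while (nth < ncols_x && nth < max_block_size) nth *= 2;
    if (nth > max_block_size) nth = max_block_size;
    return nth;
}
```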

```diff
@@ -9572,8 +9570,8 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {

 std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(queue_ptr qptr, int device) {
     // TBD: NO VMM support
-    // if (ggml_cuda_info().devices[device].vmm) {
-    //     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
+    // if (ggml_sycl_info().devices[device].vmm) {
+    //     return std::unique_ptr<ggml_sycl_pool>(new ggml_sycl_pool_vmm(device));
     // }
     return std::unique_ptr<ggml_sycl_pool>(new ggml_sycl_pool_leg(qptr, device));
 }
```
2 changes: 2 additions & 0 deletions ggml-sycl/presets.hpp
```diff
@@ -18,6 +18,8 @@
 #define GGML_SYCL_MAX_DEVICES 48
 #define GGML_SYCL_NAME "SYCL"

+// FIXME: 1024 from cuda
+#define GROUP_SIZE 1024
 #define WARP_SIZE 32
 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
```
