Skip to content

Commit

Permalink
files : reorganize + sync [no ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Jun 21, 2024
1 parent cf9470f commit e20ebb2
Show file tree
Hide file tree
Showing 33 changed files with 10,985 additions and 9,088 deletions.
10 changes: 1 addition & 9 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
build/
build-blas/
build-debug/
build-release/
build-sanitize-addr/
build-sanitize-thread/
build-cov/
build-ci-debug/
build-ci-release/
build-cublas/
build-*/
out/
tmp/
models/
Expand Down
324 changes: 157 additions & 167 deletions CMakeLists.txt

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions include/ggml/ggml.h → include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,12 @@
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)

#define GGML_TENSOR_BINARY_OP_LOCALS01 \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)

#ifdef __cplusplus
extern "C" {
#endif
Expand Down
1,183 changes: 833 additions & 350 deletions src/CMakeLists.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
// check if a backend with higher prio wants to offload the op
if (src_backend_id == sched->n_backends - 1) {
for (int b = 0; b < src_backend_id; b++) {
if (ggml_backend_offload_op(sched->backends[b], tensor)) {
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
SET_CAUSE(tensor, "1.off");
return b;
}
Expand Down
6 changes: 5 additions & 1 deletion src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ static int64_t get_row_rounding(const std::array<float, GGML_CUDA_MAX_DEVICES> &
}

const int cc = ggml_cuda_info().devices[id].cc;
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc, get_mmq_x_max_host(cc)));
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc));
}
return row_rounding;
}
Expand Down Expand Up @@ -2267,6 +2267,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_SQR:
ggml_cuda_op_sqr(ctx, dst);
break;
case GGML_OP_SQRT:
ggml_cuda_op_sqrt(ctx, dst);
break;
case GGML_OP_CLAMP:
ggml_cuda_op_clamp(ctx, dst);
break;
Expand Down Expand Up @@ -2830,6 +2833,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
case GGML_OP_RMS_NORM:
case GGML_OP_SCALE:
case GGML_OP_SQR:
case GGML_OP_SQRT:
case GGML_OP_CLAMP:
case GGML_OP_CONT:
case GGML_OP_DIAG_MASK_INF:
Expand Down
4 changes: 2 additions & 2 deletions src/ggml-cuda/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,8 @@ static int get_mmq_x_max_host(const int cc) {
}

// Round rows to this value for --split-mode row:
static int get_mmq_y_host(const int cc, const int mmq_x) {
return cc >= CC_VOLTA && mmq_x >= 32 ? 128 : 64;
static int get_mmq_y_host(const int cc) {
return cc >= CC_VOLTA ? 128 : 64;
}

//////////////////////
Expand Down
20 changes: 10 additions & 10 deletions src/ggml-cuda/mmq.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,34 +30,34 @@ void ggml_cuda_op_mul_mat_q(

switch (src0->type) {
case GGML_TYPE_Q4_0:
mul_mat_q_case<GGML_TYPE_Q4_0>(args, stream);
mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
break;
case GGML_TYPE_Q4_1:
mul_mat_q_case<GGML_TYPE_Q4_1>(args, stream);
mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
break;
case GGML_TYPE_Q5_0:
mul_mat_q_case<GGML_TYPE_Q5_0>(args, stream);
mul_mat_q_case<GGML_TYPE_Q5_0>(ctx, args, stream);
break;
case GGML_TYPE_Q5_1:
mul_mat_q_case<GGML_TYPE_Q5_1>(args, stream);
mul_mat_q_case<GGML_TYPE_Q5_1>(ctx, args, stream);
break;
case GGML_TYPE_Q8_0:
mul_mat_q_case<GGML_TYPE_Q8_0>(args, stream);
mul_mat_q_case<GGML_TYPE_Q8_0>(ctx, args, stream);
break;
case GGML_TYPE_Q2_K:
mul_mat_q_case<GGML_TYPE_Q2_K>(args, stream);
mul_mat_q_case<GGML_TYPE_Q2_K>(ctx, args, stream);
break;
case GGML_TYPE_Q3_K:
mul_mat_q_case<GGML_TYPE_Q3_K>(args, stream);
mul_mat_q_case<GGML_TYPE_Q3_K>(ctx, args, stream);
break;
case GGML_TYPE_Q4_K:
mul_mat_q_case<GGML_TYPE_Q4_K>(args, stream);
mul_mat_q_case<GGML_TYPE_Q4_K>(ctx, args, stream);
break;
case GGML_TYPE_Q5_K:
mul_mat_q_case<GGML_TYPE_Q5_K>(args, stream);
mul_mat_q_case<GGML_TYPE_Q5_K>(ctx, args, stream);
break;
case GGML_TYPE_Q6_K:
mul_mat_q_case<GGML_TYPE_Q6_K>(args, stream);
mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
break;
default:
GGML_ASSERT(false);
Expand Down
Loading

0 comments on commit e20ebb2

Please sign in to comment.