diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..135a7e4bc --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +# https://EditorConfig.org + +# Top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file, utf-8 charset +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 4 + +[Makefile] +indent_style = tab + +[prompts/*.txt] +insert_final_newline = unset diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index c309f1361..d4b6cee0f 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -183,6 +183,15 @@ # define GGML_API #endif +// TODO: support for clang +#ifdef __GNUC__ +# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint))) +#elif defined(_MSC_VER) +# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func +#else +# define GGML_DEPRECATED(func, hint) func +#endif + #include #include #include @@ -373,6 +382,10 @@ extern "C" { GGML_OP_MAP_UNARY, GGML_OP_MAP_BINARY, + GGML_OP_MAP_CUSTOM1_F32, + GGML_OP_MAP_CUSTOM2_F32, + GGML_OP_MAP_CUSTOM3_F32, + GGML_OP_MAP_CUSTOM1, GGML_OP_MAP_CUSTOM2, GGML_OP_MAP_CUSTOM3, @@ -559,6 +572,8 @@ extern "C" { GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor); GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor); + GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1); + // use this to compute the memory overhead of a tensor GGML_API size_t ggml_tensor_overhead(void); @@ -1293,15 +1308,6 @@ extern "C" { int h0, int w); - // custom operators - - typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *); - typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); - - typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *); - typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); - typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); - GGML_API struct ggml_tensor * ggml_unary( struct ggml_context * ctx, struct ggml_tensor * a, @@ -1312,63 +1318,137 @@ extern "C" { struct ggml_tensor * a, enum ggml_unary_op op); - GGML_API struct ggml_tensor * ggml_map_unary_f32( + // custom operators + + typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *); + typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); + + typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *); + typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); + typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); + + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx, struct ggml_tensor * a, - ggml_unary_op_f32_t fun); + ggml_unary_op_f32_t fun), + "use ggml_map_custom1 instead"); - GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, - ggml_unary_op_f32_t fun); + ggml_unary_op_f32_t fun), + "use ggml_map_custom1_inplace instead"); - GGML_API struct ggml_tensor * ggml_map_binary_f32( + GGML_DEPRECATED(GGML_API 
struct ggml_tensor * ggml_map_binary_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, - ggml_binary_op_f32_t fun); + ggml_binary_op_f32_t fun), + "use ggml_map_custom2 instead"); - GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, - ggml_binary_op_f32_t fun); + ggml_binary_op_f32_t fun), + "use ggml_map_custom2_inplace instead"); - GGML_API struct ggml_tensor * ggml_map_custom1_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32( struct ggml_context * ctx, struct ggml_tensor * a, - ggml_custom1_op_f32_t fun); + ggml_custom1_op_f32_t fun), + "use ggml_map_custom1 instead"); - GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, - ggml_custom1_op_f32_t fun); + ggml_custom1_op_f32_t fun), + "use ggml_map_custom1_inplace instead"); - GGML_API struct ggml_tensor * ggml_map_custom2_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, - ggml_custom2_op_f32_t fun); + ggml_custom2_op_f32_t fun), + "use ggml_map_custom2 instead"); - GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, - ggml_custom2_op_f32_t fun); + ggml_custom2_op_f32_t fun), + "use ggml_map_custom2_inplace instead"); - GGML_API struct ggml_tensor * ggml_map_custom3_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, - ggml_custom3_op_f32_t fun); + ggml_custom3_op_f32_t fun), + "use ggml_map_custom3 instead"); - GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, - ggml_custom3_op_f32_t fun); + ggml_custom3_op_f32_t fun), + "use ggml_map_custom3_inplace instead"); + +// custom operators v2 + typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata); + typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata); + typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata); + + #define GGML_N_TASKS_MAX -1 + + GGML_API struct ggml_tensor * ggml_map_custom1( + struct ggml_context * ctx, + struct ggml_tensor * a, + ggml_custom1_op_t fun, + int n_tasks, + void * userdata); + + GGML_API struct ggml_tensor * ggml_map_custom1_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + ggml_custom1_op_t fun, + int n_tasks, + void * userdata); + + GGML_API struct ggml_tensor * ggml_map_custom2( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + ggml_custom2_op_t fun, + int n_tasks, + void * userdata); + + GGML_API struct ggml_tensor * ggml_map_custom2_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct 
ggml_tensor * b, + ggml_custom2_op_t fun, + int n_tasks, + void * userdata); + + GGML_API struct ggml_tensor * ggml_map_custom3( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + ggml_custom3_op_t fun, + int n_tasks, + void * userdata); + + GGML_API struct ggml_tensor * ggml_map_custom3_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + ggml_custom3_op_t fun, + int n_tasks, + void * userdata); // loss function diff --git a/src/ggml.c b/src/ggml.c index 35c56151b..36105634e 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS_BACK", }; -static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); +static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss_back(x,y)", }; -static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); +static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) { tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; } -static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { +bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return @@ -4632,6 +4632,11 @@ static struct ggml_tensor * ggml_new_tensor_impl( return result; } +static void ggml_get_op_params(const struct ggml_tensor * tensor, void * params, size_t params_size) { + assert(params_size <= GGML_MAX_OP_PARAMS); + memcpy(params, tensor->op_params, params_size); +} + static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { assert(params_size <= GGML_MAX_OP_PARAMS); memcpy(tensor->op_params, params, params_size); @@ -6871,7 +6876,8 @@ GGML_API struct ggml_tensor * ggml_conv_1d( ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0), a->ne[2], 1, 1, }; - struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); int32_t params[] = { s0, p0, d0 }; ggml_set_op_params(result, ¶ms, sizeof(params)); @@ -6886,7 +6892,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d( // ggml_conv_2d -struct ggml_tensor* ggml_conv_2d( +struct ggml_tensor * ggml_conv_2d( struct ggml_context* ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6910,7 +6916,8 @@ struct ggml_tensor* ggml_conv_2d( ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1), a->ne[3], b->ne[3], }; - struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); + + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); int32_t params[] = { s0, s1, p0, p1, d0, d1 }; ggml_set_op_params(result, ¶ms, sizeof(params)); @@ -6926,7 +6933,7 @@ struct ggml_tensor* ggml_conv_2d( // ggml_conv_1d_ph -struct ggml_tensor* ggml_conv_1d_ph( +struct ggml_tensor * ggml_conv_1d_ph( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6944,7 +6951,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) { // ggml_pool_1d -struct ggml_tensor* ggml_pool_1d( +struct ggml_tensor * 
ggml_pool_1d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, @@ -6963,7 +6970,8 @@ struct ggml_tensor* ggml_pool_1d( ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), a->ne[1], }; - struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); int32_t params[] = { op, k0, s0, p0 }; ggml_set_op_params(result, ¶ms, sizeof(params)); @@ -6977,7 +6985,7 @@ struct ggml_tensor* ggml_pool_1d( // ggml_pool_2d -struct ggml_tensor* ggml_pool_2d( +struct ggml_tensor * ggml_pool_2d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, @@ -7000,7 +7008,8 @@ struct ggml_tensor* ggml_pool_2d( ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), a->ne[2], }; - struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); + + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; ggml_set_op_params(result, ¶ms, sizeof(params)); @@ -7330,7 +7339,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32( return ggml_map_binary_impl_f32(ctx, a, b, fun, true); } -// ggml_map_custom1 +// ggml_map_custom1_f32 static struct ggml_tensor * ggml_map_custom1_impl_f32( struct ggml_context * ctx, @@ -7347,7 +7356,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32( ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); - result->op = GGML_OP_MAP_CUSTOM1; + result->op = GGML_OP_MAP_CUSTOM1_F32; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->src[0] = a; @@ -7368,7 +7377,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32( return ggml_map_custom1_impl_f32(ctx, a, fun, true); } -// ggml_map_custom2 +// ggml_map_custom2_f32 static struct ggml_tensor * ggml_map_custom2_impl_f32( struct ggml_context * ctx, @@ -7386,7 +7395,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32( ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); - result->op = GGML_OP_MAP_CUSTOM2; + result->op = GGML_OP_MAP_CUSTOM2_F32; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->src[0] = a; result->src[1] = b; @@ -7410,7 +7419,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32( return ggml_map_custom2_impl_f32(ctx, a, b, fun, true); } -// ggml_map_custom3 +// ggml_map_custom3_f32 static struct ggml_tensor * ggml_map_custom3_impl_f32( struct ggml_context * ctx, @@ -7429,7 +7438,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32( ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); - result->op = GGML_OP_MAP_CUSTOM3; + result->op = GGML_OP_MAP_CUSTOM3_F32; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->src[0] = a; result->src[1] = b; @@ -7456,6 +7465,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32( return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true); } +// ggml_map_custom1 +struct ggml_map_custom1_op_params { + ggml_custom1_op_t fun; + int n_tasks; + void * userdata; +}; + +static struct ggml_tensor * ggml_map_custom1_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_custom1_op_t fun, + int n_tasks, + void * userdata, + bool inplace) { + GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); + + bool is_node = false; + + if (!inplace && a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? 
ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + struct ggml_map_custom1_op_params params = { + /*.fun =*/ fun, + /*.n_tasks =*/ n_tasks, + /*.userdata =*/ userdata + }; + ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); + + result->op = GGML_OP_MAP_CUSTOM1; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + + return result; +} + +struct ggml_tensor * ggml_map_custom1( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_custom1_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false); +} + +struct ggml_tensor * ggml_map_custom1_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_custom1_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true); +} + +// ggml_map_custom2 + +struct ggml_map_custom2_op_params { + ggml_custom2_op_t fun; + int n_tasks; + void * userdata; +}; + +static struct ggml_tensor * ggml_map_custom2_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_custom2_op_t fun, + int n_tasks, + void * userdata, + bool inplace) { + GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + struct ggml_map_custom2_op_params params = { + /*.fun =*/ fun, + /*.n_tasks =*/ n_tasks, + /*.userdata =*/ userdata + }; + ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); + + result->op = GGML_OP_MAP_CUSTOM2; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + result->src[1] = b; + + return result; +} + +struct ggml_tensor * ggml_map_custom2( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_custom2_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false); +} + +struct ggml_tensor * ggml_map_custom2_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_custom2_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true); +} + +// ggml_map_custom3 + +struct ggml_map_custom3_op_params { + ggml_custom3_op_t fun; + int n_tasks; + void * userdata; +}; + +static struct ggml_tensor * ggml_map_custom3_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + const ggml_custom3_op_t fun, + int n_tasks, + void * userdata, + bool inplace) { + GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad || c->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + struct ggml_map_custom3_op_params params = { + /*.fun =*/ fun, + /*.n_tasks =*/ n_tasks, + /*.userdata =*/ userdata + }; + ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); + + result->op = GGML_OP_MAP_CUSTOM3; + result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + result->src[1] = b; + result->src[2] = c; + + return result; +} + +struct ggml_tensor * ggml_map_custom3( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + const ggml_custom3_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false); +} + +struct ggml_tensor * ggml_map_custom3_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + const ggml_custom3_op_t fun, + int n_tasks, + void * userdata) { + return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true); +} + + + // ggml_cross_entropy_loss struct ggml_tensor * ggml_cross_entropy_loss( @@ -14208,24 +14401,6 @@ static void ggml_compute_forward_map_custom1_f32( fun(dst, a); } - -static void ggml_compute_forward_map_custom1( - const struct ggml_compute_params * params, - const struct ggml_tensor * a, - struct ggml_tensor * dst, - const ggml_custom1_op_f32_t fun) { - switch (a->type) { - case GGML_TYPE_F32: - { - ggml_compute_forward_map_custom1_f32(params, a, dst, fun); - } break; - default: - { - GGML_ASSERT(false); - } break; - } -} - // ggml_compute_forward_map_custom2 static void ggml_compute_forward_map_custom2_f32( @@ -14244,24 +14419,6 @@ static void ggml_compute_forward_map_custom2_f32( } -static void ggml_compute_forward_map_custom2( - const struct ggml_compute_params * params, - const struct ggml_tensor * a, - const struct ggml_tensor * b, - struct ggml_tensor * dst, - const ggml_custom2_op_f32_t fun) { - switch (a->type) { - case GGML_TYPE_F32: - { - ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun); - } break; - default: - { - GGML_ASSERT(false); - } break; - } -} - // ggml_compute_forward_map_custom3 static void ggml_compute_forward_map_custom3_f32( @@ -14280,24 +14437,52 @@ static void ggml_compute_forward_map_custom3_f32( fun(dst, a, b, c); } +// ggml_compute_forward_map_custom1 + +static void ggml_compute_forward_map_custom1( + const struct ggml_compute_params * params, + const struct ggml_tensor * a, + struct ggml_tensor * dst) { + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params; + + p->fun(dst, a, params->ith, params->nth, p->userdata); +} + +// ggml_compute_forward_map_custom2 + +static void ggml_compute_forward_map_custom2( + const struct ggml_compute_params * params, + const struct ggml_tensor * a, + const struct ggml_tensor * b, + struct ggml_tensor * dst) { + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params; + + p->fun(dst, a, b, params->ith, params->nth, p->userdata); +} + +// ggml_compute_forward_map_custom3 static void ggml_compute_forward_map_custom3( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, - struct ggml_tensor * dst, - const ggml_custom3_op_f32_t fun) { - switch (a->type) { - case GGML_TYPE_F32: - { - ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun); - } break; - default: - { - GGML_ASSERT(false); - } break; + struct ggml_tensor * dst) { + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; } + + struct ggml_map_custom3_op_params * p = 
(struct ggml_map_custom3_op_params *) dst->op_params; + + p->fun(dst, a, b, c, params->ith, params->nth, p->userdata); } // ggml_compute_forward_cross_entropy_loss @@ -14819,25 +15004,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun); } break; - case GGML_OP_MAP_CUSTOM1: + case GGML_OP_MAP_CUSTOM1_F32: { ggml_custom1_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); - ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun); + ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun); } break; - case GGML_OP_MAP_CUSTOM2: + case GGML_OP_MAP_CUSTOM2_F32: { ggml_custom2_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); - ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun); + ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun); } break; - case GGML_OP_MAP_CUSTOM3: + case GGML_OP_MAP_CUSTOM3_F32: { ggml_custom3_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); - ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun); + ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun); + } + break; + case GGML_OP_MAP_CUSTOM1: + { + ggml_compute_forward_map_custom1(params, tensor->src[0], tensor); + } + break; + case GGML_OP_MAP_CUSTOM2: + { + ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor); + } + break; + case GGML_OP_MAP_CUSTOM3: + { + ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } break; case GGML_OP_CROSS_ENTROPY_LOSS: @@ -15645,6 +15845,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: + case GGML_OP_MAP_CUSTOM1_F32: + case GGML_OP_MAP_CUSTOM2_F32: + case GGML_OP_MAP_CUSTOM3_F32: case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM3: @@ -16401,11 +16604,38 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { case GGML_OP_WIN_UNPART: case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: + case GGML_OP_MAP_CUSTOM1_F32: + case GGML_OP_MAP_CUSTOM2_F32: + case GGML_OP_MAP_CUSTOM3_F32: + { + n_tasks = 1; + } break; case GGML_OP_MAP_CUSTOM1: + { + struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params; + if (p->n_tasks == GGML_N_TASKS_MAX) { + n_tasks = n_threads; + } else { + n_tasks = MIN(p->n_tasks, n_threads); + } + } break; case GGML_OP_MAP_CUSTOM2: + { + struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params; + if (p->n_tasks == GGML_N_TASKS_MAX) { + n_tasks = n_threads; + } else { + n_tasks = MIN(p->n_tasks, n_threads); + } + } break; case GGML_OP_MAP_CUSTOM3: { - n_tasks = 1; + struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params; + if (p->n_tasks == GGML_N_TASKS_MAX) { + n_tasks = n_threads; + } else { + n_tasks = MIN(p->n_tasks, n_threads); + } } break; case GGML_OP_CROSS_ENTROPY_LOSS: { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index af99e6a8a..f5d0160c6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -292,3 +292,11 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86" AND GGML_OPENBLAS) target_compile_options(${TEST_TARGET} PRIVATE ${GGML_EXTRA_FLAGS}) endif() +# +# test-customop + 
+set(TEST_TARGET test-customop) +add_executable(${TEST_TARGET} ${TEST_TARGET}.c) +target_link_libraries(${TEST_TARGET} PRIVATE ggml) +add_test(NAME ${TEST_TARGET} COMMAND $) +set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") diff --git a/tests/test-customop.c b/tests/test-customop.c new file mode 100644 index 000000000..7d44fa410 --- /dev/null +++ b/tests/test-customop.c @@ -0,0 +1,209 @@ +#include "ggml/ggml.h" + +#include +#include +#include +#include + +#if defined(_WIN32) + +#include +static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) { + return InterlockedExchangeAdd(ptr, inc); +} + +#else +#include +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +struct ggml_context * make_ctx(void) { + struct ggml_init_params params = { + /*.mem_size =*/ 1 * 1024 * 1024, + /*.mem_buffer =*/ NULL, + /*.no_alloc =*/ false, + }; + + return ggml_init(params); +} + +char g_userdata[] = "ggml"; +atomic_int g_custom1_count = 0; +atomic_int g_custom2_count = 0; +atomic_int g_custom3_count = 0; + +void custom1(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata) { + // check that the userdata is correct + assert(userdata == NULL); + + assert(ggml_are_same_shape(dst, a)); + + atomic_fetch_add(&g_custom1_count, 1); + + const float * a_data = ggml_get_data_f32(a); + float * dst_data = ggml_get_data_f32(dst); + + + // parallelize by elements + const int ne = ggml_nelements(dst); + const int dr = (ne + nth - 1) / nth; + const int ie0 = dr * ith; + const int ie1 = MIN(ie0 + dr, ne); + + for (int i = ie0; i < ie1; ++i) { + dst_data[i] = a_data[i] * 2; + } +} + +void custom2(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata) { + // check that the userdata is correct + assert(userdata == g_userdata); + assert(strcmp(userdata, "ggml") == 0); + assert(ggml_are_same_shape(dst, a)); + assert(ggml_are_same_shape(dst, b)); + + atomic_fetch_add(&g_custom2_count, 1); + + const float * a_data = ggml_get_data_f32(a); + const float * b_data = ggml_get_data_f32(b); + float * dst_data = ggml_get_data_f32(dst); + + // parallelize by rows + const int nr = ggml_nrows(dst); + // number of rows per thread + const int dr = (nr + nth - 1) / nth; + // row range for this thread + const int ir0 = dr * ith; + const int ir1 = MIN(ir0 + dr, nr); + + // number of columns + const int nc = dst->ne[0]; + + for (int ir = ir0; ir < ir1; ++ir) { + for (int ic = 0; ic < nc; ++ic) { + const int i = ir * nc + ic; + dst_data[i] = a_data[i] + b_data[i]; + } + } +} + +void custom3(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata) { + // check that the userdata is correct + assert(userdata == g_userdata); + assert(strcmp(userdata, "ggml") == 0); + assert(ggml_are_same_shape(dst, a)); + assert(ggml_are_same_shape(dst, b)); + assert(ggml_are_same_shape(dst, c)); + + atomic_fetch_add(&g_custom3_count, 1); + + const float * a_data = ggml_get_data_f32(a); + const float * b_data = ggml_get_data_f32(b); + const float * c_data = ggml_get_data_f32(c); + float * dst_data = ggml_get_data_f32(dst); + + + // dont parallelize + assert(ith == 0); + + const int ne = ggml_nelements(dst); + + for (int i = 0; i < ne; ++i) { + dst_data[i] = a_data[i] + b_data[i] + c_data[i]; + } +} + +int main(int argc, const char** argv) { + + float buf1_f32[1024]; + for (int i = 
0; i < 1024; ++i) { + buf1_f32[i] = (float)(i + 1); + } + float buf2_f32[1024]; + for (int i = 0; i < 1024; ++i) { + buf2_f32[i] = (float)(i + 1) * 2; + } + float buf3_f32[1024]; + for (int i = 0; i < 1024; ++i) { + buf3_f32[i] = (float)(i + 1) * 3; + } + + // map_custom1 + { + struct ggml_context * ctx = make_ctx(); + struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t->data, buf1_f32, ggml_nbytes(t)); + + struct ggml_tensor * m1 = ggml_map_custom1(ctx, t, custom1, 2, NULL); + + struct ggml_cgraph graph = ggml_build_forward(m1); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float * output = ggml_get_data_f32(m1); + + for (int i = 0; i < ggml_nelements(m1); ++i) { + assert(output[i] == buf1_f32[i] * 2); + } + assert(g_custom1_count == 2); + + ggml_free(ctx); + } + + // map_custom2 + { + struct ggml_context * ctx = make_ctx(); + struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t1->data, buf1_f32, ggml_nbytes(t1)); + struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t2->data, buf2_f32, ggml_nbytes(t2)); + + struct ggml_tensor * m2 = ggml_map_custom2(ctx, t1, t2, custom2, GGML_N_TASKS_MAX, g_userdata); + + struct ggml_cgraph graph = ggml_build_forward(m2); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float * output = ggml_get_data_f32(m2); + + for (int i = 0; i < ggml_nelements(m2); ++i) { + assert(output[i] == buf1_f32[i] + buf2_f32[i]); + } + + assert(g_custom2_count == 4); + + ggml_free(ctx); + } + + // map_custom3 + { + struct ggml_context * ctx = make_ctx(); + struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t1->data, buf1_f32, ggml_nbytes(t1)); + struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t2->data, buf2_f32, ggml_nbytes(t2)); + struct ggml_tensor * t3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t3->data, buf3_f32, ggml_nbytes(t3)); + + struct ggml_tensor * m3 = ggml_map_custom3(ctx, t1, t2, t3, custom3, 1, g_userdata); + + struct ggml_cgraph graph = ggml_build_forward(m3); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float * output = ggml_get_data_f32(m3); + + for (int i = 0; i < ggml_nelements(m3); ++i) { + assert(output[i] == buf1_f32[i] + buf2_f32[i] + buf3_f32[i]); + } + + assert(g_custom3_count == 1); + + ggml_free(ctx); + } + + + return 0; +}
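
Note (not part of the patch): for readers migrating from the now-deprecated ggml_map_custom*_f32 functions to the "custom operators v2" API declared above, the following is a minimal sketch of the new calling convention. It assumes only the declarations added in include/ggml/ggml.h by this patch plus long-standing ggml helpers (ggml_init, ggml_new_tensor_1d, ggml_set_f32_1d, ggml_build_forward, ggml_graph_compute_with_ctx); the callback name scale_by_two and the context size are illustrative, not part of this change.

#include "ggml/ggml.h"

// Hypothetical ggml_custom1_op_t callback: doubles every element of `a` into `dst`,
// splitting the work across nth threads (ith is this thread's index).
static void scale_by_two(struct ggml_tensor * dst, const struct ggml_tensor * a,
                         int ith, int nth, void * userdata) {
    (void) userdata; // unused in this sketch

    const float * src = ggml_get_data_f32(a);
    float       * out = ggml_get_data_f32(dst);

    const int ne  = ggml_nelements(dst);
    const int dr  = (ne + nth - 1) / nth;            // elements per thread
    const int ie0 = dr * ith;                        // first element for this thread
    const int ie1 = ie0 + dr < ne ? ie0 + dr : ne;   // one past the last element

    for (int i = ie0; i < ie1; ++i) {
        out[i] = 2.0f * src[i];
    }
}

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,          // illustrative context size
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; ++i) {
        ggml_set_f32_1d(x, i, (float) i);
    }

    // Old (deprecated) form:
    //   ggml_map_custom1_f32(ctx, x, some_f32_callback);
    // New form: the callback additionally receives ith/nth and an optional
    // userdata pointer; GGML_N_TASKS_MAX asks for as many tasks as threads.
    struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale_by_two, GGML_N_TASKS_MAX, NULL);

    struct ggml_cgraph gf = ggml_build_forward(y);
    ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads =*/ 4);

    ggml_free(ctx);
    return 0;
}

As the ggml_graph_plan changes in this patch show, n_tasks is clamped to the number of compute threads (MIN(p->n_tasks, n_threads)), and passing GGML_N_TASKS_MAX (-1) requests one task per thread; passing 1 forces the callback to run single-threaded, as the custom3 case in the test above relies on.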