Add a non-inplace version of GGML_OP_ADD_REL_POS (ggml_add_rel_pos)

ggerganov · ggerganov · Aug 21, 2023 · Aug 20, 2023 · Aug 21, 2023 · Aug 21, 2023
commit 9d6eaa8388ff7f88298e5e18e8eced47e2831451
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
@@ -1385,6 +1385,12 @@ extern "C" {
 
  // used in sam
 
+ GGML_API struct ggml_tensor * ggml_add_rel_pos(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * pw,
+ struct ggml_tensor * ph);
+
  GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
  struct ggml_context * ctx,
  struct ggml_tensor * a,

diff --git a/src/ggml.c b/src/ggml.c
@@ -3930,6 +3930,7 @@ static void ggml_setup_op_has_task_pass(void) {
  p[GGML_OP_CONV_TRANSPOSE_2D ] = true;
  p[GGML_OP_FLASH_ATTN_BACK ] = true;
  p[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
+ p[GGML_OP_ADD_REL_POS ] = true;
  }
 
  { // FINALIZE
@@ -7328,34 +7329,57 @@ struct ggml_tensor * ggml_get_rel_pos(
 
 // ggml_add_rel_pos
 
-struct ggml_tensor * ggml_add_rel_pos_inplace(
+static struct ggml_tensor * ggml_add_rel_pos_impl(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * pw,
- struct ggml_tensor * ph) {
- GGML_ASSERT(pw->ne[0] == ph->ne[0]);
- GGML_ASSERT(pw->ne[1] == ph->ne[1]);
- GGML_ASSERT(pw->ne[2] == ph->ne[2]);
- GGML_ASSERT(pw->ne[3] == ph->ne[3]);
- GGML_ASSERT(pw->ne[3] == a->ne[2]);
- GGML_ASSERT(pw->ne[0]*ph->ne[0] == a->ne[0]);
- GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
+ struct ggml_tensor * ph,
+ bool inplace) {
+ GGML_ASSERT(ggml_are_same_shape(pw, ph));
  GGML_ASSERT(ggml_is_contiguous(a));
  GGML_ASSERT(ggml_is_contiguous(pw));
  GGML_ASSERT(ggml_is_contiguous(ph));
- GGML_ASSERT(pw->type == GGML_TYPE_F32);
  GGML_ASSERT(ph->type == GGML_TYPE_F32);
+ GGML_ASSERT(pw->type == GGML_TYPE_F32);
+ GGML_ASSERT(pw->ne[3] == a->ne[2]);
+ GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
+ GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
+
+ bool is_node = false;
+
+ if (!inplace && (a->grad || pw->grad || ph->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+ ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
 
- struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  result->op = GGML_OP_ADD_REL_POS;
- result->grad = NULL;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
  result->src[0] = a;
  result->src[1] = pw;
  result->src[2] = ph;
 
  return result;
 }
 
+
+struct ggml_tensor * ggml_add_rel_pos(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * pw,
+ struct ggml_tensor * ph) {
+ return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
+}
+
+struct ggml_tensor * ggml_add_rel_pos_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * pw,
+ struct ggml_tensor * ph) {
+ return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
+}
+
 // gmml_unary
 
 static struct ggml_tensor * ggml_unary_impl(
@@ -14607,7 +14631,12 @@ static void ggml_compute_forward_add_rel_pos_f32(
  const struct ggml_tensor * src1,
  const struct ggml_tensor * src2,
  struct ggml_tensor * dst) {
- UNUSED(src0);
+
+ const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
+ if (!inplace && params->type == GGML_TASK_INIT) {
+ memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
+ return;
+ }
  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
  }

diff --git a/tests/test-rel-pos.c b/tests/test-rel-pos.c
@@ -59,21 +59,29 @@ int main(int argc, const char** argv) {
  struct ggml_tensor * rw_f32 = ggml_cpy(ctx, rw, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 3, 2, 2));
  struct ggml_tensor * rh_f32 = ggml_cpy(ctx, rh, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 3, 2, 2));
 
- struct ggml_tensor * out = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 9, 4);
- float* out_d = (float*)out->data;
- for (int i = 0; i < ggml_nelements(out); ++i) {
- out_d[i] = 1.f;
+ struct ggml_tensor * in = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 9, 4);
+ struct ggml_tensor * out_inplace = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 9, 4);
+ float * in_d = (float*)in->data;
+ float * out_inplace_d = (float*)out_inplace->data;
+ for (int i = 0; i < ggml_nelements(in); ++i) {
+ in_d[i] = 1.f;
+ out_inplace_d[i] = 1.f;
  }
 
- out = ggml_add_rel_pos_inplace(ctx, out, rw_f32, rh_f32);
-
+ struct ggml_tensor * out = ggml_add_rel_pos(ctx, in, rw_f32, rh_f32);
  struct ggml_cgraph gf = ggml_build_forward(out);
  ggml_build_forward_expand(&gf, rw_f32);
  ggml_build_forward_expand(&gf, rh_f32);
-
  ggml_graph_compute_with_ctx(ctx, &gf, 1);
 
+ out_inplace = ggml_add_rel_pos_inplace(ctx, out_inplace, rw_f32, rh_f32);
+ struct ggml_cgraph gf_2 = ggml_build_forward(out_inplace);
+ ggml_build_forward_expand(&gf_2, rw_f32);
+ ggml_build_forward_expand(&gf_2, rh_f32);
+ ggml_graph_compute_with_ctx(ctx, &gf_2, 1);
+
  check_tensor(out, (float*)expected_out, 9, 4, 1);
+ check_tensor(out_inplace, (float*)expected_out, 9, 4, 1);
  }
 
  return 0;