Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: cuda implementation for ggml_conv_transpose_1d #854

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Next Next commit
conv transpose 1d passing test for 1d input and kernel
  • Loading branch information
balisujohn committed Jun 10, 2024
commit 70de8b7bbb098087a79d52ad7a0552e8e4fe598c
4 changes: 4 additions & 0 deletions src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "ggml-cuda/tsembd.cuh"
#include "ggml-cuda/unary.cuh"
#include "ggml-cuda/upscale.cuh"
#include "ggml-cuda/conv-transpose-1d.cuh"

#include <algorithm>
#include <array>
Expand Down Expand Up @@ -2338,6 +2339,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_IM2COL:
ggml_cuda_op_im2col(ctx, dst);
break;
case GGML_OP_CONV_TRANSPOSE_1D:
ggml_cuda_op_conv_transpose_1d(ctx,dst);
break;
case GGML_OP_POOL_2D:
ggml_cuda_op_pool2d(ctx, dst);
break;
Expand Down
65 changes: 65 additions & 0 deletions src/ggml-cuda/conv-transpose-1d.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include "conv-transpose-1d.cuh"

static __global__ void conv_transpose_1d_kernel(
const int s0, const int p0, const int d0,
const int kernel_size, const int input_size, const int output_size,
const float * src0, const float * src1, float * dst) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx >= output_size) {
return;
}

int upper_bound = idx > input_size-1 ? input_size-1 : idx; //inclusive
int lower_bound = idx - kernel_size + 1 >= 0 ? idx - kernel_size + 1 : 0;

int initial_weight_idx = idx > kernel_size -1 ? kernel_size-1 : idx;


printf("idx: %d initial_weight_idx: %d\n", idx,initial_weight_idx);
printf("idx: %d upper bound: %d\n", idx, upper_bound);
printf("idx: %d lower bound: %d\n", idx, lower_bound);


for (int i = lower_bound; i <= upper_bound; i++)
{
dst[idx] += src0[initial_weight_idx-(i-lower_bound)] * src1[i];
}
//dst[idx] = 7;
}

static void conv_transpose_1d_f32_f32_cuda(
const int s0, const int p0, const int d0,
const int kernel_size, const int input_size, const int output_size,
const float * src0, const float * src1, float * dst,
cudaStream_t stream) {

const int num_blocks = (output_size + CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE - 1) / CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE;
conv_transpose_1d_kernel<<<num_blocks,CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE, 0, stream>>>(s0,p0,d0,kernel_size, input_size, output_size, src0,src1, dst);
}

void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;

const ggml_tensor * src1 = dst->src[1];
const float * src1_d = (const float *)src1->data;

float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();

GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also assert contiguous src0 and src1


const int32_t * opts = (const int32_t *)dst->op_params;

const int s0 = 1;//opts[2];
const int p0 = 0;//opts[3];
const int d0 = 1;//opts[4];

const int64_t kernel_size = src0->ne[0];
const int64_t input_size = src1->ne[0];
const int64_t output_size = dst->ne[0];


conv_transpose_1d_f32_f32_cuda( s0,p0,d0,kernel_size, input_size, output_size, src0_d, src1_d, dst_d, stream);
}
5 changes: 5 additions & 0 deletions src/ggml-cuda/conv-transpose-1d.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "common.cuh"

#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256

void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
2 changes: 1 addition & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_
# test-conv-transpose

set(TEST_TARGET test-conv-transpose)
add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)

Expand Down
247 changes: 0 additions & 247 deletions tests/test-conv-transpose.c

This file was deleted.