Skip to content

Commit

Permalink
Fix a few bugs in the cuBLAS TCGEMM
Browse files Browse the repository at this point in the history
  • Loading branch information
Sam Hatfield committed Jun 25, 2019
1 parent a12df9a commit c075e14
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions cublas_gemm_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
// Handles CUDA errors
#define cudaErrCheck(stat) { cudaErrCheck_((stat), __FILE__, __LINE__); }
// Reports a failed CUDA runtime call on stderr.
//   stat: status value returned by the call being checked
//   file: source file of the call site (the macro passes __FILE__)
//   line: source line of the call site (the macro passes __LINE__)
// A failure is only logged: nothing is returned and execution continues.
void cudaErrCheck_(cudaError_t stat, const char *file, int line) {
    // Single check, so each failure is reported exactly once.
    if (stat != cudaSuccess) {
        fprintf(stderr, "CUDA Error: %s %s %d\n", cudaGetErrorString(stat), file, line);
    }
}

// Handles cuBLAS errors
#define cublasErrCheck(stat) { cublasErrCheck_((stat), __FILE__, __LINE__); }
// Reports a failed cuBLAS call on stderr.
//   stat: status value returned by the call being checked
//   file: source file of the call site (the macro passes __FILE__)
//   line: source line of the call site (the macro passes __LINE__)
// The status is printed as its numeric value. A failure is only logged:
// nothing is returned and execution continues.
void cublasErrCheck_(cublasStatus_t stat, const char *file, int line) {
    // Single check, so each failure is reported exactly once.
    if (stat != CUBLAS_STATUS_SUCCESS) {
        // Explicit cast: %d expects an int, and stat is an enum.
        fprintf(stderr, "cuBLAS Error: %d %s %d\n", static_cast<int>(stat), file, line);
    }
}

// Performs matrix-matrix multiplication using Tensor Core.
Expand Down Expand Up @@ -59,17 +59,17 @@ extern "C" {
cublasHandle, transa_int, transb_int,
m, n, k,
&alpha,
a_h, CUDA_R_64F, lda,
b_h, CUDA_R_64F, ldb,
a_d, CUDA_R_64F, lda,
b_d, CUDA_R_64F, ldb,
&beta,
c_h, CUDA_R_64F, ldc,
c_d, CUDA_R_64F, ldc,
CUDA_R_64F,
CUBLAS_GEMM_DEFAULT
)
);

// Copy results back from device to host
cudaErrCheck(cudaMemcpy(c_d, c_h, m*n*sizeof(double), cudaMemcpyDeviceToHost));
cudaErrCheck(cudaMemcpy(c_h, c_d, m*n*sizeof(double), cudaMemcpyDeviceToHost));
cudaDeviceSynchronize();

// =========================================================================
Expand Down

0 comments on commit c075e14

Please sign in to comment.