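Fix a few bugs in the cuBLAS TCGEMM: pass device (not host) pointers to the GEMM call and correct the swapped destination/source arguments of the device-to-host result copy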

samhatfield · Jun 25, 2019 · c075e14
1 parent a12df9a
commit c075e14
Showing 1 changed file with 10 additions and 10 deletions.
diff --git a/cublas_gemm_c.cu b/cublas_gemm_c.cu
@@ -4,17 +4,17 @@
 // Handles CUDA errors
 #define cudaErrCheck(stat) { cudaErrCheck_((stat), __FILE__, __LINE__); }
 void cudaErrCheck_(cudaError_t stat, const char *file, int line) {
- if (stat != cudaSuccess) {
- fprintf(stderr, "CUDA Error: %s %s %d\n", cudaGetErrorString(stat), file, line);
- }
+  if (stat != cudaSuccess) {
+  fprintf(stderr, "CUDA Error: %s %s %d\n", cudaGetErrorString(stat), file, line);
+  }
 }
 
 // Handles cuBLAS errors
 #define cublasErrCheck(stat) { cublasErrCheck_((stat), __FILE__, __LINE__); }
 void cublasErrCheck_(cublasStatus_t stat, const char *file, int line) {
- if (stat != CUBLAS_STATUS_SUCCESS) {
- fprintf(stderr, "cuBLAS Error: %d %s %d\n", stat, file, line);
- }
+  if (stat != CUBLAS_STATUS_SUCCESS) {
+  fprintf(stderr, "cuBLAS Error: %d %s %d\n", stat, file, line);
+  }
 }
 
 // Performs matrix-matrix multiplication using Tensor Core.
@@ -59,17 +59,17 @@ extern "C" {
  cublasHandle, transa_int, transb_int,
  m, n, k,
  &alpha,
- a_h, CUDA_R_64F, lda,
- b_h, CUDA_R_64F, ldb,
+ a_d, CUDA_R_64F, lda,
+ b_d, CUDA_R_64F, ldb,
  &beta,
- c_h, CUDA_R_64F, ldc,
+ c_d, CUDA_R_64F, ldc,
  CUDA_R_64F,
  CUBLAS_GEMM_DEFAULT
  )
  );
 
  // Copy results back from device to host
- cudaErrCheck(cudaMemcpy(c_d, c_h, m*n*sizeof(double), cudaMemcpyDeviceToHost));
+ cudaErrCheck(cudaMemcpy(c_h, c_d, m*n*sizeof(double), cudaMemcpyDeviceToHost));
  cudaDeviceSynchronize();
 
  // =========================================================================