Create CuSPARSE bindings for different precisions.

+ Support IndexType int32 + Support ValueType float, double, complex float and complex double. + Add a description of the CuSPARSE benchmarks in the spmv file.
ginkgo-project · May 16, 2019 · 30a634a · 30a634a
1 parent 496421f
commit 30a634a
Show file tree

Hide file tree

Showing 3 changed files with 300 additions and 46 deletions.
diff --git a/benchmark/spmv/cuda_linops.hpp b/benchmark/spmv/cuda_linops.hpp
@@ -135,13 +135,13 @@ class CuspCsrmp
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv_mp(
+ gko::kernels::cuda::cusparse::spmv_mp(
  this->get_gpu_exec()->get_cusparse_handle(), trans_,
  this->get_size()[0], this->get_size()[1],
  csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
  this->get_descr(), csr_->get_const_values(),
  csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
- &scalars.get_const_data()[1], dx));
+ &scalars.get_const_data()[1], dx);
  }
 
  CuspCsrmp(std::shared_ptr<const gko::Executor> exec,
@@ -195,13 +195,13 @@ class CuspCsr
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv(
+ gko::kernels::cuda::cusparse::spmv(
  this->get_gpu_exec()->get_cusparse_handle(), trans_,
  this->get_size()[0], this->get_size()[1],
  csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
  this->get_descr(), csr_->get_const_values(),
  csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
- &scalars.get_const_data()[1], dx));
+ &scalars.get_const_data()[1], dx);
  }
 
  CuspCsr(std::shared_ptr<const gko::Executor> exec,
@@ -255,14 +255,14 @@ class CuspCsrmm
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmm(
+ gko::kernels::cuda::cusparse::spmm(
  this->get_gpu_exec()->get_cusparse_handle(), trans_,
  this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
  csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
  this->get_descr(), csr_->get_const_values(),
  csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
  dense_b->get_size()[0], &scalars.get_const_data()[1], dx,
- dense_x->get_size()[0]));
+ dense_x->get_size()[0]);
  }
 
  CuspCsrmm(std::shared_ptr<const gko::Executor> exec,
@@ -331,7 +331,6 @@ class CuspCsrEx
  ValueType alpha = gko::one<ValueType>();
  ValueType beta = gko::zero<ValueType>();
  gko::size_type buffer_size = 0;
- auto data_type = gko::kernels::cuda::cuda_data_type<ValueType>();
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
@@ -341,22 +340,19 @@ class CuspCsrEx
  auto handle = this->get_gpu_exec()->get_cusparse_handle();
  GKO_ASSERT_NO_CUSPARSE_ERRORS(
  cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx_bufferSize(
+ gko::kernels::cuda::cusparse::spmv_buffersize<ValueType, IndexType>(
  handle, algmode_, trans_, this->get_size()[0], this->get_size()[1],
- csr_->get_num_stored_elements(), &alpha, data_type,
- this->get_descr(), csr_->get_const_values(), data_type,
- csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
- data_type, &beta, data_type, dx, data_type, data_type,
- &buffer_size));
+ csr_->get_num_stored_elements(), &alpha, this->get_descr(),
+ csr_->get_const_values(), csr_->get_const_row_ptrs(),
+ csr_->get_const_col_idxs(), db, &beta, dx, &buffer_size);
  GKO_ASSERT_NO_CUDA_ERRORS(cudaMalloc(&buffer_, buffer_size));
  set_buffer_ = true;
 
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx(
+ gko::kernels::cuda::cusparse::spmv<ValueType, IndexType>(
  handle, algmode_, trans_, this->get_size()[0], this->get_size()[1],
- csr_->get_num_stored_elements(), &alpha, data_type,
- this->get_descr(), csr_->get_const_values(), data_type,
- csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
- data_type, &beta, data_type, dx, data_type, data_type, buffer_));
+ csr_->get_num_stored_elements(), &alpha, this->get_descr(),
+ csr_->get_const_values(), csr_->get_const_row_ptrs(),
+ csr_->get_const_col_idxs(), db, &beta, dx, buffer_);
 
  // Set the pointer mode back to the default DEVICE for Ginkgo
  GKO_ASSERT_NO_CUSPARSE_ERRORS(
@@ -412,11 +408,11 @@ class CuspHybrid
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
- GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsr2hyb(
+ gko::kernels::cuda::cusparse::csr2hyb(
  this->get_gpu_exec()->get_cusparse_handle(), this->get_size()[0],
  this->get_size()[1], this->get_descr(), t_csr->get_const_values(),
  t_csr->get_const_row_ptrs(), t_csr->get_const_col_idxs(), hyb_,
- Threshold, Partition));
+ Threshold, Partition);
  }
 
  ~CuspHybrid() override
@@ -441,10 +437,10 @@ class CuspHybrid
 
  const auto id = this->get_gpu_exec()->get_device_id();
  gko::device_guard g{id};
- GKO_ASSERT_NO_CUSPARSE_ERRORS(
- cusparseDhybmv(this->get_gpu_exec()->get_cusparse_handle(), trans_,
-  &scalars.get_const_data()[0], this->get_descr(),
-  hyb_, db, &scalars.get_const_data()[1], dx));
+ gko::kernels::cuda::cusparse::spmv(
+ this->get_gpu_exec()->get_cusparse_handle(), trans_,
+ &scalars.get_const_data()[0], this->get_descr(), hyb_, db,
+ &scalars.get_const_data()[1], dx);
  }
 
  CuspHybrid(std::shared_ptr<const gko::Executor> exec,

diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp
@@ -83,8 +83,9 @@ DEFINE_string(
  formats, "coo",
  "A comma-separated list of formats to run."
  "Supported values are: coo, csr, ell, sellp, hybrid, hybrid0, "
- "hybrid25, hybrid33, hybridlimit0, hybridlimit25, hybridlimit33, "
- "hybridminstorage.\n"
+ "hybrid25, hybrid33, hybrid40, hybrid60, hybrid80, hybridlimit0, "
+ "hybridlimit25, hybridlimit33, hybridminstorage, cusp_csr, cusp_csrex, "
+ "cusp_csrmp, cusp_csrmm, cusp_coo, cusp_ell, cusp_hybrid.\n"
  "coo: Coordinate storage. The CUDA kernel uses the load-balancing approach "
  "suggested in Flegar et al.: Overcoming Load Imbalance for Irregular "
  "Sparse Matrices.\n"
@@ -94,11 +95,20 @@ DEFINE_string(
  "Matrix-Vector Multiplication on CUDA.\n"
  "sellp: Sliced Ellpack uses a default block size of 32.\n"
  "hybrid: Hybrid uses ell and coo to represent the matrix.\n"
- "hybrid0, hybrid25, hybrid33: Hybrid uses the row distribution to decide "
- "the partition.\n"
+ "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses "
+ "the row distribution to decide the partition.\n"
  "hybridlimit0, hybridlimit25, hybrid33: Add the upper bound on the ell "
  "part of hybrid0, hybrid25, hybrid33.\n"
- "hybridminstorage: Hybrid uses the minimal storage to store the matrix.");
+ "hybridminstorage: Hybrid uses the minimal storage to store the matrix.\n"
+ "cusp_hybrid: benchmark CuSPARSE spmv with cusparseXhybmv and an automatic "
+ "partition.\n"
+ "cusp_coo: use cusparseXhybmv with a CUSPARSE_HYB_PARTITION_USER "
+ "partition.\n"
+ "cusp_ell: use cusparseXhybmv with CUSPARSE_HYB_PARTITION_MAX partition.\n"
+ "cusp_csr: benchmark CuSPARSE with the cusparseXcsrmv function.\n"
+ "cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function.\n"
+ "cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n"
+ "cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function.\n");
 
 DEFINE_uint32(nrhs, 1, "The number of right hand sides");