Skip to content

Commit

Permalink
Create CuSPARSE bindings for different precisions.
Browse files Browse the repository at this point in the history
+ Support IndexType int32
+ Support ValueType float, double, complex float and complex double.
+ Add a description of the CuSPARSE benchmarks in the spmv file.
  • Loading branch information
tcojean committed May 16, 2019
1 parent 496421f commit 30a634a
Show file tree
Hide file tree
Showing 3 changed files with 300 additions and 46 deletions.
44 changes: 20 additions & 24 deletions benchmark/spmv/cuda_linops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,13 @@ class CuspCsrmp

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv_mp(
gko::kernels::cuda::cusparse::spmv_mp(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
&scalars.get_const_data()[1], dx));
&scalars.get_const_data()[1], dx);
}

CuspCsrmp(std::shared_ptr<const gko::Executor> exec,
Expand Down Expand Up @@ -195,13 +195,13 @@ class CuspCsr

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv(
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
&scalars.get_const_data()[1], dx));
&scalars.get_const_data()[1], dx);
}

CuspCsr(std::shared_ptr<const gko::Executor> exec,
Expand Down Expand Up @@ -255,14 +255,14 @@ class CuspCsrmm

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmm(
gko::kernels::cuda::cusparse::spmm(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
dense_b->get_size()[0], &scalars.get_const_data()[1], dx,
dense_x->get_size()[0]));
dense_x->get_size()[0]);
}

CuspCsrmm(std::shared_ptr<const gko::Executor> exec,
Expand Down Expand Up @@ -331,7 +331,6 @@ class CuspCsrEx
ValueType alpha = gko::one<ValueType>();
ValueType beta = gko::zero<ValueType>();
gko::size_type buffer_size = 0;
auto data_type = gko::kernels::cuda::cuda_data_type<ValueType>();

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
Expand All @@ -341,22 +340,19 @@ class CuspCsrEx
auto handle = this->get_gpu_exec()->get_cusparse_handle();
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx_bufferSize(
gko::kernels::cuda::cusparse::spmv_buffersize<ValueType, IndexType>(
handle, algmode_, trans_, this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &alpha, data_type,
this->get_descr(), csr_->get_const_values(), data_type,
csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
data_type, &beta, data_type, dx, data_type, data_type,
&buffer_size));
csr_->get_num_stored_elements(), &alpha, this->get_descr(),
csr_->get_const_values(), csr_->get_const_row_ptrs(),
csr_->get_const_col_idxs(), db, &beta, dx, &buffer_size);
GKO_ASSERT_NO_CUDA_ERRORS(cudaMalloc(&buffer_, buffer_size));
set_buffer_ = true;

GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx(
gko::kernels::cuda::cusparse::spmv<ValueType, IndexType>(
handle, algmode_, trans_, this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &alpha, data_type,
this->get_descr(), csr_->get_const_values(), data_type,
csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
data_type, &beta, data_type, dx, data_type, data_type, buffer_));
csr_->get_num_stored_elements(), &alpha, this->get_descr(),
csr_->get_const_values(), csr_->get_const_row_ptrs(),
csr_->get_const_col_idxs(), db, &beta, dx, buffer_);

// Set the pointer mode back to the default DEVICE for Ginkgo
GKO_ASSERT_NO_CUSPARSE_ERRORS(
Expand Down Expand Up @@ -412,11 +408,11 @@ class CuspHybrid

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsr2hyb(
gko::kernels::cuda::cusparse::csr2hyb(
this->get_gpu_exec()->get_cusparse_handle(), this->get_size()[0],
this->get_size()[1], this->get_descr(), t_csr->get_const_values(),
t_csr->get_const_row_ptrs(), t_csr->get_const_col_idxs(), hyb_,
Threshold, Partition));
Threshold, Partition);
}

~CuspHybrid() override
Expand All @@ -441,10 +437,10 @@ class CuspHybrid

const auto id = this->get_gpu_exec()->get_device_id();
gko::device_guard g{id};
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseDhybmv(this->get_gpu_exec()->get_cusparse_handle(), trans_,
&scalars.get_const_data()[0], this->get_descr(),
hyb_, db, &scalars.get_const_data()[1], dx));
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
&scalars.get_const_data()[0], this->get_descr(), hyb_, db,
&scalars.get_const_data()[1], dx);
}

CuspHybrid(std::shared_ptr<const gko::Executor> exec,
Expand Down
20 changes: 15 additions & 5 deletions benchmark/spmv/spmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,9 @@ DEFINE_string(
formats, "coo",
"A comma-separated list of formats to run."
"Supported values are: coo, csr, ell, sellp, hybrid, hybrid0, "
"hybrid25, hybrid33, hybridlimit0, hybridlimit25, hybridlimit33, "
"hybridminstorage.\n"
"hybrid25, hybrid33, hybrid40, hybrid60, hybrid80, hybridlimit0, "
"hybridlimit25, hybridlimit33, hybridminstorage, cusp_csr, cusp_csrex, "
"cusp_csrmp, cusp_csrmm, cusp_coo, cusp_ell, cusp_hybrid.\n"
"coo: Coordinate storage. The CUDA kernel uses the load-balancing approach "
"suggested in Flegar et al.: Overcoming Load Imbalance for Irregular "
"Sparse Matrices.\n"
Expand All @@ -94,11 +95,20 @@ DEFINE_string(
"Matrix-Vector Multiplication on CUDA.\n"
"sellp: Sliced Ellpack uses a default block size of 32.\n"
"hybrid: Hybrid uses ell and coo to represent the matrix.\n"
"hybrid0, hybrid25, hybrid33: Hybrid uses the row distribution to decide "
"the partition.\n"
"hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses "
"the row distribution to decide the partition.\n"
"hybridlimit0, hybridlimit25, hybrid33: Add the upper bound on the ell "
"part of hybrid0, hybrid25, hybrid33.\n"
"hybridminstorage: Hybrid uses the minimal storage to store the matrix.");
"hybridminstorage: Hybrid uses the minimal storage to store the matrix.\n"
"cusp_hybrid: benchmark CuSPARSE spmv with cusparseXhybmv and an automatic "
"partition.\n"
"cusp_coo: use cusparseXhybmv with a CUSPARSE_HYB_PARTITION_USER "
"partition.\n"
"cusp_ell: use cusparseXhybmv with CUSPARSE_HYB_PARTITION_MAX partition.\n"
"cusp_csr: benchmark CuSPARSE with the cusparseXcsrmv function.\n"
"cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function.\n"
"cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n"
"cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function.\n");

DEFINE_uint32(nrhs, 1, "The number of right hand sides");

Expand Down
Loading

0 comments on commit 30a634a

Please sign in to comment.