Merge distributed solvers
This PR enables using distributed matrices and vectors (#971 and #961) in the following iterative solvers (a minimal usage sketch is given after the two lists below):
- Bicgstab
- Cg
- Cgs
- Fcg
- Ir

Currently not supported are:
- Bicg
- [cb_]Gmres
- Idr
- Multigrid
- Lower/Upper_trs
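
For orientation, here is a minimal usage sketch (not part of this diff) of how one of the now-supported solvers can be driven with the distributed types from #971/#961. The executor `exec` is assumed to be created elsewhere, `A`, `b`, `x` are assumed to be already assembled distributed objects, and the stopping parameters are illustrative only; namespaces follow this commit (`gko::distributed`).

```cpp
#include <memory>

#include <ginkgo/ginkgo.hpp>
#include <ginkgo/core/distributed/matrix.hpp>
#include <ginkgo/core/distributed/vector.hpp>


// Sketch: solve A x = b with CG on distributed data. A, b, x are assumed to
// have been read/assembled beforehand (e.g. via read_distributed); the
// stopping criteria below are placeholders.
void solve_distributed(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<gko::distributed::Matrix<double, gko::int32, gko::int64>> A,
    std::shared_ptr<gko::distributed::Vector<double>> b,
    std::shared_ptr<gko::distributed::Vector<double>> x)
{
    // The solver factory does not care about the storage type; the new
    // dispatch routines pick the distributed code path at apply time.
    auto solver =
        gko::solver::Cg<double>::build()
            .with_criteria(
                gko::stop::Iteration::build().with_max_iters(1000u).on(exec),
                gko::stop::ResidualNorm<double>::build()
                    .with_reduction_factor(1e-8)
                    .on(exec))
            .on(exec)
            ->generate(A);
    solver->apply(b.get(), x.get());
}
```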

The handling of distributed and non-distributed data is done via additional dispatch routines that expand on precision_dispatch_real_complex, together with helper routines that extract the underlying dense matrix from either a distributed vector or a dense vector. The residual norm stopping criterion implementation has also been changed to use a similar dispatch approach.

This PR also contains some fixes to the doxygen documentation of the other distributed classes.

Related PR: #976
MarcelKoch committed Oct 31, 2022
2 parents 8ec9757 + 9cdd7d0 commit f90097e
Showing 45 changed files with 1,934 additions and 267 deletions.
2 changes: 2 additions & 0 deletions cmake/create_test.cmake
@@ -93,6 +93,7 @@ function(ginkgo_create_dpcpp_test test_name)
target_compile_features(${test_target_name} PUBLIC cxx_std_17)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS})
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
if (MKL_ENV)
@@ -127,6 +128,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_cuda_test_internal)

7 changes: 4 additions & 3 deletions common/unified/matrix/dense_kernels.cpp
@@ -383,13 +383,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
template <typename ValueType>
void compute_squared_norm2(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* x,
matrix::Dense<remove_complex<ValueType>>* result)
matrix::Dense<remove_complex<ValueType>>* result,
array<char>& tmp)
{
run_kernel_col_reduction(
run_kernel_col_reduction_cached(
exec,
[] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); },
GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
x->get_size(), x);
x->get_size(), tmp, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
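The change above switches compute_squared_norm2 from run_kernel_col_reduction to run_kernel_col_reduction_cached, so the caller can pass a reusable byte workspace (`tmp`) instead of allocating temporary reduction storage on every call. As an illustration only (not part of this diff), a hypothetical kernel written against the same cached interface, as it would sit alongside the kernel above in the same file and namespace, could look like this; the name `compute_sum_abs` is invented:

```cpp
// Hypothetical kernel following the same pattern: sum of absolute values per
// column, with a caller-provided workspace that the cached reduction reuses.
template <typename ValueType>
void compute_sum_abs(std::shared_ptr<const DefaultExecutor> exec,
                     const matrix::Dense<ValueType>* x,
                     matrix::Dense<remove_complex<ValueType>>* result,
                     array<char>& tmp)
{
    run_kernel_col_reduction_cached(
        exec,
        [] GKO_KERNEL(auto i, auto j, auto x) { return abs(x(i, j)); },
        GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
        x->get_size(), tmp, x);
}
```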
126 changes: 126 additions & 0 deletions core/distributed/helpers.hpp
@@ -0,0 +1,126 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <memory>


#include <ginkgo/config.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/dense.hpp>


namespace gko {
namespace detail {


template <typename ValueType>
std::unique_ptr<matrix::Dense<ValueType>> create_with_config_of(
const matrix::Dense<ValueType>* mtx)
{
return matrix::Dense<ValueType>::create(mtx->get_executor(),
mtx->get_size(), mtx->get_stride());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(const matrix::Dense<ValueType>* mtx)
{
return mtx;
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(matrix::Dense<ValueType>* mtx)
{
return mtx;
}


#if GINKGO_BUILD_MPI


template <typename ValueType>
std::unique_ptr<distributed::Vector<ValueType>> create_with_config_of(
const distributed::Vector<ValueType>* mtx)
{
return distributed::Vector<ValueType>::create(
mtx->get_executor(), mtx->get_communicator(), mtx->get_size(),
mtx->get_local_vector()->get_size(),
mtx->get_local_vector()->get_stride());
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(distributed::Vector<ValueType>* mtx)
{
return const_cast<matrix::Dense<ValueType>*>(mtx->get_local_vector());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(
const distributed::Vector<ValueType>* mtx)
{
return mtx->get_local_vector();
}


#endif


template <typename Arg>
bool is_distributed(Arg* linop)
{
#if GINKGO_BUILD_MPI
return dynamic_cast<const distributed::DistributedBase*>(linop);
#else
return false;
#endif
}


template <typename Arg, typename... Rest>
bool is_distributed(Arg* linop, Rest*... rest)
{
#if GINKGO_BUILD_MPI
bool is_distributed_value =
dynamic_cast<const distributed::DistributedBase*>(linop);
GKO_ASSERT(is_distributed_value == is_distributed(rest...));
return is_distributed_value;
#else
return false;
#endif
}


} // namespace detail
} // namespace gko
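
To illustrate how these helpers are meant to be combined (this snippet is not part of the PR; `apply_on_local_data` and `common_impl` are invented names): a routine can branch once on the storage type with is_distributed and then hand the process-local dense data obtained via get_local to a single shared implementation. The sketch assumes the declarations from this header plus the usual Ginkgo headers are available.

```cpp
#include <ginkgo/ginkgo.hpp>


// Placeholder for the storage-agnostic part of the algorithm.
template <typename ValueType>
void common_impl(const gko::matrix::Dense<ValueType>* local_b,
                 gko::matrix::Dense<ValueType>* local_x);

template <typename ValueType>
void apply_on_local_data(const gko::LinOp* b, gko::LinOp* x)
{
    using Dense = gko::matrix::Dense<ValueType>;
    if (gko::detail::is_distributed(b, x)) {
#if GINKGO_BUILD_MPI
        using Vec = gko::distributed::Vector<ValueType>;
        // get_local() strips the distributed wrapper and returns the
        // process-local Dense block.
        common_impl(gko::detail::get_local(gko::as<Vec>(b)),
                    gko::detail::get_local(gko::as<Vec>(x)));
#endif
    } else {
        // For plain Dense vectors, get_local() is the identity.
        common_impl(gko::detail::get_local(gko::as<Dense>(b)),
                    gko::detail::get_local(gko::as<Dense>(x)));
    }
}
```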
107 changes: 53 additions & 54 deletions core/distributed/matrix.cpp
@@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/distributed/matrix.hpp>


#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/csr.hpp>

@@ -297,65 +298,65 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* b, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto dense_b, auto dense_x) {
auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());

auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();

auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
},
b, x);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
const auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
auto local_alpha = as<local_vector_type>(alpha);
auto local_beta = as<local_vector_type>(beta);
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto local_alpha, const auto dense_b,
const auto local_beta, auto dense_x) {
const auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());

auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
req.wait();

auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
},
alpha, b, beta, x);
}


@@ -394,7 +395,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
gather_idxs_ = other.gather_idxs_;
send_offsets_ = other.send_offsets_;
recv_offsets_ = other.recv_offsets_;
recv_sizes_ = other.recv_sizes_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
@@ -419,7 +419,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
gather_idxs_ = std::move(other.gather_idxs_);
send_offsets_ = std::move(other.send_offsets_);
recv_offsets_ = std::move(other.recv_offsets_);
recv_sizes_ = std::move(other.recv_sizes_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);