Merge distributed solvers
This PR enables using distributed matrices and vectors (#971 and #961) in the following iterative solvers (a minimal usage sketch is given after the two lists below):
- Bicgstab
- Cg
- Cgs
- Fcg
- Ir

Currently not supported are:
- Bicg
- [cb_]Gmres
- Idr
- Multigrid
- Lower/Upper_trs
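
For orientation, here is a minimal usage sketch (not part of this diff) of how one of the now-supported solvers can be driven with the distributed types from #971/#961. The executor `exec` is assumed to be created elsewhere, `A`, `b`, `x` are assumed to be already assembled distributed objects, and the stopping parameters are illustrative only; namespaces follow this commit (`gko::distributed`).

```cpp
#include <memory>

#include <ginkgo/ginkgo.hpp>
#include <ginkgo/core/distributed/matrix.hpp>
#include <ginkgo/core/distributed/vector.hpp>


// Sketch: solve A x = b with CG on distributed data. A, b, x are assumed to
// have been read/assembled beforehand (e.g. via read_distributed); the
// stopping criteria below are placeholders.
void solve_distributed(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<gko::distributed::Matrix<double, gko::int32, gko::int64>> A,
    std::shared_ptr<gko::distributed::Vector<double>> b,
    std::shared_ptr<gko::distributed::Vector<double>> x)
{
    // The solver factory does not care about the storage type; the new
    // dispatch routines pick the distributed code path at apply time.
    auto solver =
        gko::solver::Cg<double>::build()
            .with_criteria(
                gko::stop::Iteration::build().with_max_iters(1000u).on(exec),
                gko::stop::ResidualNorm<double>::build()
                    .with_reduction_factor(1e-8)
                    .on(exec))
            .on(exec)
            ->generate(A);
    solver->apply(b.get(), x.get());
}
```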

The handling of distributed and non-distributed data is done via additional dispatch routines that expand on precision_dispatch_real_complex, together with helper routines that extract the underlying dense matrix from either a distributed vector or a dense vector. The residual norm stopping criterion implementation has also been changed to use a similar dispatch approach.

This PR also contains some fixes to the doxygen documentation of the other distributed classes.

Related PR: #976
MarcelKoch committed Oct 31, 2022
2 parents 8ec9757 + 9cdd7d0 commit f90097e
Showing 45 changed files with 1,934 additions and 267 deletions.
2 changes: 2 additions & 0 deletions cmake/create_test.cmake
@@ -93,6 +93,7 @@ function(ginkgo_create_dpcpp_test test_name)
target_compile_features(${test_target_name} PUBLIC cxx_std_17)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS})
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
if (MKL_ENV)
@@ -127,6 +128,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_cuda_test_internal)

7 changes: 4 additions & 3 deletions common/unified/matrix/dense_kernels.cpp
@@ -383,13 +383,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
template <typename ValueType>
void compute_squared_norm2(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* x,
matrix::Dense<remove_complex<ValueType>>* result)
matrix::Dense<remove_complex<ValueType>>* result,
array<char>& tmp)
{
run_kernel_col_reduction(
run_kernel_col_reduction_cached(
exec,
[] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); },
GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
x->get_size(), x);
x->get_size(), tmp, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
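The change above switches compute_squared_norm2 from run_kernel_col_reduction to run_kernel_col_reduction_cached, so the caller can pass a reusable byte workspace (`tmp`) instead of allocating temporary reduction storage on every call. As an illustration only (not part of this diff), a hypothetical kernel written against the same cached interface, as it would sit alongside the kernel above in the same file and namespace, could look like this; the name `compute_sum_abs` is invented:

```cpp
// Hypothetical kernel following the same pattern: sum of absolute values per
// column, with a caller-provided workspace that the cached reduction reuses.
template <typename ValueType>
void compute_sum_abs(std::shared_ptr<const DefaultExecutor> exec,
                     const matrix::Dense<ValueType>* x,
                     matrix::Dense<remove_complex<ValueType>>* result,
                     array<char>& tmp)
{
    run_kernel_col_reduction_cached(
        exec,
        [] GKO_KERNEL(auto i, auto j, auto x) { return abs(x(i, j)); },
        GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
        x->get_size(), tmp, x);
}
```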
126 changes: 126 additions & 0 deletions core/distributed/helpers.hpp
@@ -0,0 +1,126 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <memory>


#include <ginkgo/config.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/dense.hpp>


namespace gko {
namespace detail {


template <typename ValueType>
std::unique_ptr<matrix::Dense<ValueType>> create_with_config_of(
const matrix::Dense<ValueType>* mtx)
{
return matrix::Dense<ValueType>::create(mtx->get_executor(),
mtx->get_size(), mtx->get_stride());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(const matrix::Dense<ValueType>* mtx)
{
return mtx;
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(matrix::Dense<ValueType>* mtx)
{
return mtx;
}


#if GINKGO_BUILD_MPI


template <typename ValueType>
std::unique_ptr<distributed::Vector<ValueType>> create_with_config_of(
const distributed::Vector<ValueType>* mtx)
{
return distributed::Vector<ValueType>::create(
mtx->get_executor(), mtx->get_communicator(), mtx->get_size(),
mtx->get_local_vector()->get_size(),
mtx->get_local_vector()->get_stride());
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(distributed::Vector<ValueType>* mtx)
{
return const_cast<matrix::Dense<ValueType>*>(mtx->get_local_vector());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(
const distributed::Vector<ValueType>* mtx)
{
return mtx->get_local_vector();
}


#endif


template <typename Arg>
bool is_distributed(Arg* linop)
{
#if GINKGO_BUILD_MPI
return dynamic_cast<const distributed::DistributedBase*>(linop);
#else
return false;
#endif
}


template <typename Arg, typename... Rest>
bool is_distributed(Arg* linop, Rest*... rest)
{
#if GINKGO_BUILD_MPI
bool is_distributed_value =
dynamic_cast<const distributed::DistributedBase*>(linop);
GKO_ASSERT(is_distributed_value == is_distributed(rest...));
return is_distributed_value;
#else
return false;
#endif
}


} // namespace detail
} // namespace gko
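
To illustrate how these helpers are meant to be combined (this snippet is not part of the PR; `apply_on_local_data` and `common_impl` are invented names): a routine can branch once on the storage type with is_distributed and then hand the process-local dense data obtained via get_local to a single shared implementation. The sketch assumes the declarations from this header plus the usual Ginkgo headers are available.

```cpp
#include <ginkgo/ginkgo.hpp>


// Placeholder for the storage-agnostic part of the algorithm.
template <typename ValueType>
void common_impl(const gko::matrix::Dense<ValueType>* local_b,
                 gko::matrix::Dense<ValueType>* local_x);

template <typename ValueType>
void apply_on_local_data(const gko::LinOp* b, gko::LinOp* x)
{
    using Dense = gko::matrix::Dense<ValueType>;
    if (gko::detail::is_distributed(b, x)) {
#if GINKGO_BUILD_MPI
        using Vec = gko::distributed::Vector<ValueType>;
        // get_local() strips the distributed wrapper and returns the
        // process-local Dense block.
        common_impl(gko::detail::get_local(gko::as<Vec>(b)),
                    gko::detail::get_local(gko::as<Vec>(x)));
#endif
    } else {
        // For plain Dense vectors, get_local() is the identity.
        common_impl(gko::detail::get_local(gko::as<Dense>(b)),
                    gko::detail::get_local(gko::as<Dense>(x)));
    }
}
```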
107 changes: 53 additions & 54 deletions core/distributed/matrix.cpp
@@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/distributed/matrix.hpp>


#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/csr.hpp>

@@ -297,65 +298,65 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* b, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto dense_b, auto dense_x) {
auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());

auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();

auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
},
b, x);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
const auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
auto local_alpha = as<local_vector_type>(alpha);
auto local_beta = as<local_vector_type>(beta);
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto local_alpha, const auto dense_b,
const auto local_beta, auto dense_x) {
const auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());

auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
req.wait();

auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
},
alpha, b, beta, x);
}


@@ -394,7 +395,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
gather_idxs_ = other.gather_idxs_;
send_offsets_ = other.send_offsets_;
recv_offsets_ = other.recv_offsets_;
recv_sizes_ = other.recv_sizes_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
@@ -419,7 +419,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
gather_idxs_ = std::move(other.gather_idxs_);
send_offsets_ = std::move(other.send_offsets_);
recv_offsets_ = std::move(other.recv_offsets_);
recv_sizes_ = std::move(other.recv_sizes_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);