Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved ParIlu kernels: interface & implemtation #319

Merged
merged 3 commits into from
Jun 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,10 +575,10 @@ namespace par_ilu_factorization {


template <typename ValueType, typename IndexType>
GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL(ValueType, IndexType)
GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL);
GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_PAR_ILU_INITIALIZE_L_U_KERNEL(ValueType, IndexType)
Expand Down
48 changes: 39 additions & 9 deletions core/factorization/par_ilu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <memory>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/polymorphic_object.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/coo.hpp>
Expand All @@ -51,7 +53,8 @@ namespace factorization {
namespace par_ilu_factorization {


GKO_REGISTER_OPERATION(compute_nnz_l_u, par_ilu_factorization::compute_nnz_l_u);
GKO_REGISTER_OPERATION(initialize_row_ptrs_l_u,
par_ilu_factorization::initialize_row_ptrs_l_u);
GKO_REGISTER_OPERATION(initialize_l_u, par_ilu_factorization::initialize_l_u);
GKO_REGISTER_OPERATION(compute_l_u_factors,
par_ilu_factorization::compute_l_u_factors);
Expand All @@ -69,7 +72,14 @@ ParIlu<ValueType, IndexType>::generate_l_u(
using CsrMatrix = matrix::Csr<ValueType, IndexType>;
using CooMatrix = matrix::Coo<ValueType, IndexType>;

GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix);

const auto exec = this->get_executor();
const auto host_exec = exec->get_master();

// If required, it is also possible to make this a Factory parameter
auto csr_strategy = std::make_shared<typename CsrMatrix::cusparse>();

// Only copies the matrix if it is not on the same executor or was not in
// the right format. Throws an exception if it is not convertable.
std::unique_ptr<CsrMatrix> csr_system_matrix_unique_ptr{};
Expand All @@ -93,14 +103,34 @@ ParIlu<ValueType, IndexType>::generate_l_u(
}

const auto matrix_size = csr_system_matrix->get_size();
size_type l_nnz{};
size_type u_nnz{};
exec->run(par_ilu_factorization::make_compute_nnz_l_u(csr_system_matrix,
&l_nnz, &u_nnz));
auto l_factor =
l_matrix_type::create(exec, matrix_size, l_nnz /* TODO set strategy */);
auto u_factor =
u_matrix_type::create(exec, matrix_size, u_nnz /* TODO set strategy */);
const auto number_rows = matrix_size[0];
Array<IndexType> l_row_ptrs{exec, number_rows + 1};
Array<IndexType> u_row_ptrs{exec, number_rows + 1};
exec->run(par_ilu_factorization::make_initialize_row_ptrs_l_u(
csr_system_matrix, l_row_ptrs.get_data(), u_row_ptrs.get_data()));

IndexType l_nnz_it;
IndexType u_nnz_it;
// Since nnz is always at row_ptrs[m], it can be extracted easily
host_exec->copy_from(exec.get(), 1, l_row_ptrs.get_data() + number_rows,
&l_nnz_it);
host_exec->copy_from(exec.get(), 1, u_row_ptrs.get_data() + number_rows,
&u_nnz_it);
auto l_nnz = static_cast<size_type>(l_nnz_it);
auto u_nnz = static_cast<size_type>(u_nnz_it);

// Since `row_ptrs` of L and U is already created, the matrix can be
// directly created with it
Array<IndexType> l_col_idxs{exec, l_nnz};
Array<ValueType> l_vals{exec, l_nnz};
auto l_factor = l_matrix_type::create(exec, matrix_size, std::move(l_vals),
std::move(l_col_idxs),
std::move(l_row_ptrs), csr_strategy);
Array<IndexType> u_col_idxs{exec, u_nnz};
Array<ValueType> u_vals{exec, u_nnz};
auto u_factor = u_matrix_type::create(exec, matrix_size, std::move(u_vals),
std::move(u_col_idxs),
std::move(u_row_ptrs), csr_strategy);

exec->run(par_ilu_factorization::make_initialize_l_u(
csr_system_matrix, l_factor.get(), u_factor.get()));
Expand Down
23 changes: 12 additions & 11 deletions core/factorization/par_ilu_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ namespace gko {
namespace kernels {


#define GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL(ValueType, IndexType) \
void compute_nnz_l_u( \
std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *system_matrix, \
size_type *l_nnz, size_type *u_nnz)
#define GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL(ValueType, \
IndexType) \
void initialize_row_ptrs_l_u( \
std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *system_matrix, \
IndexType *l_row_ptrs, IndexType *u_row_ptrs)
#define GKO_DECLARE_PAR_ILU_INITIALIZE_L_U_KERNEL(ValueType, IndexType) \
void initialize_l_u( \
std::shared_ptr<const DefaultExecutor> exec, \
Expand All @@ -68,12 +69,12 @@ namespace kernels {
matrix::Csr<ValueType, IndexType> *u_factor)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_PAR_ILU_INITIALIZE_L_U_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_PAR_ILU_INITIALIZE_L_U_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL(ValueType, IndexType)


Expand Down
13 changes: 7 additions & 6 deletions cuda/factorization/par_ilu_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ namespace par_ilu_factorization {


template <typename ValueType, typename IndexType>
void compute_nnz_l_u(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType> *system_matrix,
size_type *l_nnz, size_type *u_nnz) GKO_NOT_IMPLEMENTED;
void initialize_row_ptrs_l_u(
std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *system_matrix,
IndexType *l_row_ptrs, IndexType *u_row_ptrs) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL);
GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL);


template <typename ValueType, typename IndexType>
void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
void initialize_l_u(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *system_matrix,
matrix::Csr<ValueType, IndexType> *csr_l,
matrix::Csr<ValueType, IndexType> *csr_u)
Expand All @@ -70,7 +71,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(

template <typename ValueType, typename IndexType>
void compute_l_u_factors(
std::shared_ptr<const DefaultExecutor> exec, size_type iterations,
std::shared_ptr<const CudaExecutor> exec, size_type iterations,
const matrix::Coo<ValueType, IndexType> *system_matrix,
matrix::Csr<ValueType, IndexType> *l_factor,
matrix::Csr<ValueType, IndexType> *u_factor) GKO_NOT_IMPLEMENTED;
Expand Down
50 changes: 33 additions & 17 deletions omp/factorization/par_ilu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,29 +50,48 @@ namespace par_ilu_factorization {


template <typename ValueType, typename IndexType>
void compute_nnz_l_u(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *system_matrix,
size_type *l_nnz, size_type *u_nnz)
void initialize_row_ptrs_l_u(
std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *system_matrix,
IndexType *l_row_ptrs, IndexType *u_row_ptrs)
{
auto row_ptrs = system_matrix->get_const_row_ptrs();
auto col_idxs = system_matrix->get_const_col_idxs();
*l_nnz = 0;
*u_nnz = 0;
for (size_type row = 0; row < system_matrix->get_size()[1]; ++row) {

l_row_ptrs[0] = 0;
u_row_ptrs[0] = 0;
// Calculate the NNZ per row first
#pragma omp parallel for
for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
hartwiganzt marked this conversation as resolved.
Show resolved Hide resolved
size_type l_nnz{};
size_type u_nnz{};
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
size_type col = col_idxs[el];
if (col <= row) {
++(*l_nnz);
++l_nnz;
}
if (col >= row) {
++(*u_nnz);
++u_nnz;
}
}
l_row_ptrs[row + 1] = l_nnz;
u_row_ptrs[row + 1] = u_nnz;
}

// Now, compute the prefix-sum, to get proper row_ptrs for L and U
IndexType l_previous_nnz{};
IndexType u_previous_nnz{};
for (size_type row = 1; row < system_matrix->get_size()[0] + 1; ++row) {
l_previous_nnz += l_row_ptrs[row];
u_previous_nnz += u_row_ptrs[row];

l_row_ptrs[row] = l_previous_nnz;
u_row_ptrs[row] = u_previous_nnz;
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_PAR_ILU_COMPUTE_NNZ_L_U_KERNEL);
GKO_DECLARE_PAR_ILU_INITIALIZE_ROW_PTRS_L_U_KERNEL);


template <typename ValueType, typename IndexType>
Expand All @@ -85,19 +104,18 @@ void initialize_l_u(std::shared_ptr<const OmpExecutor> exec,
const auto col_idxs = system_matrix->get_const_col_idxs();
const auto vals = system_matrix->get_const_values();

auto row_ptrs_l = csr_l->get_row_ptrs();
const auto row_ptrs_l = csr_l->get_const_row_ptrs();
auto col_idxs_l = csr_l->get_col_idxs();
auto vals_l = csr_l->get_values();

auto row_ptrs_u = csr_u->get_row_ptrs();
const auto row_ptrs_u = csr_u->get_const_row_ptrs();
auto col_idxs_u = csr_u->get_col_idxs();
auto vals_u = csr_u->get_values();

size_type current_index_l{};
size_type current_index_u{};
row_ptrs_l[current_index_l] = zero<IndexType>();
row_ptrs_u[current_index_u] = zero<IndexType>();
#pragma omp parallel for
for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
size_type current_index_l = row_ptrs_l[row];
size_type current_index_u = row_ptrs_u[row];
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
const auto col = col_idxs[el];
const auto val = vals[el];
Expand All @@ -120,8 +138,6 @@ void initialize_l_u(std::shared_ptr<const OmpExecutor> exec,
++current_index_u;
}
}
row_ptrs_l[row + 1] = current_index_l;
row_ptrs_u[row + 1] = current_index_u;
}
}

Expand Down
Loading