mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
add OpenMP parallelized version of DILU.
Implement graphcoloring to expose rows in level sets that that can be executed in parallel during the sparse triangular solves. Add copy of A matrix that is reordered to ensure continuous memory reads when traversing the matrix in level set order. TODO: add number of threads available as constructor argument in DILU
This commit is contained in:
parent
b00d3ca4bb
commit
5f6c97ff3b
@ -17,49 +17,87 @@
|
|||||||
#ifndef OPM_DILU_HEADER_INCLUDED
|
#ifndef OPM_DILU_HEADER_INCLUDED
|
||||||
#define OPM_DILU_HEADER_INCLUDED
|
#define OPM_DILU_HEADER_INCLUDED
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
#include <opm/common/ErrorMacros.hpp>
|
#include <opm/common/ErrorMacros.hpp>
|
||||||
#include <opm/common/TimingMacros.hpp>
|
#include <opm/common/TimingMacros.hpp>
|
||||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||||
|
|
||||||
#include <dune/common/fmatrix.hh>
|
#include <dune/common/fmatrix.hh>
|
||||||
#include <dune/common/version.hh>
|
|
||||||
#include <dune/common/unused.hh>
|
#include <dune/common/unused.hh>
|
||||||
|
#include <dune/common/version.hh>
|
||||||
#include <dune/istl/bcrsmatrix.hh>
|
#include <dune/istl/bcrsmatrix.hh>
|
||||||
|
|
||||||
|
#include <opm/simulators/linalg/GraphColoring.hpp>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#if HAVE_OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// TODO: rewrite factory and constructor to keep track of a number of threads variable
|
||||||
namespace Dune
|
namespace Dune
|
||||||
{
|
{
|
||||||
|
|
||||||
/*! \brief The sequential DILU preconditioner.
|
/*! \brief The OpenMP thread parallelized DILU preconditioner.
|
||||||
|
* \details Safe to run serially without OpenMP. When run in parallel
|
||||||
|
the matrix is assumed to be symmetric.
|
||||||
|
|
||||||
\tparam M The matrix type to operate on
|
\tparam M The matrix type to operate on
|
||||||
\tparam X Type of the update
|
\tparam X Type of the update
|
||||||
\tparam Y Type of the defect
|
\tparam Y Type of the defect
|
||||||
*/
|
*/
|
||||||
template <class M, class X, class Y>
|
template <class M, class X, class Y>
|
||||||
class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
class MultithreadDILU : public PreconditionerWithUpdate<X, Y>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
//! \brief The matrix type the preconditioner is for.
|
//! \brief The matrix type the preconditioner is for.
|
||||||
using matrix_type = M;
|
using matrix_type = M;
|
||||||
//! \brief The domain type of the preconditioner.
|
//! \brief The domain type of the preconditioner.
|
||||||
using domain_type = X;
|
using domain_type = X;
|
||||||
//! \brief The range type of the preconditioner.
|
//! \brief The range type of the preconditioner.
|
||||||
using range_type = Y;
|
using range_type = Y;
|
||||||
//! \brief The field type of the preconditioner.
|
//! \brief The field type of the preconditioner.
|
||||||
using field_type = typename X::field_type;
|
using field_type = typename X::field_type;
|
||||||
//! \brief scalar type underlying the field_type
|
//! \brief scalar type underlying the field_type
|
||||||
|
|
||||||
/*! \brief Constructor.
|
/*! \brief Constructor gets all parameters to operate the prec.
|
||||||
Constructor gets all parameters to operate the prec.
|
|
||||||
\param A The matrix to operate on.
|
\param A The matrix to operate on.
|
||||||
*/
|
*/
|
||||||
SeqDilu(const M& A)
|
MultithreadDILU(const M& A)
|
||||||
: A_(A)
|
: A_(A)
|
||||||
|
, A_reordered_(M(A_.N(), A_.N(), A_.nonzeroes(), M::row_wise))
|
||||||
{
|
{
|
||||||
|
OPM_TIMEBLOCK(prec_construct);
|
||||||
|
// TODO: rewrite so this value is set by an argument to the constructor
|
||||||
|
#if HAVE_OPENMP
|
||||||
|
use_multithreading = omp_get_max_threads() > 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (use_multithreading) {
|
||||||
|
//! Assuming symmetric matrices using a lower triangular coloring to construct
|
||||||
|
//! the levels is sufficient
|
||||||
|
level_sets_ = Opm::getMatrixRowColoring(A_, Opm::ColoringType::LOWER);
|
||||||
|
reordered_to_natural_ = std::vector<std::size_t>(A_.N());
|
||||||
|
natural_to_reorder_ = std::vector<std::size_t>(A_.N());
|
||||||
|
int globCnt = 0;
|
||||||
|
for (const auto& level_set : level_sets_) {
|
||||||
|
for (const auto j : level_set) {
|
||||||
|
reordered_to_natural_[globCnt] = j;
|
||||||
|
natural_to_reorder_[j] = globCnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto dst_row_it = A_reordered_.createbegin(); dst_row_it != A_reordered_.createend(); ++dst_row_it) {
|
||||||
|
auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
|
||||||
|
// For eleemnts in A
|
||||||
|
for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
|
||||||
|
dst_row_it.insert(elem.index());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Dinv_.resize(A_.N());
|
Dinv_.resize(A_.N());
|
||||||
// the Dinv matrix must be initialised
|
|
||||||
update();
|
update();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,32 +105,13 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
|||||||
\brief Update the preconditioner.
|
\brief Update the preconditioner.
|
||||||
\copydoc Preconditioner::update()
|
\copydoc Preconditioner::update()
|
||||||
*/
|
*/
|
||||||
virtual void update() override
|
void update() override
|
||||||
{
|
{
|
||||||
OPM_TIMEBLOCK(update);
|
OPM_TIMEBLOCK(prec_update);
|
||||||
|
if (use_multithreading) {
|
||||||
auto endi = A_.end();
|
parallelUpdate();
|
||||||
for ( auto row = A_.begin(); row != endi; ++row) {
|
} else {
|
||||||
const auto row_i = row.index();
|
serialUpdate();
|
||||||
Dinv_[row_i] = A_[row_i][row_i];
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( auto row = A_.begin(); row != endi; ++row)
|
|
||||||
{
|
|
||||||
const auto row_i = row.index();
|
|
||||||
auto Dinv_temp = Dinv_[row_i];
|
|
||||||
for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij)
|
|
||||||
{
|
|
||||||
const auto col_j = a_ij.index();
|
|
||||||
const auto a_ji = A_[col_j].find(row_i);
|
|
||||||
// if A[i, j] != 0 and A[j, i] != 0
|
|
||||||
if (a_ji != A_[col_j].end()) {
|
|
||||||
// Dinv_temp -= A[i, j] * d[j] * A[j, i]
|
|
||||||
Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Dinv_temp.invert();
|
|
||||||
Dinv_[row_i] = Dinv_temp;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,7 +119,7 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
|||||||
\brief Prepare the preconditioner.
|
\brief Prepare the preconditioner.
|
||||||
\copydoc Preconditioner::pre(X&,Y&)
|
\copydoc Preconditioner::pre(X&,Y&)
|
||||||
*/
|
*/
|
||||||
virtual void pre(X& v, Y& d) override
|
void pre(X& v, Y& d) override
|
||||||
{
|
{
|
||||||
DUNE_UNUSED_PARAMETER(v);
|
DUNE_UNUSED_PARAMETER(v);
|
||||||
DUNE_UNUSED_PARAMETER(d);
|
DUNE_UNUSED_PARAMETER(d);
|
||||||
@ -111,52 +130,13 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
|||||||
\brief Apply the preconditioner.
|
\brief Apply the preconditioner.
|
||||||
\copydoc Preconditioner::apply(X&,const Y&)
|
\copydoc Preconditioner::apply(X&,const Y&)
|
||||||
*/
|
*/
|
||||||
virtual void apply(X& v, const Y& d) override
|
void apply(X& v, const Y& d) override
|
||||||
{
|
{
|
||||||
// M = (D + L_A) D^-1 (D + U_A) (a LU decomposition of M)
|
OPM_TIMEBLOCK(prec_apply);
|
||||||
// where L_A and U_A are the strictly lower and upper parts of A and M has the properties:
|
if (use_multithreading) {
|
||||||
// diag(A) = diag(M)
|
parallelApply(v, d);
|
||||||
// Working with defect d = b - Ax and update v = x_{n+1} - x_n
|
} else {
|
||||||
// solving the product M^-1(d) using upper and lower triangular solve
|
serialApply(v, d);
|
||||||
// v = M^{-1}*d = (D + U_A)^{-1} D (D + L_A)^{-1} * d
|
|
||||||
OPM_TIMEBLOCK(apply);
|
|
||||||
using Xblock = typename X::block_type;
|
|
||||||
using Yblock = typename Y::block_type;
|
|
||||||
|
|
||||||
// lower triangular solve: (D + L_A) y = d
|
|
||||||
auto endi = A_.end();
|
|
||||||
for (auto row = A_.begin(); row != endi; ++row)
|
|
||||||
{
|
|
||||||
const auto row_i = row.index();
|
|
||||||
Yblock rhs = d[row_i];
|
|
||||||
for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
|
|
||||||
// if A[i][j] != 0
|
|
||||||
// rhs -= A[i][j]* y[j], where v_j stores y_j
|
|
||||||
const auto col_j = a_ij.index();
|
|
||||||
a_ij->mmv(v[col_j], rhs);
|
|
||||||
}
|
|
||||||
// y_i = Dinv_i * rhs
|
|
||||||
// storing y_i in v_i
|
|
||||||
Dinv_[row_i].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
|
|
||||||
}
|
|
||||||
|
|
||||||
// upper triangular solve: (D + U_A) v = Dy
|
|
||||||
auto rendi = A_.beforeBegin();
|
|
||||||
for (auto row = A_.beforeEnd(); row != rendi; --row)
|
|
||||||
{
|
|
||||||
const auto row_i = row.index();
|
|
||||||
// rhs = 0
|
|
||||||
Xblock rhs(0.0);
|
|
||||||
for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
|
|
||||||
// if A[i][j] != 0
|
|
||||||
// rhs += A[i][j]*v[j]
|
|
||||||
const auto col_j = a_ij.index();
|
|
||||||
a_ij->umv(v[col_j], rhs);
|
|
||||||
}
|
|
||||||
// calculate update v = M^-1*d
|
|
||||||
// v_i = y_i - Dinv_i*rhs
|
|
||||||
// before update v_i is y_i
|
|
||||||
Dinv_[row_i].mmv(rhs, v[row_i]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,11 +144,11 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
|||||||
\brief Clean up.
|
\brief Clean up.
|
||||||
\copydoc Preconditioner::post(X&)
|
\copydoc Preconditioner::post(X&)
|
||||||
*/
|
*/
|
||||||
virtual void post(X& x) override
|
void post(X& x) override
|
||||||
{
|
{
|
||||||
DUNE_UNUSED_PARAMETER(x);
|
DUNE_UNUSED_PARAMETER(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<typename M::block_type> getDiagonal()
|
std::vector<typename M::block_type> getDiagonal()
|
||||||
{
|
{
|
||||||
return Dinv_;
|
return Dinv_;
|
||||||
@ -183,10 +163,198 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
|
|||||||
private:
|
private:
|
||||||
//! \brief The matrix we operate on.
|
//! \brief The matrix we operate on.
|
||||||
const M& A_;
|
const M& A_;
|
||||||
|
//! \brief Copy of A_ that is reordered to store rows that can be computed simultaneously next to each other to
|
||||||
|
//! increase cache usage when multithreading
|
||||||
|
M A_reordered_;
|
||||||
//! \brief The inverse of the diagnal matrix
|
//! \brief The inverse of the diagnal matrix
|
||||||
std::vector<typename M::block_type> Dinv_;
|
std::vector<typename M::block_type> Dinv_;
|
||||||
|
//! \brief SparseTable storing each row by level
|
||||||
|
Opm::SparseTable<std::size_t> level_sets_;
|
||||||
|
//! \brief converts from index in reordered structure to index natural ordered structure
|
||||||
|
std::vector<std::size_t> reordered_to_natural_;
|
||||||
|
//! \brief converts from index in natural ordered structure to index reordered strucutre
|
||||||
|
std::vector<std::size_t> natural_to_reorder_;
|
||||||
|
//! \brief Boolean value describing whether or not to use multithreaded version of functions
|
||||||
|
bool use_multithreading{false};
|
||||||
|
|
||||||
|
void serialUpdate()
|
||||||
|
{
|
||||||
|
for (std::size_t row = 0; row < A_.N(); ++row) {
|
||||||
|
Dinv_[row] = A_[row][row];
|
||||||
|
}
|
||||||
|
for (auto row = A_.begin(); row != A_.end(); ++row) {
|
||||||
|
const auto row_i = row.index();
|
||||||
|
auto Dinv_temp = Dinv_[row_i];
|
||||||
|
for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
|
||||||
|
const auto col_j = a_ij.index();
|
||||||
|
const auto a_ji = A_[col_j].find(row_i);
|
||||||
|
// if A[i, j] != 0 and A[j, i] != 0
|
||||||
|
if (a_ji != A_[col_j].end()) {
|
||||||
|
// Dinv_temp -= A[i, j] * d[j] * A[j, i]
|
||||||
|
Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Dinv_temp.invert();
|
||||||
|
Dinv_[row_i] = Dinv_temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parallelUpdate()
|
||||||
|
{
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for (std::size_t row = 0; row != A_.N(); ++row) {
|
||||||
|
Dinv_[natural_to_reorder_[row]] = A_[row][row];
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: is there a better/faster way of copying all values?
|
||||||
|
for (auto dst_row_it = A_reordered_.begin(); dst_row_it != A_reordered_.end(); ++dst_row_it) {
|
||||||
|
auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
|
||||||
|
for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
|
||||||
|
A_reordered_[dst_row_it.index()][elem.index()] = *elem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int level_start_idx = 0;
|
||||||
|
for (int level = 0; level < level_sets_.size(); ++level) {
|
||||||
|
const int num_of_rows_in_level = level_sets_[level].size();
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
|
||||||
|
auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
|
||||||
|
const auto row_i = reordered_to_natural_[row.index()];
|
||||||
|
// auto Dinv_temp = Dinv_[row_i];
|
||||||
|
auto Dinv_temp = Dinv_[level_start_idx + row_idx_in_level];
|
||||||
|
for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
|
||||||
|
const auto col_j = natural_to_reorder_[a_ij.index()];
|
||||||
|
const auto a_ji = A_reordered_[col_j].find(row_i);
|
||||||
|
if (a_ji != A_reordered_[col_j].end()) {
|
||||||
|
// Dinv_temp -= A[i, j] * d[j] * A[j, i]
|
||||||
|
Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Dinv_temp.invert();
|
||||||
|
Dinv_[level_start_idx + row_idx_in_level] = Dinv_temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
level_start_idx += num_of_rows_in_level;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void serialApply(X& v, const Y& d)
|
||||||
|
{
|
||||||
|
// M = (D + L_A) D^-1 (D + U_A) (a LU decomposition of M)
|
||||||
|
// where L_A and U_A are the strictly lower and upper parts of A and M has the properties:
|
||||||
|
// diag(A) = diag(M)
|
||||||
|
// Working with defect d = b - Ax and update v = x_{n+1} - x_n
|
||||||
|
// solving the product M^-1(d) using upper and lower triangular solve
|
||||||
|
// v = M^{-1}*d = (D + U_A)^{-1} D (D + L_A)^{-1} * d
|
||||||
|
// lower triangular solve: (D + L_A) y = d
|
||||||
|
using Xblock = typename X::block_type;
|
||||||
|
using Yblock = typename Y::block_type;
|
||||||
|
{
|
||||||
|
OPM_TIMEBLOCK(lower_solve);
|
||||||
|
auto endi = A_.end();
|
||||||
|
for (auto row = A_.begin(); row != endi; ++row) {
|
||||||
|
const auto row_i = row.index();
|
||||||
|
Yblock rhs = d[row_i];
|
||||||
|
for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
|
||||||
|
// if A[i][j] != 0
|
||||||
|
// rhs -= A[i][j]* y[j], where v_j stores y_j
|
||||||
|
const auto col_j = a_ij.index();
|
||||||
|
a_ij->mmv(v[col_j], rhs);
|
||||||
|
}
|
||||||
|
// y_i = Dinv_i * rhs
|
||||||
|
// storing y_i in v_i
|
||||||
|
Dinv_[row_i].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
OPM_TIMEBLOCK(upper_solve);
|
||||||
|
|
||||||
|
// upper triangular solve: (D + U_A) v = Dy
|
||||||
|
auto rendi = A_.beforeBegin();
|
||||||
|
for (auto row = A_.beforeEnd(); row != rendi; --row) {
|
||||||
|
const auto row_i = row.index();
|
||||||
|
// rhs = 0
|
||||||
|
Xblock rhs(0.0);
|
||||||
|
for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
|
||||||
|
// if A[i][j] != 0
|
||||||
|
// rhs += A[i][j]*v[j]
|
||||||
|
const auto col_j = a_ij.index();
|
||||||
|
a_ij->umv(v[col_j], rhs);
|
||||||
|
}
|
||||||
|
// calculate update v = M^-1*d
|
||||||
|
// v_i = y_i - Dinv_i*rhs
|
||||||
|
// before update v_i is y_i
|
||||||
|
Dinv_[row_i].mmv(rhs, v[row_i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parallelApply(X& v, const Y& d)
|
||||||
|
{
|
||||||
|
using Xblock = typename X::block_type;
|
||||||
|
using Yblock = typename Y::block_type;
|
||||||
|
{
|
||||||
|
OPM_TIMEBLOCK(lower_solve);
|
||||||
|
int level_start_idx = 0;
|
||||||
|
for (int level = 0; level < level_sets_.size(); ++level) {
|
||||||
|
const int num_of_rows_in_level = level_sets_[level].size();
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
|
||||||
|
auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
|
||||||
|
const auto row_i = reordered_to_natural_[row.index()];
|
||||||
|
Yblock rhs = d[row_i];
|
||||||
|
for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
|
||||||
|
// if A[i][j] != 0
|
||||||
|
// rhs -= A[i][j]* y[j], where v_j stores y_j
|
||||||
|
const auto col_j = a_ij.index();
|
||||||
|
a_ij->mmv(v[col_j], rhs);
|
||||||
|
}
|
||||||
|
// y_i = Dinv_i * rhs
|
||||||
|
// storing y_i in v_i
|
||||||
|
Dinv_[level_start_idx + row_idx_in_level].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
|
||||||
|
}
|
||||||
|
level_start_idx += num_of_rows_in_level;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
int level_start_idx = A_.N();
|
||||||
|
// upper triangular solve: (D + U_A) v = Dy
|
||||||
|
for (int level = level_sets_.size() - 1; level >= 0; --level) {
|
||||||
|
const int num_of_rows_in_level = level_sets_[level].size();
|
||||||
|
level_start_idx -= num_of_rows_in_level;
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for (int row_idx_in_level = num_of_rows_in_level - 1; row_idx_in_level >= 0; --row_idx_in_level) {
|
||||||
|
auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
|
||||||
|
const auto row_i = reordered_to_natural_[row.index()];
|
||||||
|
Xblock rhs(0.0);
|
||||||
|
for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
|
||||||
|
// rhs += A[i][j]*v[j]
|
||||||
|
const auto col_j = a_ij.index();
|
||||||
|
a_ij->umv(v[col_j], rhs);
|
||||||
|
}
|
||||||
|
// calculate update v = M^-1*d
|
||||||
|
// v_i = y_i - Dinv_i*rhs
|
||||||
|
// before update v_i is y_i
|
||||||
|
Dinv_[level_start_idx + row_idx_in_level].mmv(rhs, v[row_i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Dune
|
} // namespace Dune
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,28 +6,28 @@
|
|||||||
namespace Dune
|
namespace Dune
|
||||||
{
|
{
|
||||||
template <class M, class X, class Y>
|
template <class M, class X, class Y>
|
||||||
class SeqDilu;
|
class MultithreadDILU;
|
||||||
|
|
||||||
namespace Amg
|
namespace Amg
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* @brief Policy for the construction of the SeqDilu smoother
|
* @brief Policy for the construction of the MultithreadDILU smoother
|
||||||
*/
|
*/
|
||||||
template <class M, class X, class Y>
|
template <class M, class X, class Y>
|
||||||
struct ConstructionTraits<SeqDilu<M, X, Y>> {
|
struct ConstructionTraits<MultithreadDILU<M, X, Y>> {
|
||||||
using Arguments = DefaultConstructionArgs<SeqDilu<M, X, Y>>;
|
using Arguments = DefaultConstructionArgs<MultithreadDILU<M, X, Y>>;
|
||||||
|
|
||||||
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 7)
|
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 7)
|
||||||
static inline std::shared_ptr<SeqDilu<M, X, Y>> construct(Arguments& args) {
|
static inline std::shared_ptr<MultithreadDILU<M, X, Y>> construct(Arguments& args) {
|
||||||
return std::make_shared<SeqDilu<M, X, Y>>(args.getMatrix());
|
return std::make_shared<MultithreadDILU<M, X, Y>>(args.getMatrix());
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
static inline SeqDilu<M, X, Y>* construct(Arguments& args) {
|
static inline MultithreadDILU<M, X, Y>* construct(Arguments& args) {
|
||||||
return new SeqDilu<M, X, Y>(args.getMatrix());
|
return new MultithreadDILU<M, X, Y>(args.getMatrix());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void deconstruct(SeqDilu<M, X, Y>* dilu) {
|
static void deconstruct(MultithreadDILU<M, X, Y>* dilu) {
|
||||||
delete dilu;
|
delete dilu;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -164,7 +164,7 @@ struct StandardPreconditioners
|
|||||||
});
|
});
|
||||||
F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||||
DUNE_UNUSED_PARAMETER(prm);
|
DUNE_UNUSED_PARAMETER(prm);
|
||||||
return wrapBlockPreconditioner<SeqDilu<M, V, V>>(comm, op.getmat());
|
return wrapBlockPreconditioner<MultithreadDILU<M, V, V>>(comm, op.getmat());
|
||||||
});
|
});
|
||||||
F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&,
|
F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&,
|
||||||
std::size_t, const C& comm) {
|
std::size_t, const C& comm) {
|
||||||
@ -204,7 +204,7 @@ struct StandardPreconditioners
|
|||||||
return prec;
|
return prec;
|
||||||
}
|
}
|
||||||
else if (smoother == "DILU") {
|
else if (smoother == "DILU") {
|
||||||
using SeqSmoother = Dune::SeqDilu<M, V, V>;
|
using SeqSmoother = Dune::MultithreadDILU<M, V, V>;
|
||||||
using Smoother = Dune::BlockPreconditioner<V, V, C, SeqSmoother>;
|
using Smoother = Dune::BlockPreconditioner<V, V, C, SeqSmoother>;
|
||||||
using SmootherArgs = typename Dune::Amg::SmootherTraits<Smoother>::Arguments;
|
using SmootherArgs = typename Dune::Amg::SmootherTraits<Smoother>::Arguments;
|
||||||
SmootherArgs sargs;
|
SmootherArgs sargs;
|
||||||
@ -350,7 +350,7 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
|
|||||||
});
|
});
|
||||||
F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||||
DUNE_UNUSED_PARAMETER(prm);
|
DUNE_UNUSED_PARAMETER(prm);
|
||||||
return std::make_shared<SeqDilu<M, V, V>>(op.getmat());
|
return std::make_shared<MultithreadDILU<M, V, V>>(op.getmat());
|
||||||
});
|
});
|
||||||
F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||||
const int n = prm.get<int>("repeats", 1);
|
const int n = prm.get<int>("repeats", 1);
|
||||||
@ -385,7 +385,7 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
|
|||||||
using Smoother = SeqJac<M, V, V>;
|
using Smoother = SeqJac<M, V, V>;
|
||||||
return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
|
return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
|
||||||
} else if (smoother == "DILU") {
|
} else if (smoother == "DILU") {
|
||||||
using Smoother = SeqDilu<M, V, V>;
|
using Smoother = MultithreadDILU<M, V, V>;
|
||||||
return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
|
return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
|
||||||
} else if (smoother == "SOR") {
|
} else if (smoother == "SOR") {
|
||||||
using Smoother = SeqSOR<M, V, V>;
|
using Smoother = SeqSOR<M, V, V>;
|
||||||
|
@ -74,7 +74,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect2x2NoZeros, T, NumericTypes)
|
|||||||
// D_11 = A_11 - L_10 D_00_inv U_01
|
// D_11 = A_11 - L_10 D_00_inv U_01
|
||||||
auto D_11 = A[1][1] - A[1][0] * D_00_inv * A[0][1];
|
auto D_11 = A[1][1] - A[1][0] * D_00_inv * A[0][1];
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
|
|
||||||
auto Dinv = seqdilu.getDiagonal();
|
auto Dinv = seqdilu.getDiagonal();
|
||||||
|
|
||||||
@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect2x2, T, NumericTypes)
|
|||||||
// D_11 = A_11 - L_10 D_00_inv U_01 = A_11
|
// D_11 = A_11 - L_10 D_00_inv U_01 = A_11
|
||||||
auto D_11 = A[1][1];
|
auto D_11 = A[1][1];
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
|
|
||||||
|
|
||||||
auto Dinv = seqdilu.getDiagonal();
|
auto Dinv = seqdilu.getDiagonal();
|
||||||
@ -258,7 +258,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrectNoZeros, T, NumericTypes)
|
|||||||
Vector new_x = x;
|
Vector new_x = x;
|
||||||
new_x += z;
|
new_x += z;
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
seqdilu.apply(x, b);
|
seqdilu.apply(x, b);
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
@ -363,7 +363,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect1, T, NumericTypes)
|
|||||||
Vector new_x = x;
|
Vector new_x = x;
|
||||||
new_x += z;
|
new_x += z;
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
seqdilu.apply(x, b);
|
seqdilu.apply(x, b);
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
@ -462,7 +462,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect2, T, NumericTypes)
|
|||||||
Vector new_x = x;
|
Vector new_x = x;
|
||||||
new_x += z;
|
new_x += z;
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
seqdilu.apply(x, b);
|
seqdilu.apply(x, b);
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
@ -573,7 +573,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect3x3, T, NumericTypes)
|
|||||||
auto D_22_inv = D_22;
|
auto D_22_inv = D_22;
|
||||||
D_22_inv.invert();
|
D_22_inv.invert();
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
auto Dinv = seqdilu.getDiagonal();
|
auto Dinv = seqdilu.getDiagonal();
|
||||||
|
|
||||||
// diagonal stores inverse
|
// diagonal stores inverse
|
||||||
@ -766,7 +766,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect3, T, NumericTypes)
|
|||||||
Vector new_x = x;
|
Vector new_x = x;
|
||||||
new_x += z;
|
new_x += z;
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
seqdilu.apply(x, b);
|
seqdilu.apply(x, b);
|
||||||
|
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
@ -812,7 +812,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsEqualToDuneSeqILUApply, T, NumericTy
|
|||||||
A[1][1][1][1] = -1.0;
|
A[1][1][1][1] = -1.0;
|
||||||
|
|
||||||
|
|
||||||
Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
|
Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
|
||||||
Dune::SeqILU<Matrix, Vector, Vector> seqilu(A, 1.0);
|
Dune::SeqILU<Matrix, Vector, Vector> seqilu(A, 1.0);
|
||||||
|
|
||||||
Vector dilu_x(2);
|
Vector dilu_x(2);
|
||||||
|
Loading…
Reference in New Issue
Block a user