add OpenMP parallelized version of DILU.

Implement graphcoloring to expose rows in level sets that that can be executed in parallel during the sparse triangular solves. Add copy of A matrix that is reordered to ensure continuous memory reads when traversing the matrix in level set order. TODO: add number of threads available as constructor argument in DILU
2025-02-25 18:55:30 -06:00 · 2023-10-18 15:44:58 +02:00 · 2023-10-18 15:44:58 +02:00 · 5f6c97ff3b
commit 5f6c97ff3b
parent b00d3ca4bb
4 changed files with 281 additions and 113 deletions
--- a/opm/simulators/linalg/DILU.hpp
+++ b/opm/simulators/linalg/DILU.hpp
@ -17,49 +17,87 @@
 #ifndef OPM_DILU_HEADER_INCLUDED
 #define OPM_DILU_HEADER_INCLUDED

-#include <config.h>
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/common/TimingMacros.hpp>
 #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>

 #include <dune/common/fmatrix.hh>
-#include <dune/common/version.hh>
 #include <dune/common/unused.hh>
+#include <dune/common/version.hh>
 #include <dune/istl/bcrsmatrix.hh>

+#include <opm/simulators/linalg/GraphColoring.hpp>

+#include <cstddef>
+#include <vector>
+
+#if HAVE_OPENMP
+#include <omp.h>
+#endif
+
+// TODO: rewrite factory and constructor to keep track of a number of threads variable
 namespace Dune
 {

-/*! \brief The sequential DILU preconditioner.
+/*! \brief The OpenMP thread parallelized DILU preconditioner.
+ *  \details Safe to run serially without OpenMP. When run in parallel
+             the matrix is assumed to be symmetric.

   \tparam M The matrix type to operate on
   \tparam X Type of the update
   \tparam Y Type of the defect
 */
 template <class M, class X, class Y>
-class SeqDilu : public PreconditionerWithUpdate<X, Y>
+class MultithreadDILU : public PreconditionerWithUpdate<X, Y>
 {
-    public:
-        //! \brief The matrix type the preconditioner is for.
-        using matrix_type = M;
-        //! \brief The domain type of the preconditioner.
-        using domain_type = X;
-        //! \brief The range type of the preconditioner.
-        using range_type = Y;
-        //! \brief The field type of the preconditioner.
-        using field_type = typename X::field_type;
-        //! \brief scalar type underlying the field_type
+public:
+    //! \brief The matrix type the preconditioner is for.
+    using matrix_type = M;
+    //! \brief The domain type of the preconditioner.
+    using domain_type = X;
+    //! \brief The range type of the preconditioner.
+    using range_type = Y;
+    //! \brief The field type of the preconditioner.
+    using field_type = typename X::field_type;
+    //! \brief scalar type underlying the field_type

-    /*! \brief Constructor.
-       Constructor gets all parameters to operate the prec.
+    /*! \brief Constructor gets all parameters to operate the prec.
       \param A The matrix to operate on.
    */
-    SeqDilu(const M& A)
+    MultithreadDILU(const M& A)
        : A_(A)
+        , A_reordered_(M(A_.N(), A_.N(), A_.nonzeroes(), M::row_wise))
    {
+        OPM_TIMEBLOCK(prec_construct);
+        // TODO: rewrite so this value is set by an argument to the constructor
+#if HAVE_OPENMP
+        use_multithreading = omp_get_max_threads() > 1;
+#endif
+
+        if (use_multithreading) {
+            //! Assuming symmetric matrices using a lower triangular coloring to construct
+            //! the levels is sufficient
+            level_sets_ = Opm::getMatrixRowColoring(A_, Opm::ColoringType::LOWER);
+            reordered_to_natural_ = std::vector<std::size_t>(A_.N());
+            natural_to_reorder_ = std::vector<std::size_t>(A_.N());
+            int globCnt = 0;
+            for (const auto& level_set : level_sets_) {
+                for (const auto j : level_set) {
+                    reordered_to_natural_[globCnt] = j;
+                    natural_to_reorder_[j] = globCnt++;
+                }
+            }
+
+            for (auto dst_row_it = A_reordered_.createbegin(); dst_row_it != A_reordered_.createend(); ++dst_row_it) {
+                auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
+                // For eleemnts in A
+                for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
+                    dst_row_it.insert(elem.index());
+                }
+            }
+        }
+
        Dinv_.resize(A_.N());
-        // the Dinv matrix must be initialised
        update();
    }

@ -67,32 +105,13 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
       \brief Update the preconditioner.
       \copydoc Preconditioner::update()
    */
-    virtual void update() override
+    void update() override
    {
-        OPM_TIMEBLOCK(update);
-        
-        auto endi = A_.end();
-        for ( auto row = A_.begin(); row != endi; ++row) {
-            const auto row_i = row.index();
-            Dinv_[row_i] = A_[row_i][row_i];
-        }
-        
-        for ( auto row = A_.begin(); row != endi; ++row)
-        {
-            const auto row_i = row.index();
-            auto Dinv_temp = Dinv_[row_i];
-            for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij)
-            {
-                const auto col_j = a_ij.index();
-                const auto a_ji = A_[col_j].find(row_i);
-                // if A[i, j] != 0 and A[j, i] != 0
-                if (a_ji != A_[col_j].end()) {
-                    // Dinv_temp -= A[i, j] * d[j] * A[j, i]
-                    Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
-                }
-            }
-            Dinv_temp.invert();
-            Dinv_[row_i] = Dinv_temp;
+        OPM_TIMEBLOCK(prec_update);
+        if (use_multithreading) {
+            parallelUpdate();
+        } else {
+            serialUpdate();
        }
    }

@ -100,7 +119,7 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
       \brief Prepare the preconditioner.
       \copydoc Preconditioner::pre(X&,Y&)
    */
-    virtual void pre(X& v, Y& d) override
+    void pre(X& v, Y& d) override
    {
        DUNE_UNUSED_PARAMETER(v);
        DUNE_UNUSED_PARAMETER(d);
@ -111,52 +130,13 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
       \brief Apply the preconditioner.
       \copydoc Preconditioner::apply(X&,const Y&)
    */
-    virtual void apply(X& v, const Y& d) override
+    void apply(X& v, const Y& d) override
    {
-        // M = (D + L_A) D^-1 (D + U_A)   (a LU decomposition of M)
-        // where L_A and U_A are the strictly lower and upper parts of A and M has the properties:
-        // diag(A) = diag(M)
-        // Working with defect d = b - Ax and update v = x_{n+1} - x_n
-        // solving the product M^-1(d) using upper and lower triangular solve
-        // v = M^{-1}*d = (D + U_A)^{-1} D (D + L_A)^{-1} * d
-        OPM_TIMEBLOCK(apply);
-        using Xblock = typename X::block_type;
-        using Yblock = typename Y::block_type;
-        
-        // lower triangular solve: (D + L_A) y = d
-        auto endi = A_.end();
-        for (auto row = A_.begin(); row != endi; ++row)
-        {
-            const auto row_i = row.index();
-            Yblock rhs = d[row_i];
-            for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
-                // if  A[i][j] != 0
-                // rhs -= A[i][j]* y[j], where v_j stores y_j
-                const auto col_j = a_ij.index();
-                a_ij->mmv(v[col_j], rhs);
-            }
-            // y_i = Dinv_i * rhs
-            // storing y_i in v_i
-            Dinv_[row_i].mv(rhs, v[row_i]);  // (D + L_A)_ii = D_i
-        }
-
-        // upper triangular solve: (D + U_A) v = Dy 
-        auto rendi = A_.beforeBegin();
-        for (auto row = A_.beforeEnd(); row != rendi; --row)
-        {
-            const auto row_i = row.index();
-            // rhs = 0
-            Xblock rhs(0.0);
-            for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
-                // if A[i][j] != 0
-                // rhs += A[i][j]*v[j]
-                const auto col_j = a_ij.index();
-                a_ij->umv(v[col_j], rhs);
-            }
-            // calculate update v = M^-1*d
-            // v_i = y_i - Dinv_i*rhs
-            // before update v_i is y_i
-            Dinv_[row_i].mmv(rhs, v[row_i]);
+        OPM_TIMEBLOCK(prec_apply);
+        if (use_multithreading) {
+            parallelApply(v, d);
+        } else {
+            serialApply(v, d);
        }
    }

@ -164,11 +144,11 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
       \brief Clean up.
       \copydoc Preconditioner::post(X&)
    */
-    virtual void post(X& x) override
+    void post(X& x) override
    {
        DUNE_UNUSED_PARAMETER(x);
    }
-    
+
    std::vector<typename M::block_type> getDiagonal()
    {
        return Dinv_;
@ -183,10 +163,198 @@ class SeqDilu : public PreconditionerWithUpdate<X, Y>
 private:
    //! \brief The matrix we operate on.
    const M& A_;
+    //! \brief Copy of A_ that is reordered to store rows that can be computed simultaneously next to each other to
+    //! increase cache usage when multithreading
+    M A_reordered_;
    //! \brief The inverse of the diagnal matrix
    std::vector<typename M::block_type> Dinv_;
+    //! \brief SparseTable storing each row by level
+    Opm::SparseTable<std::size_t> level_sets_;
+    //! \brief converts from index in reordered structure to index natural ordered structure
+    std::vector<std::size_t> reordered_to_natural_;
+    //! \brief converts from index in natural ordered structure to index reordered strucutre
+    std::vector<std::size_t> natural_to_reorder_;
+    //! \brief Boolean value describing whether or not to use multithreaded version of functions
+    bool use_multithreading{false};
+
+    void serialUpdate()
+    {
+        for (std::size_t row = 0; row < A_.N(); ++row) {
+            Dinv_[row] = A_[row][row];
+        }
+        for (auto row = A_.begin(); row != A_.end(); ++row) {
+            const auto row_i = row.index();
+            auto Dinv_temp = Dinv_[row_i];
+            for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
+                const auto col_j = a_ij.index();
+                const auto a_ji = A_[col_j].find(row_i);
+                // if A[i, j] != 0 and A[j, i] != 0
+                if (a_ji != A_[col_j].end()) {
+                    // Dinv_temp -= A[i, j] * d[j] * A[j, i]
+                    Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
+                }
+            }
+            Dinv_temp.invert();
+            Dinv_[row_i] = Dinv_temp;
+        }
+    }
+
+    void parallelUpdate()
+    {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+        for (std::size_t row = 0; row != A_.N(); ++row) {
+            Dinv_[natural_to_reorder_[row]] = A_[row][row];
+        }
+
+        // TODO: is there a better/faster way of copying all values?
+        for (auto dst_row_it = A_reordered_.begin(); dst_row_it != A_reordered_.end(); ++dst_row_it) {
+            auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
+            for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
+                A_reordered_[dst_row_it.index()][elem.index()] = *elem;
+            }
+        }
+
+        int level_start_idx = 0;
+        for (int level = 0; level < level_sets_.size(); ++level) {
+            const int num_of_rows_in_level = level_sets_[level].size();
+
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+            for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
+                auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
+                const auto row_i = reordered_to_natural_[row.index()];
+                // auto Dinv_temp = Dinv_[row_i];
+                auto Dinv_temp = Dinv_[level_start_idx + row_idx_in_level];
+                for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
+                    const auto col_j = natural_to_reorder_[a_ij.index()];
+                    const auto a_ji = A_reordered_[col_j].find(row_i);
+                    if (a_ji != A_reordered_[col_j].end()) {
+                        // Dinv_temp -= A[i, j] * d[j] * A[j, i]
+                        Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
+                    }
+                }
+                Dinv_temp.invert();
+                Dinv_[level_start_idx + row_idx_in_level] = Dinv_temp;
+            }
+
+            level_start_idx += num_of_rows_in_level;
+        }
+    }
+
+    void serialApply(X& v, const Y& d)
+    {
+        // M = (D + L_A) D^-1 (D + U_A)   (a LU decomposition of M)
+        // where L_A and U_A are the strictly lower and upper parts of A and M has the properties:
+        // diag(A) = diag(M)
+        // Working with defect d = b - Ax and update v = x_{n+1} - x_n
+        // solving the product M^-1(d) using upper and lower triangular solve
+        // v = M^{-1}*d = (D + U_A)^{-1} D (D + L_A)^{-1} * d
+        // lower triangular solve: (D + L_A) y = d
+        using Xblock = typename X::block_type;
+        using Yblock = typename Y::block_type;
+        {
+            OPM_TIMEBLOCK(lower_solve);
+            auto endi = A_.end();
+            for (auto row = A_.begin(); row != endi; ++row) {
+                const auto row_i = row.index();
+                Yblock rhs = d[row_i];
+                for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
+                    // if  A[i][j] != 0
+                    // rhs -= A[i][j]* y[j], where v_j stores y_j
+                    const auto col_j = a_ij.index();
+                    a_ij->mmv(v[col_j], rhs);
+                }
+                // y_i = Dinv_i * rhs
+                // storing y_i in v_i
+                Dinv_[row_i].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
+            }
+        }
+
+        {
+            OPM_TIMEBLOCK(upper_solve);
+
+            // upper triangular solve: (D + U_A) v = Dy
+            auto rendi = A_.beforeBegin();
+            for (auto row = A_.beforeEnd(); row != rendi; --row) {
+                const auto row_i = row.index();
+                // rhs = 0
+                Xblock rhs(0.0);
+                for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
+                    // if A[i][j] != 0
+                    // rhs += A[i][j]*v[j]
+                    const auto col_j = a_ij.index();
+                    a_ij->umv(v[col_j], rhs);
+                }
+                // calculate update v = M^-1*d
+                // v_i = y_i - Dinv_i*rhs
+                // before update v_i is y_i
+                Dinv_[row_i].mmv(rhs, v[row_i]);
+            }
+        }
+    }
+
+    void parallelApply(X& v, const Y& d)
+    {
+        using Xblock = typename X::block_type;
+        using Yblock = typename Y::block_type;
+        {
+            OPM_TIMEBLOCK(lower_solve);
+            int level_start_idx = 0;
+            for (int level = 0; level < level_sets_.size(); ++level) {
+                const int num_of_rows_in_level = level_sets_[level].size();
+
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+                for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
+                    auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
+                    const auto row_i = reordered_to_natural_[row.index()];
+                    Yblock rhs = d[row_i];
+                    for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
+                        // if  A[i][j] != 0
+                        // rhs -= A[i][j]* y[j], where v_j stores y_j
+                        const auto col_j = a_ij.index();
+                        a_ij->mmv(v[col_j], rhs);
+                    }
+                    // y_i = Dinv_i * rhs
+                    // storing y_i in v_i
+                    Dinv_[level_start_idx + row_idx_in_level].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
+                }
+                level_start_idx += num_of_rows_in_level;
+            }
+        }
+
+        {
+            int level_start_idx = A_.N();
+            //  upper triangular solve: (D + U_A) v = Dy
+            for (int level = level_sets_.size() - 1; level >= 0; --level) {
+                const int num_of_rows_in_level = level_sets_[level].size();
+                level_start_idx -= num_of_rows_in_level;
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+                for (int row_idx_in_level = num_of_rows_in_level - 1; row_idx_in_level >= 0; --row_idx_in_level) {
+                    auto row = A_reordered_.begin() + level_start_idx + row_idx_in_level;
+                    const auto row_i = reordered_to_natural_[row.index()];
+                    Xblock rhs(0.0);
+                    for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
+                        // rhs += A[i][j]*v[j]
+                        const auto col_j = a_ij.index();
+                        a_ij->umv(v[col_j], rhs);
+                    }
+                    // calculate update v = M^-1*d
+                    // v_i = y_i - Dinv_i*rhs
+                    // before update v_i is y_i
+                    Dinv_[level_start_idx + row_idx_in_level].mmv(rhs, v[row_i]);
+                }
+            }
+        }
+    }
 };

 } // namespace Dune

-#endif
+#endif
--- a/opm/simulators/linalg/ExtraSmoothers.hpp
+++ b/opm/simulators/linalg/ExtraSmoothers.hpp
@ -6,28 +6,28 @@
 namespace Dune
 {
    template <class M, class X, class Y>
-    class SeqDilu;
+    class MultithreadDILU;

 namespace Amg
 {
    /**
-     * @brief Policy for the construction of the SeqDilu smoother
+     * @brief Policy for the construction of the MultithreadDILU smoother
     */
    template <class M, class X, class Y>
-    struct ConstructionTraits<SeqDilu<M, X, Y>> {
-    using Arguments = DefaultConstructionArgs<SeqDilu<M, X, Y>>;
+    struct ConstructionTraits<MultithreadDILU<M, X, Y>> {
+    using Arguments = DefaultConstructionArgs<MultithreadDILU<M, X, Y>>;

 #if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 7)
-        static inline std::shared_ptr<SeqDilu<M, X, Y>> construct(Arguments& args) {
-            return std::make_shared<SeqDilu<M, X, Y>>(args.getMatrix());
+        static inline std::shared_ptr<MultithreadDILU<M, X, Y>> construct(Arguments& args) {
+            return std::make_shared<MultithreadDILU<M, X, Y>>(args.getMatrix());
        }

 #else
-        static inline SeqDilu<M, X, Y>* construct(Arguments& args) {
-            return new SeqDilu<M, X, Y>(args.getMatrix());
+        static inline MultithreadDILU<M, X, Y>* construct(Arguments& args) {
+            return new MultithreadDILU<M, X, Y>(args.getMatrix());
        }

-        static void deconstruct(SeqDilu<M, X, Y>* dilu) {
+        static void deconstruct(MultithreadDILU<M, X, Y>* dilu) {
            delete dilu;
        }
 #endif
--- a/opm/simulators/linalg/PreconditionerFactory_impl.hpp
+++ b/opm/simulators/linalg/PreconditionerFactory_impl.hpp
@ -164,7 +164,7 @@ struct StandardPreconditioners
        });
        F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
          DUNE_UNUSED_PARAMETER(prm);
-          return wrapBlockPreconditioner<SeqDilu<M, V, V>>(comm, op.getmat());
+          return wrapBlockPreconditioner<MultithreadDILU<M, V, V>>(comm, op.getmat());
        });
        F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&,
                     std::size_t, const C& comm) {
@ -204,7 +204,7 @@ struct StandardPreconditioners
              return prec;
            }
            else if (smoother == "DILU") {
-              using SeqSmoother = Dune::SeqDilu<M, V, V>;
+              using SeqSmoother = Dune::MultithreadDILU<M, V, V>;
              using Smoother = Dune::BlockPreconditioner<V, V, C, SeqSmoother>;
              using SmootherArgs = typename Dune::Amg::SmootherTraits<Smoother>::Arguments;
              SmootherArgs sargs;
@ -350,7 +350,7 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
        });
        F::addCreator("DILU", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
            DUNE_UNUSED_PARAMETER(prm);
-            return std::make_shared<SeqDilu<M, V, V>>(op.getmat());
+            return std::make_shared<MultithreadDILU<M, V, V>>(op.getmat());
        });
        F::addCreator("Jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
            const int n = prm.get<int>("repeats", 1);
@ -385,7 +385,7 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
                    using Smoother = SeqJac<M, V, V>;
                    return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
                } else if (smoother == "DILU") {
-                    using Smoother = SeqDilu<M, V, V>;
+                    using Smoother = MultithreadDILU<M, V, V>;
                    return AMGHelper<O,C,M,V>::template makeAmgPreconditioner<Smoother>(op, prm);
                } else if (smoother == "SOR") {
                    using Smoother = SeqSOR<M, V, V>;
--- a/tests/test_dilu.cpp
+++ b/tests/test_dilu.cpp
@ -74,7 +74,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect2x2NoZeros, T, NumericTypes)
    // D_11 = A_11 - L_10 D_00_inv U_01
    auto D_11 = A[1][1] - A[1][0] * D_00_inv * A[0][1];

-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);

    auto Dinv = seqdilu.getDiagonal();

@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect2x2, T, NumericTypes)
    // D_11 = A_11 - L_10 D_00_inv U_01 = A_11
    auto D_11 = A[1][1];

-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);


    auto Dinv = seqdilu.getDiagonal();
@ -258,7 +258,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrectNoZeros, T, NumericTypes)
    Vector new_x = x;
    new_x += z;
    
-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    seqdilu.apply(x, b);

    for (int i = 0; i < 2; ++i) {
@ -363,7 +363,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect1, T, NumericTypes)
    Vector new_x = x;
    new_x += z;
    
-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    seqdilu.apply(x, b);

    for (int i = 0; i < 2; ++i) {
@ -462,7 +462,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect2, T, NumericTypes)
    Vector new_x = x;
    new_x += z;
    
-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    seqdilu.apply(x, b);

    for (int i = 0; i < 2; ++i) {
@ -573,7 +573,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUDiagIsCorrect3x3, T, NumericTypes)
    auto D_22_inv = D_22;
    D_22_inv.invert();

-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    auto Dinv = seqdilu.getDiagonal();

    // diagonal stores inverse
@ -766,7 +766,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsCorrect3, T, NumericTypes)
    Vector new_x = x;
    new_x += z;
    
-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    seqdilu.apply(x, b);

    for (int i = 0; i < 3; ++i) {
@ -812,7 +812,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(SeqDILUApplyIsEqualToDuneSeqILUApply, T, NumericTy
    A[1][1][1][1] = -1.0;


-    Dune::SeqDilu<Matrix, Vector, Vector> seqdilu(A);
+    Dune::MultithreadDILU<Matrix, Vector, Vector> seqdilu(A);
    Dune::SeqILU<Matrix, Vector, Vector> seqilu(A, 1.0);

    Vector dilu_x(2);