Added cmdline parameter --ilu-reorder-strategy

2025-02-25 18:55:30 -06:00 · 2020-10-15 10:52:10 +02:00 · 2020-10-15 10:52:10 +02:00 · a372a1e4bd
commit a372a1e4bd
parent 77099d1b65
8 changed files with 44 additions and 25 deletions
--- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp
+++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
@ -129,6 +129,10 @@ template<class TypeTag, class MyTypeTag>
 struct OpenclPlatformId {
    using type = UndefinedProperty;
 };
+template<class TypeTag, class MyTypeTag>
+struct IluReorderStrategy {
+    using type = UndefinedProperty;
+};

 template<class TypeTag>
 struct LinearSolverReduction<TypeTag, TTag::FlowIstlSolverParams> {
@ -220,6 +224,10 @@ template<class TypeTag>
 struct OpenclPlatformId<TypeTag, TTag::FlowIstlSolverParams> {
    static constexpr int value = 0;
 };
+template<class TypeTag>
+struct IluReorderStrategy<TypeTag, TTag::FlowIstlSolverParams> {
+    static constexpr auto value = "graph_coloring";
+};

 } // namespace Opm::Properties

@ -249,7 +257,7 @@ namespace Opm
        int opencl_platform_id_;
        int cpr_max_ell_iter_ = 20;
        int cpr_reuse_setup_ = 0;
-        bool use_gpu_;
+        std::string ilu_reorder_strategy_;

        template <class TypeTag>
        void init()
@ -274,6 +282,7 @@ namespace Opm
            gpu_mode_ = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
            bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
            opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
+            ilu_reorder_strategy_ = EWOMS_GET_PARAM(TypeTag, std::string, IluReorderStrategy);
        }

        template <class TypeTag>
@ -298,6 +307,7 @@ namespace Opm
            EWOMS_REGISTER_PARAM(TypeTag, std::string, GpuMode, "Use GPU cusparseSolver or openclSolver as the linear solver, usage: '--gpu-mode=[none|cusparse|opencl]'");
            EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
            EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
+            EWOMS_REGISTER_PARAM(TypeTag, std::string, IluReorderStrategy, "Choose the reordering strategy for openclSolver, usage: '--ilu-reorder-strategy=[level_scheduling|graph_coloring]', level_scheduling behaves like Dune and cusparse, graph_coloring is more aggressive and likely to be faster, but is random-based and generally increases the number of linear solves and linear iterations significantly.");
        }

        FlowLinearSolverParameters() { reset(); }
@ -320,6 +330,7 @@ namespace Opm
            gpu_mode_                 = "none";
            bda_device_id_            = 0;
            opencl_platform_id_       = 0;
+            ilu_reorder_strategy_     = "graph_coloring";
        }
    };

--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@ -138,8 +138,9 @@ namespace Opm
                const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
                const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
                const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
+                const std::string opencl_ilu_reorder = EWOMS_GET_PARAM(TypeTag, std::string, IluReorderStrategy);
                const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
-                bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+                bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder));
            }
 #else
            if (EWOMS_GET_PARAM(TypeTag, std::string, GpuMode) != "none") {
--- a/opm/simulators/linalg/bda/BILU0.cpp
+++ b/opm/simulators/linalg/bda/BILU0.cpp
@ -35,11 +35,15 @@ namespace bda
    using Dune::Timer;

    template <unsigned int block_size>
-    BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
-        verbosity(verbosity_), level_scheduling(level_scheduling_), graph_coloring(graph_coloring_)
+    BILU0<block_size>::BILU0(std::string ilu_reorder_strategy, int verbosity_) :
+        verbosity(verbosity_)
    {
-        if (level_scheduling == graph_coloring) {
-            OPM_THROW(std::logic_error, "Error, either level_scheduling or graph_coloring must be true, not both\n");
+        if (ilu_reorder_strategy.compare("level_scheduling") == 0) {
+            level_scheduling = true;
+        } else if (ilu_reorder_strategy.compare("graph_coloring") == 0) {
+            graph_coloring = true;
+        } else {
+            OPM_THROW(std::logic_error, "Error invalid argument for --ilu-reorder-strategy, usage: '--ilu-reorder-strategy=[level_scheduling|graph_coloring]'");
        }
    }

@ -79,16 +83,20 @@ namespace bda
        Timer t_analysis;
        rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
        LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
+        std::ostringstream out;
        if (level_scheduling) {
+            out << "BILU0 reordering strategy: " << "level_scheduling\n";
            findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor);
        } else if (graph_coloring) {
+            out << "BILU0 reordering strategy: " << "graph_coloring\n";
            findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor);
+        } else {
+            OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
        }
        if(verbosity >= 3){
-            std::ostringstream out;
            out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
-            OpmLog::info(out.str());
        }
+        OpmLog::info(out.str());

        delete[] CSCRowIndices;
        delete[] CSCColPointers;
@ -317,7 +325,7 @@ namespace bda


 #define INSTANTIATE_BDA_FUNCTIONS(n)                                                     \
-template BILU0<n>::BILU0(bool, bool, int);                                               \
+template BILU0<n>::BILU0(std::string, int);                                              \
 template BILU0<n>::~BILU0();                                                             \
 template bool BILU0<n>::init(BlockedMatrix<n>*);                                         \
 template bool BILU0<n>::create_preconditioner(BlockedMatrix<n>*);                        \
--- a/opm/simulators/linalg/bda/BILU0.hpp
+++ b/opm/simulators/linalg/bda/BILU0.hpp
@ -47,7 +47,9 @@ namespace bda
        int numColors;
        int verbosity;

-        bool level_scheduling, graph_coloring;
+        // Level Scheduling respects the dependencies in the original matrix, and behaves like Dune and cusparse
+        // Graph Coloring is more aggressive and is likely to increase the number of linearizations and linear iterations to converge significantly, but can still be faster on GPU because it results in more parallelism
+        bool level_scheduling = false, graph_coloring = false;

        typedef struct {
            cl::Buffer Lvals, Uvals, invDiagVals;
@ -68,7 +70,7 @@ namespace bda

    public:

-        BILU0(bool level_scheduling, bool graph_coloring, int verbosity);
+        BILU0(std::string ilu_reorder_strategy, int verbosity);

        ~BILU0();

--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@ -40,7 +40,7 @@ namespace Opm
    using bda::SolverStatus;

 template <class BridgeMatrix, class BridgeVector, int block_size>
-BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID)
+BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID OPM_UNUSED, unsigned int deviceID, std::string ilu_reorder_strategy OPM_UNUSED)
 {
    if (gpu_mode.compare("cusparse") == 0) {
 #if HAVE_CUDA
@ -52,7 +52,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mod
    } else if (gpu_mode.compare("opencl") == 0) {
 #if HAVE_OPENCL
        use_gpu = true;
-        backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+        backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, ilu_reorder_strategy));
 #else
        OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
 #endif
@ -217,7 +217,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::get_result(BridgeVector
 template BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >,              \
 Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >,                                    \
 n>::BdaBridge                                                                                                                       \
-(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);  \
+(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, std::string ilu_reorder_strategy);  \
                                                                                                                                    \
 template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >,         \
 Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >,                                    \
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@ -56,7 +56,8 @@ public:
    /// \param[in] tolerance                  required relative tolerance for BdaSolver
    /// \param[in] platformID                 the OpenCL platform ID to be used
    /// \param[in] deviceID                   the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
-    BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+    /// \param[in] ilu_reorder_strategy       select either level_scheduling or graph_coloring, see BILU0.hpp for explanation
+    BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, std::string ilu_reorder_strategy);


    /// Solve linear system, A*x = b
--- a/opm/simulators/linalg/bda/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp
@ -37,11 +37,6 @@
 // otherwise, the nonzeroes of the matrix are assumed to be in a contiguous array, and a single GPU memcpy is enough
 #define COPY_ROW_BY_ROW 0

-// Level Scheduling respects the depencies in the original matrix
-// Graph Coloring is more aggresive and is likely to change the number of linearizations and linear iterations to converge, but can still be faster on GPU because it results in more parallelism
-#define LEVEL_SCHEDULING 0
-#define GRAPH_COLORING   1
-
 namespace bda
 {

@ -49,8 +44,8 @@ using Opm::OpmLog;
 using Dune::Timer;

 template <unsigned int block_size>
-openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
-    prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
+openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, std::string ilu_reorder_strategy) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
+    prec = new Preconditioner(ilu_reorder_strategy, verbosity_);
    wcontainer = new WContainer();
 }

@ -734,8 +729,8 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
 }


-#define INSTANTIATE_BDA_FUNCTIONS(n)                                                                 \
-template openclSolverBackend<n>::openclSolverBackend(int, int, double, unsigned int, unsigned int);  \
+#define INSTANTIATE_BDA_FUNCTIONS(n)                                                                              \
+template openclSolverBackend<n>::openclSolverBackend(int, int, double, unsigned int, unsigned int, std::string);  \

 INSTANTIATE_BDA_FUNCTIONS(1);
 INSTANTIATE_BDA_FUNCTIONS(2);
--- a/opm/simulators/linalg/bda/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp
@ -178,7 +178,8 @@ public:
    /// \param[in] tolerance                  required relative tolerance for openclSolver
    /// \param[in] platformID                 the OpenCL platform to be used
    /// \param[in] deviceID                   the device to be used
-    openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+    /// \param[in] ilu_reorder_strategy       select either level_scheduling or graph_coloring, see BILU0.hpp for explanation
+    openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, std::string ilu_reorder_strategy);

    /// Destroy a openclSolver, and free memory
    ~openclSolverBackend();