Merge pull request #2858 from Tongdongq/ilu-reorder-strategy

Added cmdline parameter --ilu-reorder-strategy
This commit is contained in:
Markus Blatt 2020-11-17 09:35:12 +01:00 committed by GitHub
commit 64fa5a2b30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 89 additions and 30 deletions

View File

@ -166,11 +166,13 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/linalg/bda/cuda_header.hpp
opm/simulators/linalg/bda/cusparseSolverBackend.hpp
opm/simulators/linalg/bda/Reorder.hpp
opm/simulators/linalg/bda/ILUReorder.hpp
opm/simulators/linalg/bda/opencl.hpp
opm/simulators/linalg/bda/openclKernels.hpp
opm/simulators/linalg/bda/openclSolverBackend.hpp
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
opm/simulators/linalg/bda/WellContributions.hpp
opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp
opm/simulators/linalg/amgcpr.hh
opm/simulators/linalg/twolevelmethodcpr.hh
opm/simulators/linalg/ExtractParallelGridInformationToISTL.hpp

View File

@ -129,6 +129,10 @@ template<class TypeTag, class MyTypeTag>
struct OpenclPlatformId {
using type = UndefinedProperty;
};
// Property tag for the ILU reordering strategy used by openclSolver
// (command line: --opencl-ilu-reorder). This primary template carries no
// value (UndefinedProperty); a value is supplied by a specialization.
template<class TypeTag, class MyTypeTag>
struct OpenclIluReorder {
using type = UndefinedProperty;
};
template<class TypeTag>
struct LinearSolverReduction<TypeTag, TTag::FlowIstlSolverParams> {
@ -220,6 +224,10 @@ template<class TypeTag>
struct OpenclPlatformId<TypeTag, TTag::FlowIstlSolverParams> {
static constexpr int value = 0;
};
// Default value of OpenclIluReorder for FlowIstlSolverParams: "graph_coloring".
// The other accepted value is "level_scheduling".
template<class TypeTag>
struct OpenclIluReorder<TypeTag, TTag::FlowIstlSolverParams> {
static constexpr auto value = "graph_coloring";
};
} // namespace Opm::Properties
@ -249,7 +257,7 @@ namespace Opm
int opencl_platform_id_;
int cpr_max_ell_iter_ = 20;
int cpr_reuse_setup_ = 0;
bool use_gpu_;
std::string opencl_ilu_reorder_;
template <class TypeTag>
void init()
@ -274,6 +282,7 @@ namespace Opm
gpu_mode_ = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
opencl_ilu_reorder_ = EWOMS_GET_PARAM(TypeTag, std::string, OpenclIluReorder);
}
template <class TypeTag>
@ -298,6 +307,7 @@ namespace Opm
EWOMS_REGISTER_PARAM(TypeTag, std::string, GpuMode, "Use GPU cusparseSolver or openclSolver as the linear solver, usage: '--gpu-mode=[none|cusparse|opencl]'");
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
EWOMS_REGISTER_PARAM(TypeTag, std::string, OpenclIluReorder, "Choose the reordering strategy for ILU for openclSolver, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring], level_scheduling behaves like Dune and cusparse, graph_coloring is more aggressive and likely to be faster, but is random-based and generally increases the number of linear solves and linear iterations significantly.");
}
// Default constructor: delegates to reset() to initialize the members.
FlowLinearSolverParameters() { reset(); }
@ -320,6 +330,7 @@ namespace Opm
gpu_mode_ = "none";
bda_device_id_ = 0;
opencl_platform_id_ = 0;
opencl_ilu_reorder_ = "graph_coloring";
}
};

View File

@ -138,8 +138,9 @@ namespace Opm
const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
const std::string opencl_ilu_reorder = EWOMS_GET_PARAM(TypeTag, std::string, OpenclIluReorder);
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder));
}
#else
if (EWOMS_GET_PARAM(TypeTag, std::string, GpuMode) != "none") {

View File

@ -35,13 +35,9 @@ namespace bda
using Dune::Timer;
template <unsigned int block_size>
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
verbosity(verbosity_), level_scheduling(level_scheduling_), graph_coloring(graph_coloring_)
{
if (level_scheduling == graph_coloring) {
OPM_THROW(std::logic_error, "Error, either level_scheduling or graph_coloring must be true, not both\n");
}
}
BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
{}
template <unsigned int block_size>
BILU0<block_size>::~BILU0()
@ -79,16 +75,20 @@ namespace bda
Timer t_analysis;
rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
if (level_scheduling) {
std::ostringstream out;
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
out << "BILU0 reordering strategy: " << "level_scheduling\n";
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor);
} else if (graph_coloring) {
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
out << "BILU0 reordering strategy: " << "graph_coloring\n";
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor);
} else {
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
}
if(verbosity >= 3){
std::ostringstream out;
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
OpmLog::info(out.str());
}
OpmLog::info(out.str());
delete[] CSCRowIndices;
delete[] CSCColPointers;
@ -317,7 +317,7 @@ namespace bda
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BILU0<n>::BILU0(bool, bool, int); \
template BILU0<n>::BILU0(ILUReorder, int); \
template BILU0<n>::~BILU0(); \
template bool BILU0<n>::init(BlockedMatrix<n>*); \
template bool BILU0<n>::create_preconditioner(BlockedMatrix<n>*); \

View File

@ -21,6 +21,7 @@
#define BILU0_HPP
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/opencl.hpp>
@ -47,7 +48,7 @@ namespace bda
int numColors;
int verbosity;
bool level_scheduling, graph_coloring;
ILUReorder opencl_ilu_reorder;
typedef struct {
cl::Buffer Lvals, Uvals, invDiagVals;
@ -68,7 +69,7 @@ namespace bda
public:
BILU0(bool level_scheduling, bool graph_coloring, int verbosity);
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
~BILU0();

View File

@ -38,9 +38,10 @@ namespace Opm
using bda::BdaResult;
using bda::BdaSolver;
using bda::SolverStatus;
using bda::ILUReorder;
template <class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID)
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID OPM_UNUSED, unsigned int deviceID, std::string opencl_ilu_reorder OPM_UNUSED)
{
if (gpu_mode.compare("cusparse") == 0) {
#if HAVE_CUDA
@ -52,7 +53,15 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mod
} else if (gpu_mode.compare("opencl") == 0) {
#if HAVE_OPENCL
use_gpu = true;
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
ILUReorder ilu_reorder = bda::ILUReorder::GRAPH_COLORING;
if (opencl_ilu_reorder == "level_scheduling") {
ilu_reorder = bda::ILUReorder::LEVEL_SCHEDULING;
} else if (opencl_ilu_reorder == "graph_coloring") {
ilu_reorder = bda::ILUReorder::GRAPH_COLORING;
} else {
OPM_THROW(std::logic_error, "Error invalid argument for --opencl-ilu-reorder, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring]'");
}
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, ilu_reorder));
#else
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
#endif
@ -217,7 +226,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::get_result(BridgeVector
template BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::BdaBridge \
(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID); \
(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, std::string opencl_ilu_reorder); \
\
template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \

View File

@ -25,6 +25,7 @@
#include "dune/istl/bcrsmatrix.hh"
#include <opm/simulators/linalg/matrixblock.hh>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if HAVE_CUDA
@ -39,6 +40,7 @@ namespace Opm
{
typedef Dune::InverseOperatorResult InverseOperatorResult;
using bda::ILUReorder;
/// BdaBridge acts as interface between opm-simulators with the BdaSolvers
template <class BridgeMatrix, class BridgeVector, int block_size>
@ -56,7 +58,8 @@ public:
/// \param[in] tolerance required relative tolerance for BdaSolver
/// \param[in] platformID the OpenCL platform ID to be used
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
/// \param[in] opencl_ilu_reorder select either level_scheduling or graph_coloring, see ILUReorder.hpp for explanation
BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, std::string opencl_ilu_reorder);
/// Solve linear system, A*x = b

View File

@ -0,0 +1,35 @@
/*
Copyright 2020 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ILUREORDER_HEADER_INCLUDED
#define ILUREORDER_HEADER_INCLUDED
namespace bda
{
// Reordering strategy selectable for the ILU preconditioner.
// Level Scheduling respects the dependencies in the original matrix, and behaves like Dune and cusparse
// Graph Coloring is more aggressive and is likely to increase the number of linearizations and linear iterations to converge significantly, but can still be faster on GPU because it results in more parallelism
enum class ILUReorder {
LEVEL_SCHEDULING, // respects the dependencies in the original matrix
GRAPH_COLORING // more aggressive, results in more parallelism
};
}
#endif

View File

@ -37,11 +37,6 @@
// otherwise, the nonzeroes of the matrix are assumed to be in a contiguous array, and a single GPU memcpy is enough
#define COPY_ROW_BY_ROW 0
// Level Scheduling respects the dependencies in the original matrix
// Graph Coloring is more aggressive and is likely to change the number of linearizations and linear iterations to converge, but can still be faster on GPU because it results in more parallelism
#define LEVEL_SCHEDULING 0
#define GRAPH_COLORING 1
namespace bda
{
@ -49,8 +44,8 @@ using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
prec = new Preconditioner(opencl_ilu_reorder, verbosity_);
wcontainer = new WContainer();
}
@ -734,8 +729,8 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template openclSolverBackend<n>::openclSolverBackend(int, int, double, unsigned int, unsigned int); \
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template openclSolverBackend<n>::openclSolverBackend(int, int, double, unsigned int, unsigned int, ILUReorder); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -23,6 +23,7 @@
#include <opm/simulators/linalg/bda/opencl.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
#include <opm/simulators/linalg/bda/BILU0.hpp>
@ -178,7 +179,8 @@ public:
/// \param[in] tolerance required relative tolerance for openclSolver
/// \param[in] platformID the OpenCL platform to be used
/// \param[in] deviceID the device to be used
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
/// \param[in] opencl_ilu_reorder select either level_scheduling or graph_coloring, see ILUReorder.hpp for explanation
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, ILUReorder opencl_ilu_reorder);
/// Destroy a openclSolver, and free memory
~openclSolverBackend();