From f2225503c484bf133eb5029cb2b34bf8db504606 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 25 Nov 2021 13:40:30 +0100 Subject: [PATCH] Combine BILU0 and CPR preconditioner --- opm/simulators/linalg/bda/BILU0.cpp | 12 +-- opm/simulators/linalg/bda/BILU0.hpp | 2 +- opm/simulators/linalg/bda/CPR.cpp | 47 +++++++++-- opm/simulators/linalg/bda/CPR.hpp | 31 ++++++- .../linalg/bda/openclSolverBackend.cpp | 83 +++++++------------ .../linalg/bda/openclSolverBackend.hpp | 3 +- 6 files changed, 106 insertions(+), 72 deletions(-) diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp index e057ef92c..92fb3bea7 100644 --- a/opm/simulators/linalg/bda/BILU0.cpp +++ b/opm/simulators/linalg/bda/BILU0.cpp @@ -54,7 +54,7 @@ BILU0::~BILU0() } template - bool BILU0::init(BlockedMatrix *mat) + bool BILU0::init(const BlockedMatrix *mat) { const unsigned int bs = block_size; @@ -305,14 +305,8 @@ void BILU0::setOpenCLQueue(cl::CommandQueue *queue_) { } -#define INSTANTIATE_BDA_FUNCTIONS(n) \ -template BILU0::BILU0(ILUReorder, int); \ -template BILU0::~BILU0(); \ -template bool BILU0::init(BlockedMatrix*); \ -template bool BILU0::create_preconditioner(BlockedMatrix*); \ -template void BILU0::apply(const cl::Buffer&, cl::Buffer&); \ -template void BILU0::setOpenCLContext(cl::Context*); \ -template void BILU0::setOpenCLQueue(cl::CommandQueue*); +#define INSTANTIATE_BDA_FUNCTIONS(n) \ +template class BILU0; INSTANTIATE_BDA_FUNCTIONS(1); diff --git a/opm/simulators/linalg/bda/BILU0.hpp b/opm/simulators/linalg/bda/BILU0.hpp index de182101c..d862ddc7c 100644 --- a/opm/simulators/linalg/bda/BILU0.hpp +++ b/opm/simulators/linalg/bda/BILU0.hpp @@ -91,7 +91,7 @@ namespace Accelerator ~BILU0(); // analysis - bool init(BlockedMatrix *mat); + bool init(const BlockedMatrix *mat); // ilu_decomposition bool create_preconditioner(BlockedMatrix *mat); diff --git a/opm/simulators/linalg/bda/CPR.cpp b/opm/simulators/linalg/bda/CPR.cpp index 469d97ec1..1a9aaf0b1 100644 --- a/opm/simulators/linalg/bda/CPR.cpp +++ b/opm/simulators/linalg/bda/CPR.cpp @@ -43,14 +43,15 @@ using Opm::OpmLog; using Dune::Timer; template -CPR::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_) : - verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_) -{} +CPR::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_, bool use_amg_) : + verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_), use_amg(use_amg_) +{ + bilu0 = std::make_unique >(opencl_ilu_reorder, verbosity_); +} template -void CPR::init(int Nb_, int nnzb_, std::shared_ptr& context_, std::shared_ptr& queue_) -{ +void CPR::init(int Nb_, int nnzb_, std::shared_ptr& context_, std::shared_ptr& queue_) { this->Nb = Nb_; this->nnzb = nnzb_; this->N = Nb_ * block_size; @@ -64,9 +65,34 @@ void CPR::init(int Nb_, int nnzb_, std::shared_ptr& con coarse_y.resize(Nb); weights.resize(N); diagIndices.resize(1); + + bilu0->setOpenCLContext(context.get()); + bilu0->setOpenCLQueue(queue.get()); } +template +bool CPR::analyse_matrix(BlockedMatrix *mat_) { + bool success = bilu0->init(mat_); + + if (opencl_ilu_reorder == ILUReorder::NONE) { + mat = mat_; + } else { + mat = bilu0->getRMat(); + } + return success; +} + + +template +bool CPR::create_preconditioner(BlockedMatrix *mat_) { + bool result = bilu0->create_preconditioner(mat_); + if (use_amg) { + create_preconditioner_amg(mat); // already points to bilu0::rmat if needed + } + return result; +} + // return the absolute value of the N elements for which the absolute value is highest double get_absmax(const double *data, const int N) { return std::abs(*std::max_element(data, data + N, [](double a, double b){return std::fabs(a) < std::fabs(b);})); @@ -156,7 +182,7 @@ void CPR::opencl_upload() { template -void CPR::create_preconditioner(BlockedMatrix *mat_) { +void CPR::create_preconditioner_amg(BlockedMatrix *mat_) { this->mat = mat_; try{ @@ -450,7 +476,7 @@ void CPR::amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer & // x = prec(y) template -void CPR::apply(const cl::Buffer& y, cl::Buffer& x) { +void CPR::apply_amg(const cl::Buffer& y, cl::Buffer& x) { // 0-initialize u and x vectors events.resize(d_u.size() + 1); err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0, sizeof(double) * Nb, nullptr, &events[0]); @@ -472,6 +498,13 @@ void CPR::apply(const cl::Buffer& y, cl::Buffer& x) { OpenclKernels::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb); } +template +void CPR::apply(const cl::Buffer& y, cl::Buffer& x) { + bilu0->apply(y, x); + if (use_amg) { + apply_amg(y, x); + } +} #define INSTANTIATE_BDA_FUNCTIONS(n) \ diff --git a/opm/simulators/linalg/bda/CPR.hpp b/opm/simulators/linalg/bda/CPR.hpp index cd1964970..62d66510a 100644 --- a/opm/simulators/linalg/bda/CPR.hpp +++ b/opm/simulators/linalg/bda/CPR.hpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -69,7 +70,10 @@ private: std::unique_ptr d_coarse_y, d_coarse_x; // stores the scalar vectors std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once + std::unique_ptr > bilu0; // Blocked ILU0 preconditioner BlockedMatrix *mat = nullptr; // input matrix, blocked + + bool use_amg; // enable AMG preconditioner on pressure component using DuneMat = Dune::BCRSMatrix >; using DuneVec = Dune::BlockVector >; using MatrixOperator = Dune::MatrixAdapter; @@ -105,19 +109,42 @@ private: // Copy matrices and vectors to GPU void opencl_upload(); + // apply pressure correction to vector + void apply_amg(const cl::Buffer& y, cl::Buffer& x); + void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x); + void create_preconditioner_amg(BlockedMatrix *mat); + public: - CPR(int verbosity, ILUReorder opencl_ilu_reorder); + CPR(int verbosity, ILUReorder opencl_ilu_reorder, bool use_amg); void init(int Nb, int nnzb, std::shared_ptr& context, std::shared_ptr& queue); + bool analyse_matrix(BlockedMatrix *mat); + // apply preconditioner, x = prec(y) + // always applies blocked ilu0 + // also applies amg for pressure component if use_amg is true void apply(const cl::Buffer& y, cl::Buffer& x); - void create_preconditioner(BlockedMatrix *mat); + bool create_preconditioner(BlockedMatrix *mat); + int* getToOrder() + { + return bilu0->getToOrder(); + } + + int* getFromOrder() + { + return bilu0->getFromOrder(); + } + + BlockedMatrix* getRMat() + { + return bilu0->getRMat(); + } }; } // namespace Accelerator diff --git a/opm/simulators/linalg/bda/openclSolverBackend.cpp b/opm/simulators/linalg/bda/openclSolverBackend.cpp index 878d36288..c4263724b 100644 --- a/opm/simulators/linalg/bda/openclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp @@ -47,18 +47,20 @@ using Dune::Timer; template openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_, std::string linsolver) : BdaSolver(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) { - if (linsolver.compare("ilu0") == 0) { - use_cpr = false; - } else if (linsolver.compare("cpr_quasiimpes") == 0) { - use_cpr = true; + bool use_amg = false; // default case if linsolver == "ilu0" + if (linsolver.compare("cpr_quasiimpes") == 0) { + use_amg = true; + } else if (linsolver.compare("cpr_trueimpes") == 0) { + OPM_THROW(std::logic_error, "Error openclSolver does not support --linsolver=cpr_trueimpes"); } else { OPM_THROW(std::logic_error, "Error unknown value for argument --linsolver, " + linsolver); } - bilu0 = std::make_unique >(opencl_ilu_reorder, verbosity_); - if (use_cpr) { - cpr = std::make_unique >(verbosity_, opencl_ilu_reorder); - } + // bilu0 = std::make_unique >(opencl_ilu_reorder, verbosity_); + cpr = std::make_unique >(verbosity_, opencl_ilu_reorder, use_amg); + // if (use_amg) { + // cpr = std::make_unique >(verbosity_, opencl_ilu_reorder); + // } std::ostringstream out; try { @@ -205,9 +207,10 @@ openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, template openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, ILUReorder opencl_ilu_reorder_) : - BdaSolver(verbosity_, maxit_, tolerance_), use_cpr(false), opencl_ilu_reorder(opencl_ilu_reorder_) + BdaSolver(verbosity_, maxit_, tolerance_), opencl_ilu_reorder(opencl_ilu_reorder_) { - bilu0 = std::make_unique >(opencl_ilu_reorder, verbosity_); + // bilu0 = std::make_unique >(opencl_ilu_reorder, verbosity_); + cpr = std::make_unique >(verbosity_, opencl_ilu_reorder, /*use_amg=*/false); } template @@ -229,7 +232,7 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr double rho, rhop, beta, alpha, omega, tmp1, tmp2; double norm, norm_0; - Timer t_total, t_bilu0(false), t_cpr(false), t_spmv(false), t_well(false), t_rest(false); + Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false); // set r to the initial residual // if initial x guess is not 0, must call applyblockedscaleadd(), not implemented @@ -275,15 +278,9 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr t_rest.stop(); // pw = prec(p) - t_bilu0.start(); - bilu0->apply(d_p, d_pw); - t_bilu0.stop(); - - if (use_cpr) { - t_cpr.start(); - cpr->apply(d_p, d_pw); - t_cpr.stop(); - } + t_prec.start(); + cpr->apply(d_p, d_pw); + t_prec.stop(); // v = A * pw t_spmv.start(); @@ -312,15 +309,9 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr it += 0.5; // s = prec(r) - t_bilu0.start(); - bilu0->apply(d_r, d_s); - t_bilu0.stop(); - - if (use_cpr) { - t_cpr.start(); - cpr->apply(d_r, d_s); - t_cpr.stop(); - } + t_prec.start(); + cpr->apply(d_r, d_s); + t_prec.stop(); // t = A * s t_spmv.start(); @@ -368,10 +359,7 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr } if (verbosity >= 4) { std::ostringstream out; - out << "openclSolver::ilu_apply: " << t_bilu0.elapsed() << " s\n"; - if (use_cpr) { - out << "openclSolver::cpr_apply: " << t_cpr.elapsed() << " s\n"; - } + out << "openclSolver::prec_apply: " << t_prec.elapsed() << " s\n"; out << "wellContributions::apply: " << t_well.elapsed() << " s\n"; out << "openclSolver::spmv: " << t_spmv.elapsed() << " s\n"; out << "openclSolver::rest: " << t_rest.elapsed() << " s\n"; @@ -397,12 +385,7 @@ void openclSolverBackend::initialize(int N_, int nnz_, int dim, doub out.clear(); try { - bilu0->setOpenCLContext(context.get()); - bilu0->setOpenCLQueue(queue.get()); - - if (use_cpr) { - cpr->init(Nb, nnzb, context, queue); - } + cpr->init(Nb, nnzb, context, queue); #if COPY_ROW_BY_ROW vals_contiguous = new double[N]; @@ -534,16 +517,21 @@ template bool openclSolverBackend::analyse_matrix() { Timer t; - bool success = bilu0->init(mat.get()); + // bool success = bilu0->init(mat.get()); + bool success = cpr->analyse_matrix(mat.get()); if (opencl_ilu_reorder == ILUReorder::NONE) { rmat = mat.get(); } else { - toOrder = bilu0->getToOrder(); - fromOrder = bilu0->getFromOrder(); - rmat = bilu0->getRMat(); + // toOrder = bilu0->getToOrder(); + // fromOrder = bilu0->getFromOrder(); + // rmat = bilu0->getRMat(); + toOrder = cpr->getToOrder(); + fromOrder = cpr->getFromOrder(); + rmat = cpr->getRMat(); } + if (verbosity > 2) { std::ostringstream out; out << "openclSolver::analyse_matrix(): " << t.stop() << " s"; @@ -581,14 +569,7 @@ template bool openclSolverBackend::create_preconditioner() { Timer t; - bool result = bilu0->create_preconditioner(mat.get()); - if (use_cpr) { - if (opencl_ilu_reorder == ILUReorder::NONE) { - cpr->create_preconditioner(mat.get()); - } else { - cpr->create_preconditioner(bilu0->getRMat()); - } - } + bool result = cpr->create_preconditioner(mat.get()); if (verbosity > 2) { std::ostringstream out; diff --git a/opm/simulators/linalg/bda/openclSolverBackend.hpp b/opm/simulators/linalg/bda/openclSolverBackend.hpp index 310c461a7..e6988eb73 100644 --- a/opm/simulators/linalg/bda/openclSolverBackend.hpp +++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp @@ -69,10 +69,9 @@ private: std::vector devices; - std::unique_ptr > bilu0; // Blocked ILU0 preconditioner std::unique_ptr > cpr; // Constrained Pressure Residual preconditioner + // can perform blocked ILU0 and AMG on pressure component bool is_root; // allow for nested solvers, the root solver is called by BdaBridge - bool use_cpr; // allow to enable CPR int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings bool analysis_done = false; std::unique_ptr mat = nullptr; // original matrix