Add Preconditioner, superclass of BILU0 and CPR

This commit is contained in:
Tong Dong Qiu 2021-11-30 13:20:09 +01:00
parent 50d0486b28
commit 0881089406
11 changed files with 257 additions and 97 deletions

View File

@ -104,6 +104,7 @@ if(OPENCL_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclKernels.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/OpenclMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/Preconditioner.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclWellContributions.cpp)
endif()
@ -262,6 +263,7 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/linalg/bda/opencl.hpp
opm/simulators/linalg/bda/openclKernels.hpp
opm/simulators/linalg/bda/OpenclMatrix.hpp
opm/simulators/linalg/bda/opencl/Preconditioner.hpp
opm/simulators/linalg/bda/openclSolverBackend.hpp
opm/simulators/linalg/bda/openclWellContributions.hpp
opm/simulators/linalg/bda/Matrix.hpp

View File

@ -40,7 +40,7 @@ using Dune::Timer;
template <unsigned int block_size>
BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
Preconditioner<block_size>(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
{
#if CHOW_PATEL
chowPatelIlu.setVerbosity(verbosity);
@ -53,15 +53,23 @@ BILU0<block_size>::~BILU0()
delete[] invDiagVals;
}
template <unsigned int block_size>
void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
{
context = context_.get();
queue = queue_.get();
}
template <unsigned int block_size>
bool BILU0<block_size>::init(const BlockedMatrix *mat)
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
this->N = mat->Nb * block_size;
this->Nb = mat->Nb;
this->nnz = mat->nnzbs * block_size * block_size;
this->nnzbs = mat->nnzbs;
this->nnzb = mat->nnzbs;
int *CSCRowIndices = nullptr;
int *CSCColPointers = nullptr;
@ -71,7 +79,7 @@ BILU0<block_size>::~BILU0()
} else {
toOrder.resize(Nb);
fromOrder.resize(Nb);
CSCRowIndices = new int[nnzbs];
CSCRowIndices = new int[nnzb];
CSCColPointers = new int[Nb + 1];
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
LUmat = std::make_unique<BlockedMatrix>(*rmat);
@ -295,15 +303,6 @@ BILU0<block_size>::~BILU0()
}
template <unsigned int block_size>
void BILU0<block_size>::setOpenCLContext(cl::Context *context_) {
this->context = context_;
}
template <unsigned int block_size>
void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue_) {
this->queue = queue_;
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template class BILU0<n>;

View File

@ -27,6 +27,7 @@
#include <opm/simulators/linalg/bda/opencl.hpp>
#include <opm/simulators/linalg/bda/openclKernels.hpp>
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
#include <opm/simulators/linalg/bda/ChowPatelIlu.hpp>
@ -35,17 +36,20 @@ namespace Opm
namespace Accelerator
{
/// This class implementa a Blocked ILU0 preconditioner
/// This class implements a Blocked ILU0 preconditioner
/// The decomposition is done on CPU, and reorders the rows of the matrix
template <unsigned int block_size>
class BILU0
class BILU0 : public Preconditioner<block_size>
{
typedef Preconditioner<block_size> Base;
using Base::N;
using Base::Nb;
using Base::nnz;
using Base::nnzb;
using Base::verbosity;
private:
int N; // number of rows of the matrix
int Nb; // number of blockrows of the matrix
int nnz; // number of nonzeroes of the matrix (scalar)
int nnzbs; // number of blocks of the matrix
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
#if CHOW_PATEL
@ -57,7 +61,6 @@ namespace Accelerator
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
std::vector<int> toOrder, fromOrder;
int numColors;
int verbosity;
std::once_flag pattern_uploaded;
ILUReorder opencl_ilu_reorder;
@ -90,29 +93,28 @@ namespace Accelerator
~BILU0();
// analysis
bool init(const BlockedMatrix *mat);
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
// analysis, find reordering if specified
bool analyze_matrix(BlockedMatrix *mat) override;
// ilu_decomposition
bool create_preconditioner(BlockedMatrix *mat);
bool create_preconditioner(BlockedMatrix *mat) override;
// apply preconditioner, x = prec(y)
void apply(const cl::Buffer& y, cl::Buffer& x);
void apply(const cl::Buffer& y, cl::Buffer& x) override;
void setOpenCLContext(cl::Context *context);
void setOpenCLQueue(cl::CommandQueue *queue);
int* getToOrder()
int* getToOrder() override
{
return toOrder.data();
}
int* getFromOrder()
int* getFromOrder() override
{
return fromOrder.data();
}
BlockedMatrix* getRMat()
BlockedMatrix* getRMat() override
{
return rmat.get();
}

View File

@ -43,8 +43,8 @@ using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
CPR<block_size>::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_, bool use_amg_) :
verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_), use_amg(use_amg_)
CPR<block_size>::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_) :
Preconditioner<block_size>(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
{
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
}
@ -66,14 +66,13 @@ void CPR<block_size>::init(int Nb_, int nnzb_, std::shared_ptr<cl::Context>& con
weights.resize(N);
diagIndices.resize(1);
bilu0->setOpenCLContext(context.get());
bilu0->setOpenCLQueue(queue.get());
bilu0->init(Nb, nnzb, context, queue);
}
template <unsigned int block_size>
bool CPR<block_size>::analyse_matrix(BlockedMatrix *mat_) {
bool success = bilu0->init(mat_);
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
bool success = bilu0->analyze_matrix(mat_);
if (opencl_ilu_reorder == ILUReorder::NONE) {
mat = mat_;
@ -93,14 +92,13 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
out << "CPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
OpmLog::info(out.str());
}
if (use_amg) {
Dune::Timer t_amg;
create_preconditioner_amg(mat); // already points to bilu0::rmat if needed
if (verbosity >= 3) {
std::ostringstream out;
out << "CPR create_preconditioner_amg(): " << t_amg.stop() << " s";
OpmLog::info(out.str());
}
Dune::Timer t_amg;
create_preconditioner_amg(mat); // already points to bilu0::rmat if needed
if (verbosity >= 3) {
std::ostringstream out;
out << "CPR create_preconditioner_amg(): " << t_amg.stop() << " s";
OpmLog::info(out.str());
}
return result;
}
@ -507,14 +505,12 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
OpmLog::info(out.str());
}
if (use_amg) {
Dune::Timer t_amg;
apply_amg(y, x);
if (verbosity >= 4) {
std::ostringstream out;
out << "CPR apply amg(): " << t_amg.stop() << " s";
OpmLog::info(out.str());
}
Dune::Timer t_amg;
apply_amg(y, x);
if (verbosity >= 4) {
std::ostringstream out;
out << "CPR apply amg(): " << t_amg.stop() << " s";
OpmLog::info(out.str());
}
}

View File

@ -30,6 +30,7 @@
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/OpenclMatrix.hpp>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
#include <opm/simulators/linalg/bda/openclKernels.hpp>
#include <opm/simulators/linalg/bda/ChowPatelIlu.hpp>
@ -47,16 +48,17 @@ class BlockedMatrix;
/// This class implements a Constrained Pressure Residual (CPR) preconditioner
template <unsigned int block_size>
class CPR
class CPR : public Preconditioner<block_size>
{
typedef Preconditioner<block_size> Base;
using Base::N;
using Base::Nb;
using Base::nnz;
using Base::nnzb;
using Base::verbosity;
private:
int N; // number of rows of the matrix
int Nb; // number of blockrows of the matrix
int nnz; // number of nonzeroes of the matrix (scalar)
int nnzb; // number of blocks of the matrix
int verbosity;
int num_levels;
std::vector<double> weights, coarse_vals, coarse_x, coarse_y;
std::vector<Matrix> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
@ -75,7 +77,6 @@ private:
std::unique_ptr<BILU0<block_size> > bilu0; // Blocked ILU0 preconditioner
BlockedMatrix *mat = nullptr; // input matrix, blocked
bool use_amg; // enable AMG preconditioner on pressure component
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
@ -122,30 +123,29 @@ private:
public:
CPR(int verbosity, ILUReorder opencl_ilu_reorder, bool use_amg);
CPR(int verbosity, ILUReorder opencl_ilu_reorder);
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
bool analyse_matrix(BlockedMatrix *mat);
bool analyze_matrix(BlockedMatrix *mat) override;
// apply preconditioner, x = prec(y)
// always applies blocked ilu0
// also applies amg for pressure component if use_amg is true
void apply(const cl::Buffer& y, cl::Buffer& x);
// applies blocked ilu0
// also applies amg for pressure component
void apply(const cl::Buffer& y, cl::Buffer& x) override;
bool create_preconditioner(BlockedMatrix *mat);
bool create_preconditioner(BlockedMatrix *mat) override;
int* getToOrder()
int* getToOrder() override
{
return bilu0->getToOrder();
}
int* getFromOrder()
int* getFromOrder() override
{
return bilu0->getFromOrder();
}
BlockedMatrix* getRMat()
BlockedMatrix* getRMat() override
{
return bilu0->getRMat();
}

View File

@ -21,6 +21,11 @@
#include <opm/simulators/linalg/bda/opencl.hpp>
#include <string>
namespace Opm
{
namespace Accelerator
{
/// Translate OpenCL error codes to strings
/// Integer - String combinations are defined in CL/cl.h
/// \param[in] error error code
@ -95,3 +100,6 @@ std::string getErrorString(cl_int error)
default: return "UNKNOWN_CL_CODE";
}
}
} // namespace Accelerator
} // namespace Opm

View File

@ -33,7 +33,16 @@
#include <string>
namespace Opm
{
namespace Accelerator
{
/// Translate OpenCL error codes to strings
/// Integer - String combinations are defined in CL/cl.h
/// \param[in] error error code
std::string getErrorString(cl_int error);
} // namespace Accelerator
} // namespace Opm

View File

@ -0,0 +1,60 @@
/*
Copyright 2021 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/BILU0.hpp>
#include <opm/simulators/linalg/bda/CPR.hpp>
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
namespace Opm
{
namespace Accelerator
{
template <unsigned int block_size>
std::unique_ptr<Preconditioner<block_size> > Preconditioner<block_size>::create(PreconditionerType type, int verbosity, ILUReorder opencl_ilu_reorder) {
if (type == PreconditionerType::BILU0) {
return std::make_unique<Opm::Accelerator::BILU0<block_size> >(opencl_ilu_reorder, verbosity);
} else if (type == PreconditionerType::CPR) {
return std::make_unique<Opm::Accelerator::CPR<block_size> >(verbosity, opencl_ilu_reorder);
} else {
OPM_THROW(std::logic_error, "Invalid PreconditionerType");
}
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(PreconditionerType, int, ILUReorder);
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
} //namespace Accelerator
} //namespace Opm

View File

@ -0,0 +1,80 @@
/*
Copyright 2021 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_PRECONDITIONER_HEADER_INCLUDED
#define OPM_PRECONDITIONER_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/opencl.hpp>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
namespace Opm
{
namespace Accelerator
{
class BlockedMatrix;
template <unsigned int block_size>
class Preconditioner
{
protected:
int N = 0; // number of rows of the matrix
int Nb = 0; // number of blockrows of the matrix
int nnz = 0; // number of nonzeroes of the matrix (scalar)
int nnzb = 0; // number of blocks of the matrix
int verbosity = 0;
Preconditioner(int verbosity_) :
verbosity(verbosity_)
{};
public:
enum PreconditionerType {
BILU0,
CPR
};
static std::unique_ptr<Preconditioner> create(PreconditionerType type, int verbosity, ILUReorder opencl_ilu_reorder);
virtual void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
// apply preconditioner, x = prec(y)
virtual void apply(const cl::Buffer& y, cl::Buffer& x) = 0;
// analyze matrix, e.g. the sparsity pattern
// probably only called once
virtual bool analyze_matrix(BlockedMatrix *mat) = 0;
// create/update preconditioner, probably used every linear solve
virtual bool create_preconditioner(BlockedMatrix *mat) = 0;
// get reordering mappings
virtual int* getToOrder() = 0;
virtual int* getFromOrder() = 0;
// get reordered matrix
virtual BlockedMatrix* getRMat() = 0;
};
} //namespace Accelerator
} //namespace Opm
#endif

View File

@ -47,20 +47,23 @@ using Dune::Timer;
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_, std::string linsolver) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) {
bool use_amg = false; // default case if linsolver == "ilu0"
if (linsolver.compare("cpr_quasiimpes") == 0) {
use_amg = true;
bool use_cpr;
if (linsolver.compare("ilu0") == 0) {
use_cpr = false;
} else if (linsolver.compare("cpr_quasiimpes") == 0) {
use_cpr = true;
} else if (linsolver.compare("cpr_trueimpes") == 0) {
OPM_THROW(std::logic_error, "Error openclSolver does not support --linsolver=cpr_trueimpes");
} else {
OPM_THROW(std::logic_error, "Error unknown value for argument --linsolver, " + linsolver);
}
// bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, use_amg);
// if (use_amg) {
// cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder);
// }
using PreconditionerType = typename Preconditioner<block_size>::PreconditionerType;
if (use_cpr) {
prec = Preconditioner<block_size>::create(PreconditionerType::CPR, verbosity, opencl_ilu_reorder);
} else {
prec = Preconditioner<block_size>::create(PreconditionerType::BILU0, verbosity, opencl_ilu_reorder);
}
std::ostringstream out;
try {
@ -209,8 +212,8 @@ template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, ILUReorder opencl_ilu_reorder_) :
BdaSolver<block_size>(verbosity_, maxit_, tolerance_), opencl_ilu_reorder(opencl_ilu_reorder_)
{
// bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, /*use_amg=*/false);
// prec = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
// cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, /*use_amg=*/false);
}
template <unsigned int block_size>
@ -279,7 +282,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
// pw = prec(p)
t_prec.start();
cpr->apply(d_p, d_pw);
prec->apply(d_p, d_pw);
t_prec.stop();
// v = A * pw
@ -310,7 +313,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
// s = prec(r)
t_prec.start();
cpr->apply(d_r, d_s);
prec->apply(d_r, d_s);
t_prec.stop();
// t = A * s
@ -385,7 +388,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
out.clear();
try {
cpr->init(Nb, nnzb, context, queue);
prec->init(Nb, nnzb, context, queue);
#if COPY_ROW_BY_ROW
vals_contiguous = new double[N];
@ -514,11 +517,11 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
template <unsigned int block_size>
bool openclSolverBackend<block_size>::analyse_matrix() {
bool openclSolverBackend<block_size>::analyze_matrix() {
Timer t;
// bool success = bilu0->init(mat.get());
bool success = cpr->analyse_matrix(mat.get());
bool success = prec->analyze_matrix(mat.get());
if (opencl_ilu_reorder == ILUReorder::NONE) {
rmat = mat.get();
@ -526,22 +529,22 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
// toOrder = bilu0->getToOrder();
// fromOrder = bilu0->getFromOrder();
// rmat = bilu0->getRMat();
toOrder = cpr->getToOrder();
fromOrder = cpr->getFromOrder();
rmat = cpr->getRMat();
toOrder = prec->getToOrder();
fromOrder = prec->getFromOrder();
rmat = prec->getRMat();
}
if (verbosity > 2) {
std::ostringstream out;
out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
out << "openclSolver::analyze_matrix(): " << t.stop() << " s";
OpmLog::info(out.str());
}
analysis_done = true;
return success;
} // end analyse_matrix()
} // end analyze_matrix()
template <unsigned int block_size>
@ -569,7 +572,7 @@ template <unsigned int block_size>
bool openclSolverBackend<block_size>::create_preconditioner() {
Timer t;
bool result = cpr->create_preconditioner(mat.get());
bool result = prec->create_preconditioner(mat.get());
if (verbosity > 2) {
std::ostringstream out;
@ -633,7 +636,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
if (initialized == false) {
initialize(N_, nnz_, dim, vals, rows, cols);
if (analysis_done == false) {
if (!analyse_matrix()) {
if (!analyze_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
}
}

View File

@ -27,7 +27,8 @@
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/bda/BILU0.hpp>
#include <opm/simulators/linalg/bda/CPR.hpp>
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
#include <tuple>
@ -69,7 +70,7 @@ private:
std::vector<cl::Device> devices;
std::unique_ptr<CPR<block_size> > cpr; // Constrained Pressure Residual preconditioner
std::unique_ptr<Preconditioner<block_size> > prec;
// can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
@ -159,9 +160,9 @@ private:
/// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
void update_system_on_gpu();
/// Analyse sparsity pattern to extract parallelism
/// Analyze sparsity pattern to extract parallelism
/// \return true iff analysis was successful
bool analyse_matrix();
bool analyze_matrix();
/// Perform ilu0-decomposition
/// \return true iff decomposition was successful