Combine BILU0 and CPR preconditioner

This commit is contained in:
Tong Dong Qiu 2021-11-25 13:40:30 +01:00
parent 11d54f31f5
commit f2225503c4
6 changed files with 106 additions and 72 deletions

View File

@ -54,7 +54,7 @@ BILU0<block_size>::~BILU0()
}
template <unsigned int block_size>
bool BILU0<block_size>::init(BlockedMatrix *mat)
bool BILU0<block_size>::init(const BlockedMatrix *mat)
{
const unsigned int bs = block_size;
@ -305,14 +305,8 @@ void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue_) {
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BILU0<n>::BILU0(ILUReorder, int); \
template BILU0<n>::~BILU0(); \
template bool BILU0<n>::init(BlockedMatrix*); \
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
template void BILU0<n>::apply(const cl::Buffer&, cl::Buffer&); \
template void BILU0<n>::setOpenCLContext(cl::Context*); \
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*);
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template class BILU0<n>;
INSTANTIATE_BDA_FUNCTIONS(1);

View File

@ -91,7 +91,7 @@ namespace Accelerator
~BILU0();
// analysis
bool init(BlockedMatrix *mat);
bool init(const BlockedMatrix *mat);
// ilu_decomposition
bool create_preconditioner(BlockedMatrix *mat);

View File

@ -43,14 +43,15 @@ using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
CPR<block_size>::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_) :
verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
{}
CPR<block_size>::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_, bool use_amg_) :
verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_), use_amg(use_amg_)
{
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
}
template <unsigned int block_size>
void CPR<block_size>::init(int Nb_, int nnzb_, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
{
void CPR<block_size>::init(int Nb_, int nnzb_, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_) {
this->Nb = Nb_;
this->nnzb = nnzb_;
this->N = Nb_ * block_size;
@ -64,9 +65,34 @@ void CPR<block_size>::init(int Nb_, int nnzb_, std::shared_ptr<cl::Context>& con
coarse_y.resize(Nb);
weights.resize(N);
diagIndices.resize(1);
bilu0->setOpenCLContext(context.get());
bilu0->setOpenCLQueue(queue.get());
}
template <unsigned int block_size>
bool CPR<block_size>::analyse_matrix(BlockedMatrix *mat_) {
bool success = bilu0->init(mat_);
if (opencl_ilu_reorder == ILUReorder::NONE) {
mat = mat_;
} else {
mat = bilu0->getRMat();
}
return success;
}
template <unsigned int block_size>
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
bool result = bilu0->create_preconditioner(mat_);
if (use_amg) {
create_preconditioner_amg(mat); // already points to bilu0::rmat if needed
}
return result;
}
// return the absolute value of the N elements for which the absolute value is highest
double get_absmax(const double *data, const int N) {
return std::abs(*std::max_element(data, data + N, [](double a, double b){return std::fabs(a) < std::fabs(b);}));
@ -156,7 +182,7 @@ void CPR<block_size>::opencl_upload() {
template <unsigned int block_size>
void CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix *mat_) {
this->mat = mat_;
try{
@ -450,7 +476,7 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &
// x = prec(y)
template <unsigned int block_size>
void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
// 0-initialize u and x vectors
events.resize(d_u.size() + 1);
err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0, sizeof(double) * Nb, nullptr, &events[0]);
@ -472,6 +498,13 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
OpenclKernels::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
}
template <unsigned int block_size>
void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
bilu0->apply(y, x);
if (use_amg) {
apply_amg(y, x);
}
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \

View File

@ -25,6 +25,7 @@
#include <dune/istl/paamg/matrixhierarchy.hh>
#include <dune/istl/umfpack.hh>
#include <opm/simulators/linalg/bda/BILU0.hpp>
#include <opm/simulators/linalg/bda/opencl.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/OpenclMatrix.hpp>
@ -69,7 +70,10 @@ private:
std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
std::unique_ptr<BILU0<block_size> > bilu0; // Blocked ILU0 preconditioner
BlockedMatrix *mat = nullptr; // input matrix, blocked
bool use_amg; // enable AMG preconditioner on pressure component
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
@ -105,19 +109,42 @@ private:
// Copy matrices and vectors to GPU
void opencl_upload();
// apply pressure correction to vector
void apply_amg(const cl::Buffer& y, cl::Buffer& x);
void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
void create_preconditioner_amg(BlockedMatrix *mat);
public:
CPR(int verbosity, ILUReorder opencl_ilu_reorder);
CPR(int verbosity, ILUReorder opencl_ilu_reorder, bool use_amg);
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
bool analyse_matrix(BlockedMatrix *mat);
// apply preconditioner, x = prec(y)
// always applies blocked ilu0
// also applies amg for pressure component if use_amg is true
void apply(const cl::Buffer& y, cl::Buffer& x);
void create_preconditioner(BlockedMatrix *mat);
bool create_preconditioner(BlockedMatrix *mat);
int* getToOrder()
{
return bilu0->getToOrder();
}
int* getFromOrder()
{
return bilu0->getFromOrder();
}
BlockedMatrix* getRMat()
{
return bilu0->getRMat();
}
};
} // namespace Accelerator

View File

@ -47,18 +47,20 @@ using Dune::Timer;
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_, std::string linsolver) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) {
if (linsolver.compare("ilu0") == 0) {
use_cpr = false;
} else if (linsolver.compare("cpr_quasiimpes") == 0) {
use_cpr = true;
bool use_amg = false; // default case if linsolver == "ilu0"
if (linsolver.compare("cpr_quasiimpes") == 0) {
use_amg = true;
} else if (linsolver.compare("cpr_trueimpes") == 0) {
OPM_THROW(std::logic_error, "Error openclSolver does not support --linsolver=cpr_trueimpes");
} else {
OPM_THROW(std::logic_error, "Error unknown value for argument --linsolver, " + linsolver);
}
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
if (use_cpr) {
cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder);
}
// bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, use_amg);
// if (use_amg) {
// cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder);
// }
std::ostringstream out;
try {
@ -205,9 +207,10 @@ openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_,
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, ILUReorder opencl_ilu_reorder_) :
BdaSolver<block_size>(verbosity_, maxit_, tolerance_), use_cpr(false), opencl_ilu_reorder(opencl_ilu_reorder_)
BdaSolver<block_size>(verbosity_, maxit_, tolerance_), opencl_ilu_reorder(opencl_ilu_reorder_)
{
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
// bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, /*use_amg=*/false);
}
template <unsigned int block_size>
@ -229,7 +232,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
double norm, norm_0;
Timer t_total, t_bilu0(false), t_cpr(false), t_spmv(false), t_well(false), t_rest(false);
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
// set r to the initial residual
// if initial x guess is not 0, must call applyblockedscaleadd(), not implemented
@ -275,15 +278,9 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
t_rest.stop();
// pw = prec(p)
t_bilu0.start();
bilu0->apply(d_p, d_pw);
t_bilu0.stop();
if (use_cpr) {
t_cpr.start();
cpr->apply(d_p, d_pw);
t_cpr.stop();
}
t_prec.start();
cpr->apply(d_p, d_pw);
t_prec.stop();
// v = A * pw
t_spmv.start();
@ -312,15 +309,9 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
it += 0.5;
// s = prec(r)
t_bilu0.start();
bilu0->apply(d_r, d_s);
t_bilu0.stop();
if (use_cpr) {
t_cpr.start();
cpr->apply(d_r, d_s);
t_cpr.stop();
}
t_prec.start();
cpr->apply(d_r, d_s);
t_prec.stop();
// t = A * s
t_spmv.start();
@ -368,10 +359,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
}
if (verbosity >= 4) {
std::ostringstream out;
out << "openclSolver::ilu_apply: " << t_bilu0.elapsed() << " s\n";
if (use_cpr) {
out << "openclSolver::cpr_apply: " << t_cpr.elapsed() << " s\n";
}
out << "openclSolver::prec_apply: " << t_prec.elapsed() << " s\n";
out << "wellContributions::apply: " << t_well.elapsed() << " s\n";
out << "openclSolver::spmv: " << t_spmv.elapsed() << " s\n";
out << "openclSolver::rest: " << t_rest.elapsed() << " s\n";
@ -397,12 +385,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
out.clear();
try {
bilu0->setOpenCLContext(context.get());
bilu0->setOpenCLQueue(queue.get());
if (use_cpr) {
cpr->init(Nb, nnzb, context, queue);
}
cpr->init(Nb, nnzb, context, queue);
#if COPY_ROW_BY_ROW
vals_contiguous = new double[N];
@ -534,16 +517,21 @@ template <unsigned int block_size>
bool openclSolverBackend<block_size>::analyse_matrix() {
Timer t;
bool success = bilu0->init(mat.get());
// bool success = bilu0->init(mat.get());
bool success = cpr->analyse_matrix(mat.get());
if (opencl_ilu_reorder == ILUReorder::NONE) {
rmat = mat.get();
} else {
toOrder = bilu0->getToOrder();
fromOrder = bilu0->getFromOrder();
rmat = bilu0->getRMat();
// toOrder = bilu0->getToOrder();
// fromOrder = bilu0->getFromOrder();
// rmat = bilu0->getRMat();
toOrder = cpr->getToOrder();
fromOrder = cpr->getFromOrder();
rmat = cpr->getRMat();
}
if (verbosity > 2) {
std::ostringstream out;
out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
@ -581,14 +569,7 @@ template <unsigned int block_size>
bool openclSolverBackend<block_size>::create_preconditioner() {
Timer t;
bool result = bilu0->create_preconditioner(mat.get());
if (use_cpr) {
if (opencl_ilu_reorder == ILUReorder::NONE) {
cpr->create_preconditioner(mat.get());
} else {
cpr->create_preconditioner(bilu0->getRMat());
}
}
bool result = cpr->create_preconditioner(mat.get());
if (verbosity > 2) {
std::ostringstream out;

View File

@ -69,10 +69,9 @@ private:
std::vector<cl::Device> devices;
std::unique_ptr<BILU0<block_size> > bilu0; // Blocked ILU0 preconditioner
std::unique_ptr<CPR<block_size> > cpr; // Constrained Pressure Residual preconditioner
// can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
bool use_cpr; // allow to enable CPR
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
bool analysis_done = false;
std::unique_ptr<BlockedMatrix> mat = nullptr; // original matrix