CPR: template Scalar type

Arne Morten Kvarving 2024-04-15 22:38:04 +02:00
parent 1f39e6a9a9
commit 7e1f4629ed
3 changed files with 158 additions and 149 deletions

@@ -34,33 +34,31 @@
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
namespace Opm::Accelerator {
using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
CPR<block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
template<class Scalar, unsigned int block_size>
CPR<Scalar,block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
: Base(verbosity_)
, opencl_ilu_parallel(opencl_ilu_parallel_)
{
bilu0 = std::make_unique<BILU0<double,block_size> >(opencl_ilu_parallel, verbosity_);
bilu0 = std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity_);
diagIndices.resize(1);
}
template <unsigned int block_size>
void CPR<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_) {
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::
setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
{
context = context_;
queue = queue_;
bilu0->setOpencl(context, queue);
}
template <unsigned int block_size>
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
template<class Scalar, unsigned int block_size>
bool CPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
{
this->Nb = mat_->Nb;
this->nnzb = mat_->nnzbs;
@@ -72,9 +70,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
return success;
}
template <unsigned int block_size>
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
BlockedMatrix<double>* jacMat)
template<class Scalar, unsigned int block_size>
bool CPR<Scalar,block_size>::
analyze_matrix(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
{
this->Nb = mat_->Nb;
this->nnzb = mat_->nnzbs;
@@ -87,10 +85,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
return success;
}
template <unsigned int block_size>
bool CPR<block_size>::
create_preconditioner(BlockedMatrix<double>* mat_,
BlockedMatrix<double>* jacMat)
template<class Scalar, unsigned int block_size>
bool CPR<Scalar,block_size>::
create_preconditioner(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
{
Dune::Timer t_bilu0;
bool result = bilu0->create_preconditioner(mat_, jacMat);
@@ -110,8 +107,9 @@ bool CPR<block_size>::
return result;
}
template <unsigned int block_size>
bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
template<class Scalar, unsigned int block_size>
bool CPR<Scalar,block_size>::
create_preconditioner(BlockedMatrix<Scalar>* mat_)
{
Dune::Timer t_bilu0;
bool result = bilu0->create_preconditioner(mat_);
@@ -131,26 +129,30 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
return result;
}
// return the absolute value of the N elements for which the absolute value is highest
double get_absmax(const double *data, const int N) {
return std::abs(*std::max_element(data, data + N, [](double a, double b){return std::fabs(a) < std::fabs(b);}));
template<class Scalar>
Scalar get_absmax(const Scalar* data, const int N)
{
return std::abs(*std::max_element(data, data + N,
[](Scalar a, Scalar b)
{ return std::fabs(a) < std::fabs(b); }));
}
// solve A^T * x = b
void solve_transposed_3x3(const double *A, const double *b, double *x) {
template<class Scalar>
void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x)
{
const int B = 3;
// from dune-common/densematrix.hh, but transposed, so replace [r*B+c] with [r+c*B]
double t4 = A[0+0*B] * A[1+1*B];
double t6 = A[0+0*B] * A[1+2*B];
double t8 = A[0+1*B] * A[1+0*B];
double t10 = A[0+2*B] * A[1+0*B];
double t12 = A[0+1*B] * A[2+0*B];
double t14 = A[0+2*B] * A[2+0*B];
Scalar t4 = A[0+0*B] * A[1+1*B];
Scalar t6 = A[0+0*B] * A[1+2*B];
Scalar t8 = A[0+1*B] * A[1+0*B];
Scalar t10 = A[0+2*B] * A[1+0*B];
Scalar t12 = A[0+1*B] * A[2+0*B];
Scalar t14 = A[0+2*B] * A[2+0*B];
double d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
t10*A[2+1*B]+t12*A[1+2*B]-t14*A[1+1*B]); //determinant
Scalar d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
t10*A[2+1*B]+t12*A[1+2*B]-t14*A[1+1*B]); // determinant
x[0] = (b[0]*A[1+1*B]*A[2+2*B] - b[0]*A[2+1*B]*A[1+2*B]
- b[1] *A[0+1*B]*A[2+2*B] + b[1]*A[2+1*B]*A[0+2*B]
@@ -165,44 +167,49 @@ void solve_transposed_3x3(const double *A, const double *b, double *x) {
+ A[2+0*B] *A[0+1*B]*b[1] - A[2+0*B]*A[1+1*B]*b[0]) / d;
}
template <unsigned int block_size>
void CPR<block_size>::init_opencl_buffers() {
template<class Scalar, unsigned int block_size>
void CPR<Scalar, block_size>::init_opencl_buffers()
{
d_Amatrices.reserve(num_levels);
d_Rmatrices.reserve(num_levels - 1);
d_invDiags.reserve(num_levels - 1);
for (Matrix<double>& m : Amatrices) {
for (Matrix<Scalar>& m : Amatrices) {
d_Amatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
}
for (Matrix<double>& m : Rmatrices) {
for (Matrix<Scalar>& m : Rmatrices) {
d_Rmatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
d_PcolIndices.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(int) * m.M);
d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M); // create a cl::Buffer
d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M);
d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M); // create a cl::Buffer
d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M);
}
d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
d_mat = std::make_unique<OpenclMatrix<double>>(context.get(), Nb, Nb, nnzb, block_size);
d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
d_mat = std::make_unique<OpenclMatrix<Scalar>>(context.get(), Nb, Nb, nnzb, block_size);
d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
}
template <unsigned int block_size>
void CPR<block_size>::opencl_upload() {
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::opencl_upload()
{
d_mat->upload(queue.get(), mat);
err = CL_SUCCESS;
events.resize(2 * Rmatrices.size() + 1);
err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0, sizeof(double) * N, weights.data(), nullptr, &events[0]);
err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0,
sizeof(Scalar) * N, weights.data(), nullptr, &events[0]);
for (unsigned int i = 0; i < Rmatrices.size(); ++i) {
d_Amatrices[i].upload(queue.get(), &Amatrices[i]);
err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0, sizeof(double) * Amatrices[i].N, invDiags[i].data(), nullptr, &events[2*i+1]);
err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0, sizeof(int) * Amatrices[i].N, PcolIndices[i].data(), nullptr, &events[2*i+2]);
err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0,
sizeof(Scalar) * Amatrices[i].N, invDiags[i].data(),
nullptr, &events[2*i+1]);
err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0,
sizeof(int) * Amatrices[i].N, PcolIndices[i].data(),
nullptr, &events[2*i+2]);
}
cl::WaitForEvents(events);
events.clear();
@@ -215,8 +222,9 @@ void CPR<block_size>::opencl_upload() {
}
}
template <unsigned int block_size>
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::
create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
{
this->mat = mat_;
@@ -225,8 +233,8 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
coarse_y.resize(Nb);
weights.resize(N);
try{
double rhs[] = {0, 0, 0};
try {
Scalar rhs[] = {0, 0, 0};
rhs[pressure_idx] = 1;
// find diagonal index for each row
@@ -244,12 +252,12 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
// calculate weights for each row
for (int row = 0; row < Nb; ++row) {
// solve to find weights
double *row_weights = weights.data() + block_size * row; // weights for this row
Scalar* row_weights = weights.data() + block_size * row; // weights for this row
solve_transposed_3x3(mat->nnzValues + block_size * block_size * diagIndices[0][row], rhs, row_weights);
// normalize weights for this row
double abs_max = get_absmax(row_weights, block_size);
for(unsigned int i = 0; i < block_size; i++){
Scalar abs_max = get_absmax(row_weights, block_size);
for (unsigned int i = 0; i < block_size; i++) {
row_weights[i] /= abs_max;
}
}
@@ -260,9 +268,9 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
int start = mat->rowPointers[row];
int end = mat->rowPointers[row + 1];
for (int idx = start; idx < end; ++idx) {
double *block = mat->nnzValues + idx * block_size * block_size;
double *row_weights = weights.data() + block_size * row;
double value = 0.0;
Scalar* block = mat->nnzValues + idx * block_size * block_size;
Scalar* row_weights = weights.data() + block_size * row;
Scalar value = 0.0;
for (unsigned int i = 0; i < block_size; ++i) {
value += block[block_size * i + pressure_idx] * row_weights[i];
}
@@ -279,10 +287,10 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
if (recalculate_aggregates) {
dune_coarse = std::make_unique<DuneMat>(Nb, Nb, nnzb, DuneMat::row_wise);
typedef DuneMat::CreateIterator Iter;
using Iter = typename DuneMat::CreateIterator;
// setup sparsity pattern
for(Iter row = dune_coarse->createbegin(); row != dune_coarse->createend(); ++row){
for (Iter row = dune_coarse->createbegin(); row != dune_coarse->createend(); ++row) {
int start = mat->rowPointers[row.index()];
int end = mat->rowPointers[row.index() + 1];
for (int idx = start; idx < end; ++idx) {
@@ -305,7 +313,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
Dune::Amg::SequentialInformation seqinfo;
dune_amg = std::make_unique<DuneAmg>(dune_op, Dune::stackobject_to_shared_ptr(seqinfo));
Opm::PropertyTree property_tree;
PropertyTree property_tree;
property_tree.put("alpha", 0.333333333333);
// The matrix has a symmetric sparsity pattern, but the values are not symmetric
@@ -318,7 +326,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
num_pre_smooth_steps = c.getNoPreSmoothSteps();
num_post_smooth_steps = c.getNoPostSmoothSteps();
dune_amg->build<OverlapFlags>(c);
dune_amg->template build<OverlapFlags>(c);
analyzeHierarchy();
analyzeAggregateMaps();
@@ -354,10 +362,10 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
}
}
template <unsigned int block_size>
void CPR<block_size>::analyzeHierarchy() {
const DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::analyzeHierarchy()
{
const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
// store coarsest AMG level in umfpack format, also performs LU decomposition
umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
@@ -375,8 +383,8 @@ void CPR<block_size>::analyzeHierarchy() {
// matrixIter.dereference() returns MatrixAdapter
// matrixIter.dereference().getmat() returns BCRSMat
DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
for(int level = 0; level < num_levels; ++matrixIter, ++level) {
typename DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
for (int level = 0; level < num_levels; ++matrixIter, ++level) {
const auto& A = matrixIter.dereference().getmat();
level_sizes[level] = A.N();
diagIndices[level].reserve(A.N());
@@ -398,38 +406,38 @@ void CPR<block_size>::analyzeHierarchy() {
}
}
Opm::BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers,
Amatrices.back().colIndices);
// compute inverse diagonal values for current level
invDiags.emplace_back(A.N());
for (unsigned int row = 0; row < A.N(); ++row) {
invDiags.back()[row] = 1 / Amatrices.back().nnzValues[diagIndices[level][row]];
invDiags.back()[row] = 1.0 / Amatrices.back().nnzValues[diagIndices[level][row]];
}
}
}
template <unsigned int block_size>
void CPR<block_size>::analyzeAggregateMaps() {
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::analyzeAggregateMaps()
{
PcolIndices.resize(num_levels - 1);
Rmatrices.clear();
const DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
const typename DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
for(int level = 0; level < num_levels - 1; ++mapIter, ++level) {
DuneAmg::AggregatesMap *map = *mapIter;
typename DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
for (int level = 0; level < num_levels - 1; ++mapIter, ++level) {
typename DuneAmg::AggregatesMap* map = *mapIter;
Rmatrices.emplace_back(level_sizes[level+1], level_sizes[level], level_sizes[level]);
std::fill(Rmatrices.back().nnzValues.begin(), Rmatrices.back().nnzValues.end(), 1.0);
// get indices for each row of P and R
std::vector<std::vector<unsigned> > indicesR(level_sizes[level+1]);
std::vector<std::vector<unsigned>> indicesR(level_sizes[level+1]);
PcolIndices[level].resize(level_sizes[level]);
using AggregateIterator = DuneAmg::AggregatesMap::const_iterator;
for(AggregateIterator ai = map->begin(); ai != map->end(); ++ai){
using AggregateIterator = typename DuneAmg::AggregatesMap::const_iterator;
for (AggregateIterator ai = map->begin(); ai != map->end(); ++ai) {
if (*ai != DuneAmg::AggregatesMap::ISOLATED) {
const long int diff = ai - map->begin();
PcolIndices[level][diff] = *ai;
@@ -449,20 +457,20 @@ void CPR<block_size>::analyzeAggregateMaps() {
}
}
template <unsigned int block_size>
void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
{
OpenclMatrix<double>* A = &d_Amatrices[level];
OpenclMatrix<double>* R = &d_Rmatrices[level];
OpenclMatrix<Scalar>* A = &d_Amatrices[level];
OpenclMatrix<Scalar>* R = &d_Rmatrices[level];
int Ncur = A->Nb;
if (level == num_levels - 1) {
// solve coarsest level
std::vector<double> h_y(Ncur), h_x(Ncur, 0);
std::vector<Scalar> h_y(Ncur), h_x(Ncur, 0);
events.resize(1);
err = queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * Ncur, h_y.data(), nullptr, &events[0]);
err = queue->enqueueReadBuffer(y, CL_FALSE, 0,
sizeof(Scalar) * Ncur, h_y.data(), nullptr, &events[0]);
cl::WaitForEvents(events);
events.clear();
if (err != CL_SUCCESS) {
@@ -474,7 +482,8 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
umfpack.apply(h_x.data(), h_y.data());
events.resize(1);
err = queue->enqueueWriteBuffer(x, CL_FALSE, 0, sizeof(double) * Ncur, h_x.data(), nullptr, &events[0]);
err = queue->enqueueWriteBuffer(x, CL_FALSE, 0,
sizeof(Scalar) * Ncur, h_x.data(), nullptr, &events[0]);
cl::WaitForEvents(events);
events.clear();
if (err != CL_SUCCESS) {
@@ -490,35 +499,37 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
cl::Buffer& u = d_u[level]; // u was 0-initialized earlier
// presmooth
double jacobi_damping = 0.65; // default value in amgcl: 0.72
for (unsigned i = 0; i < num_pre_smooth_steps; ++i){
OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
Scalar jacobi_damping = 0.65; // default value in amgcl: 0.72
for (unsigned i = 0; i < num_pre_smooth_steps; ++i) {
OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
}
// move to coarser level
OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
OpenclKernels<double>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
OpenclKernels<Scalar>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
amg_cycle_gpu(level + 1, f, u);
OpenclKernels<double>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
OpenclKernels<Scalar>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
// postsmooth
for (unsigned i = 0; i < num_post_smooth_steps; ++i){
OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers,
for (unsigned i = 0; i < num_post_smooth_steps; ++i) {
OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers,
x, y, t, Ncur, 1);
OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
}
}
// x = prec(y)
template <unsigned int block_size>
void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
{
// 0-initialize u and x vectors
events.resize(d_u.size() + 1);
err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0, sizeof(double) * Nb, nullptr, &events[0]);
err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0,
sizeof(Scalar) * Nb, nullptr, &events[0]);
for (unsigned int i = 0; i < d_u.size(); ++i) {
err |= queue->enqueueFillBuffer(d_u[i], 0, 0, sizeof(double) * Rmatrices[i].N, nullptr, &events[i + 1]);
err |= queue->enqueueFillBuffer(d_u[i], 0, 0,
sizeof(Scalar) * Rmatrices[i].N, nullptr, &events[i + 1]);
}
cl::WaitForEvents(events);
events.clear();
@@ -527,17 +538,18 @@ void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
OPM_THROW(std::logic_error, "CPR OpenCL enqueueWriteBuffer error");
}
OpenclKernels<double>::residual(d_mat->nnzValues, d_mat->colIndices,
OpenclKernels<Scalar>::residual(d_mat->nnzValues, d_mat->colIndices,
d_mat->rowPointers, x, y, *d_rs, Nb, block_size);
OpenclKernels<double>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
OpenclKernels<Scalar>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
amg_cycle_gpu(0, *d_coarse_y, *d_coarse_x);
OpenclKernels<double>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
OpenclKernels<Scalar>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
}
template <unsigned int block_size>
void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
template<class Scalar, unsigned int block_size>
void CPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
{
Dune::Timer t_bilu0;
bilu0->apply(y, x);
if (verbosity >= 4) {
@@ -555,19 +567,14 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
}
}
#define INSTANCE_TYPE(T) \
template class CPR<T,1>; \
template class CPR<T,2>; \
template class CPR<T,3>; \
template class CPR<T,4>; \
template class CPR<T,5>; \
template class CPR<T,6>;
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template class CPR<n>;
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
INSTANCE_TYPE(double)
} // namespace Opm::Accelerator
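A standalone illustration of the quasi-IMPES weight step above (solve the transposed 3x3 diagonal block against a pressure unit vector, then normalize by the largest absolute entry). This is a self-contained sketch, not OPM code: the example block, the pressure index and the Gaussian-elimination helper solve_transposed() are assumptions made for illustration, where the file above instead uses the hand-expanded Cramer formulas of solve_transposed_3x3().

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>
#include <utility>

// Solve A^T * x = b for a small row-major B x B block via Gaussian elimination
// with partial pivoting (stand-in for the Cramer-rule solve_transposed_3x3 above).
template<class Scalar, int B = 3>
void solve_transposed(const Scalar* A, const Scalar* b, Scalar* x)
{
    std::array<std::array<Scalar, B + 1>, B> M{}; // augmented matrix [A^T | b]
    for (int r = 0; r < B; ++r) {
        for (int c = 0; c < B; ++c) {
            M[r][c] = A[c * B + r]; // transpose: M(r,c) = A(c,r)
        }
        M[r][B] = b[r];
    }
    for (int col = 0; col < B; ++col) {
        int piv = col;
        for (int r = col + 1; r < B; ++r) {
            if (std::fabs(M[r][col]) > std::fabs(M[piv][col])) piv = r;
        }
        std::swap(M[piv], M[col]);
        for (int r = col + 1; r < B; ++r) {
            Scalar f = M[r][col] / M[col][col];
            for (int c = col; c <= B; ++c) M[r][c] -= f * M[col][c];
        }
    }
    for (int r = B - 1; r >= 0; --r) { // back substitution
        Scalar s = M[r][B];
        for (int c = r + 1; c < B; ++c) s -= M[r][c] * x[c];
        x[r] = s / M[r][r];
    }
}

int main()
{
    constexpr int block_size = 3;
    constexpr int pressure_idx = 1;            // hypothetical pressure component
    // One made-up row-major 3x3 diagonal block of the system matrix.
    double diag_block[block_size * block_size] = { 4, 1, 0,
                                                   2, 5, 1,
                                                   0, 3, 6 };
    double rhs[block_size] = {0, 0, 0};
    rhs[pressure_idx] = 1;

    double w[block_size];
    solve_transposed(diag_block, rhs, w);      // diag_block^T * w = rhs

    // Normalize by the largest absolute weight, as get_absmax() is used above.
    double abs_max = std::abs(*std::max_element(w, w + block_size,
                              [](double a, double b) { return std::fabs(a) < std::fabs(b); }));
    for (int i = 0; i < block_size; ++i) {
        w[i] /= abs_max;
    }
    std::printf("row weights: %g %g %g\n", w[0], w[1], w[2]);
    return 0;
}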

@@ -38,10 +38,10 @@ namespace Opm::Accelerator {
template<class Scalar> class BlockedMatrix;
/// This class implements a Constrained Pressure Residual (CPR) preconditioner
template <unsigned int block_size>
class CPR : public Preconditioner<double,block_size>
template<class Scalar, unsigned int block_size>
class CPR : public Preconditioner<Scalar,block_size>
{
using Base = Preconditioner<double,block_size>;
using Base = Preconditioner<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -55,25 +55,25 @@ class CPR : public Preconditioner<double,block_size>
private:
int num_levels;
std::vector<double> weights, coarse_vals, coarse_x, coarse_y;
std::vector<Matrix<double>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
std::vector<OpenclMatrix<double>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
std::vector<Scalar> weights, coarse_vals, coarse_x, coarse_y;
std::vector<Matrix<Scalar>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
std::vector<OpenclMatrix<Scalar>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
std::vector<std::vector<int> > PcolIndices; // prolongation does not need a full matrix, only store colIndices
std::vector<cl::Buffer> d_PcolIndices;
std::vector<std::vector<double> > invDiags; // inverse of diagonal of Amatrices
std::vector<std::vector<Scalar>> invDiags; // inverse of diagonal of Amatrices
std::vector<cl::Buffer> d_invDiags;
std::vector<cl::Buffer> d_t, d_f, d_u; // intermediate vectors used during amg cycle
std::unique_ptr<cl::Buffer> d_rs; // use before extracting the pressure
std::unique_ptr<cl::Buffer> d_weights; // the quasiimpes weights, used to extract pressure
std::unique_ptr<OpenclMatrix<double>> d_mat; // stores blocked matrix
std::unique_ptr<OpenclMatrix<Scalar>> d_mat; // stores blocked matrix
std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
std::unique_ptr<BILU0<double,block_size>> bilu0; // Blocked ILU0 preconditioner
BlockedMatrix<double>* mat = nullptr; // input matrix, blocked
std::unique_ptr<BILU0<Scalar,block_size>> bilu0; // Blocked ILU0 preconditioner
BlockedMatrix<Scalar>* mat = nullptr; // input matrix, blocked
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<Scalar, 1, 1> >;
using DuneVec = Dune::BlockVector<Dune::FieldVector<Scalar, 1> >;
using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
using DuneAmg = Dune::Amg::MatrixHierarchy<MatrixOperator, Dune::Amg::SequentialInformation>;
std::unique_ptr<DuneAmg> dune_amg;
@@ -109,31 +109,33 @@ private:
void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
void create_preconditioner_amg(BlockedMatrix<double>* mat);
void create_preconditioner_amg(BlockedMatrix<Scalar>* mat);
public:
CPR(bool opencl_ilu_parallel, int verbosity);
bool analyze_matrix(BlockedMatrix<double>* mat) override;
bool analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
bool analyze_matrix(BlockedMatrix<Scalar>* mat,
BlockedMatrix<Scalar>* jacMat) override;
// set own Opencl variables, but also that of the bilu0 preconditioner
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
void setOpencl(std::shared_ptr<cl::Context>& context,
std::shared_ptr<cl::CommandQueue>& queue) override;
// applies blocked ilu0
// also applies amg for pressure component
void apply(const cl::Buffer& y, cl::Buffer& x) override;
bool create_preconditioner(BlockedMatrix<double>* mat) override;
bool create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
bool create_preconditioner(BlockedMatrix<Scalar>* mat,
BlockedMatrix<Scalar>* jacMat) override;
};
// solve A^T * x = b
// A should represent a 3x3 matrix
// x and b are vectors with 3 elements
void solve_transposed_3x3(const double *A, const double *b, double *x);
template<class Scalar>
void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x);
} // namespace Opm::Accelerator
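The pre- and post-smoothing loops in amg_cycle_gpu() above are damped Jacobi sweeps carried out by OpenclKernels<Scalar>::residual() and vmul(). Assuming residual() computes t = y - A*x and vmul() accumulates omega * D^{-1} * t into x (an assumption about the kernel semantics, not verified here), a host-side equivalent on a scalar CSR matrix looks like the following sketch.

#include <cstddef>
#include <vector>

// Minimal CSR container for the sketch (not the OPM Matrix/OpenclMatrix classes).
template<class Scalar>
struct CsrMatrix {
    std::vector<int> rowPointers;   // size N+1
    std::vector<int> colIndices;    // size nnz
    std::vector<Scalar> nnzValues;  // size nnz
};

// One damped-Jacobi smoothing sweep: x <- x + omega * D^{-1} * (y - A*x).
template<class Scalar>
void jacobi_smooth(const CsrMatrix<Scalar>& A,
                   const std::vector<Scalar>& invDiag, // 1 / A(i,i), as in invDiags above
                   const std::vector<Scalar>& y,       // right-hand side on this level
                   std::vector<Scalar>& x,             // iterate, updated in place
                   Scalar omega)                       // damping, e.g. 0.65
{
    const std::size_t N = invDiag.size();
    std::vector<Scalar> t(N);
    for (std::size_t row = 0; row < N; ++row) {        // t = y - A*x (residual)
        Scalar s = y[row];
        for (int idx = A.rowPointers[row]; idx < A.rowPointers[row + 1]; ++idx) {
            s -= A.nnzValues[idx] * x[A.colIndices[idx]];
        }
        t[row] = s;
    }
    for (std::size_t row = 0; row < N; ++row) {         // x += omega * D^{-1} * t
        x[row] += omega * invDiag[row] * t[row];
    }
}

Calling jacobi_smooth() num_pre_smooth_steps times before restriction and num_post_smooth_steps times after prolongation mirrors the structure of amg_cycle_gpu().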

@@ -49,7 +49,7 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
case Type::BILU0:
return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
case Type::CPR:
return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity);
return std::make_unique<CPR<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
case Type::BISAI:
return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
}
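A hedged sketch of a call site for the factory above, now that CPR carries the Scalar parameter. The create() signature is truncated in the hunk header, so the verbosity argument, the nested Type enum spelling and the std::unique_ptr return type are assumptions rather than verified OPM API.

#include <memory>
// Include of the OpenCL preconditioner factory header omitted: its file name is not shown in this diff.

// Assumed: create() is a static factory on Preconditioner<Scalar,block_size>
// returning std::unique_ptr<Preconditioner<Scalar,block_size>>, and its
// truncated last parameter is the verbosity level.
template<class Scalar>
std::unique_ptr<Opm::Accelerator::Preconditioner<Scalar, 3>>
make_cpr(bool opencl_ilu_parallel, int verbosity)
{
    using Prec = Opm::Accelerator::Preconditioner<Scalar, 3>;
    return Prec::create(Prec::Type::CPR, opencl_ilu_parallel, verbosity);
}

// A double build would call make_cpr<double>(...); with the new Scalar template
// parameter a float build only needs make_cpr<float>(...) plus (at least) a
// matching INSTANCE_TYPE(float) in the implementation file above.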