CPR: template Scalar type

Arne Morten Kvarving 2024-04-15 22:38:04 +02:00
parent 1f39e6a9a9
commit 7e1f4629ed
3 changed files with 158 additions and 149 deletions

View File

@@ -34,33 +34,31 @@
 #include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
 namespace Opm::Accelerator {
-using Opm::OpmLog;
 using Dune::Timer;
-template <unsigned int block_size>
-CPR<block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
+template<class Scalar, unsigned int block_size>
+CPR<Scalar,block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
     : Base(verbosity_)
     , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
-    bilu0 = std::make_unique<BILU0<double,block_size> >(opencl_ilu_parallel, verbosity_);
+    bilu0 = std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity_);
     diagIndices.resize(1);
 }
-template <unsigned int block_size>
-void CPR<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::
+setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
+{
     context = context_;
     queue = queue_;
     bilu0->setOpencl(context, queue);
 }
-template <unsigned int block_size>
-bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
 {
     this->Nb = mat_->Nb;
     this->nnzb = mat_->nnzbs;
@@ -72,9 +70,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
     return success;
 }
-template <unsigned int block_size>
-bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
-                                     BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+analyze_matrix(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
     this->Nb = mat_->Nb;
     this->nnzb = mat_->nnzbs;
@@ -87,10 +85,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
     return success;
 }
-template <unsigned int block_size>
-bool CPR<block_size>::
-create_preconditioner(BlockedMatrix<double>* mat_,
-                      BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
     Dune::Timer t_bilu0;
     bool result = bilu0->create_preconditioner(mat_, jacMat);
@@ -110,8 +107,9 @@ bool CPR<block_size>::
     return result;
 }
-template <unsigned int block_size>
-bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat_)
 {
     Dune::Timer t_bilu0;
     bool result = bilu0->create_preconditioner(mat_);
@@ -131,26 +129,30 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
     return result;
 }
 // return the absolute value of the N elements for which the absolute value is highest
-double get_absmax(const double *data, const int N) {
-    return std::abs(*std::max_element(data, data + N, [](double a, double b){return std::fabs(a) < std::fabs(b);}));
+template<class Scalar>
+Scalar get_absmax(const Scalar* data, const int N)
+{
+    return std::abs(*std::max_element(data, data + N,
+                                      [](Scalar a, Scalar b)
+                                      { return std::fabs(a) < std::fabs(b); }));
 }
 // solve A^T * x = b
-void solve_transposed_3x3(const double *A, const double *b, double *x) {
+template<class Scalar>
+void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x)
+{
     const int B = 3;
     // from dune-common/densematrix.hh, but transposed, so replace [r*B+c] with [r+c*B]
-    double t4 = A[0+0*B] * A[1+1*B];
-    double t6 = A[0+0*B] * A[1+2*B];
-    double t8 = A[0+1*B] * A[1+0*B];
-    double t10 = A[0+2*B] * A[1+0*B];
-    double t12 = A[0+1*B] * A[2+0*B];
-    double t14 = A[0+2*B] * A[2+0*B];
-    double d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
-                t10*A[2+1*B]+t12*A[1+2*B]-t14*A[1+1*B]); //determinant
+    Scalar t4 = A[0+0*B] * A[1+1*B];
+    Scalar t6 = A[0+0*B] * A[1+2*B];
+    Scalar t8 = A[0+1*B] * A[1+0*B];
+    Scalar t10 = A[0+2*B] * A[1+0*B];
+    Scalar t12 = A[0+1*B] * A[2+0*B];
+    Scalar t14 = A[0+2*B] * A[2+0*B];
+    Scalar d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
+                t10*A[2+1*B]+t12*A[1+2*B]-t14*A[1+1*B]); // determinant
     x[0] = (b[0]*A[1+1*B]*A[2+2*B] - b[0]*A[2+1*B]*A[1+2*B]
           - b[1] *A[0+1*B]*A[2+2*B] + b[1]*A[2+1*B]*A[0+2*B]
@@ -165,44 +167,49 @@ void solve_transposed_3x3(const double *A, const double *b, double *x) {
           + A[2+0*B] *A[0+1*B]*b[1] - A[2+0*B]*A[1+1*B]*b[0]) / d;
 }
-template <unsigned int block_size>
-void CPR<block_size>::init_opencl_buffers() {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar, block_size>::init_opencl_buffers()
+{
     d_Amatrices.reserve(num_levels);
     d_Rmatrices.reserve(num_levels - 1);
     d_invDiags.reserve(num_levels - 1);
-    for (Matrix<double>& m : Amatrices) {
+    for (Matrix<Scalar>& m : Amatrices) {
         d_Amatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
     }
-    for (Matrix<double>& m : Rmatrices) {
+    for (Matrix<Scalar>& m : Rmatrices) {
         d_Rmatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
-        d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
-        d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
+        d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
+        d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
         d_PcolIndices.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(int) * m.M);
-        d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M); // create a cl::Buffer
-        d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M);
+        d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M); // create a cl::Buffer
+        d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M);
     }
-    d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
-    d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
-    d_mat = std::make_unique<OpenclMatrix<double>>(context.get(), Nb, Nb, nnzb, block_size);
-    d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
-    d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
+    d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
+    d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
+    d_mat = std::make_unique<OpenclMatrix<Scalar>>(context.get(), Nb, Nb, nnzb, block_size);
+    d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
+    d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
 }
-template <unsigned int block_size>
-void CPR<block_size>::opencl_upload() {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::opencl_upload()
+{
     d_mat->upload(queue.get(), mat);
     err = CL_SUCCESS;
     events.resize(2 * Rmatrices.size() + 1);
-    err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0, sizeof(double) * N, weights.data(), nullptr, &events[0]);
+    err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0,
+                                     sizeof(Scalar) * N, weights.data(), nullptr, &events[0]);
     for (unsigned int i = 0; i < Rmatrices.size(); ++i) {
         d_Amatrices[i].upload(queue.get(), &Amatrices[i]);
-        err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0, sizeof(double) * Amatrices[i].N, invDiags[i].data(), nullptr, &events[2*i+1]);
-        err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0, sizeof(int) * Amatrices[i].N, PcolIndices[i].data(), nullptr, &events[2*i+2]);
+        err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0,
+                                         sizeof(Scalar) * Amatrices[i].N, invDiags[i].data(),
+                                         nullptr, &events[2*i+1]);
+        err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0,
+                                         sizeof(int) * Amatrices[i].N, PcolIndices[i].data(),
+                                         nullptr, &events[2*i+2]);
     }
     cl::WaitForEvents(events);
     events.clear();
@@ -215,8 +222,9 @@ void CPR<block_size>::opencl_upload() {
     }
 }
-template <unsigned int block_size>
-void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::
+create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
 {
     this->mat = mat_;
@@ -225,8 +233,8 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
     coarse_y.resize(Nb);
     weights.resize(N);
-    try{
-        double rhs[] = {0, 0, 0};
+    try {
+        Scalar rhs[] = {0, 0, 0};
         rhs[pressure_idx] = 1;
         // find diagonal index for each row
@@ -244,12 +252,12 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         // calculate weights for each row
         for (int row = 0; row < Nb; ++row) {
             // solve to find weights
-            double *row_weights = weights.data() + block_size * row; // weights for this row
+            Scalar* row_weights = weights.data() + block_size * row; // weights for this row
             solve_transposed_3x3(mat->nnzValues + block_size * block_size * diagIndices[0][row], rhs, row_weights);
             // normalize weights for this row
-            double abs_max = get_absmax(row_weights, block_size);
-            for(unsigned int i = 0; i < block_size; i++){
+            Scalar abs_max = get_absmax(row_weights, block_size);
+            for (unsigned int i = 0; i < block_size; i++) {
                 row_weights[i] /= abs_max;
             }
         }
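Not part of the diff: the hunk above computes the quasi-IMPES weights by solving one transposed 3x3 block system per row and then normalizing by the entry of largest magnitude. The standalone, Scalar-templated sketch below reproduces that step with plain Cramer's rule on A^T instead of the hand-expanded formulas in CPR.cpp (equivalent in exact arithmetic); the _ref names, the det3 helper, and the sample block values are invented for illustration.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Largest absolute value among N entries, mirroring get_absmax above.
template<class Scalar>
Scalar absmax_ref(const Scalar* data, int N)
{
    return std::abs(*std::max_element(data, data + N,
                    [](Scalar a, Scalar b) { return std::fabs(a) < std::fabs(b); }));
}

template<class Scalar>
Scalar det3(const Scalar (&m)[3][3])
{
    return m[0][0] * (m[1][1]*m[2][2] - m[1][2]*m[2][1])
         - m[0][1] * (m[1][0]*m[2][2] - m[1][2]*m[2][0])
         + m[0][2] * (m[1][0]*m[2][1] - m[1][1]*m[2][0]);
}

// Solve A^T * x = b for a row-major 3x3 block A via Cramer's rule.
template<class Scalar>
void solve_transposed_3x3_ref(const Scalar* A, const Scalar* b, Scalar* x)
{
    Scalar M[3][3];                                  // M = A^T
    for (int r = 0; r < 3; ++r)
        for (int c = 0; c < 3; ++c)
            M[r][c] = A[c*3 + r];
    const Scalar d = det3(M);
    for (int i = 0; i < 3; ++i) {
        Scalar Mi[3][3];                             // M with column i replaced by b
        for (int r = 0; r < 3; ++r)
            for (int c = 0; c < 3; ++c)
                Mi[r][c] = (c == i) ? b[r] : M[r][c];
        x[i] = det3(Mi) / d;
    }
}

int main()
{
    const double block[9] = {4, 1, 0, 1, 3, 1, 0, 1, 2};  // one made-up 3x3 diagonal block
    const double rhs[3]   = {1, 0, 0};                    // rhs[pressure_idx] = 1
    double w[3];
    solve_transposed_3x3_ref(block, rhs, w);
    const double amax = absmax_ref(w, 3);
    for (double& wi : w) wi /= amax;                      // normalize as in the loop above
    std::printf("row weights: %g %g %g\n", w[0], w[1], w[2]);
}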
@@ -260,9 +268,9 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
             int start = mat->rowPointers[row];
             int end = mat->rowPointers[row + 1];
             for (int idx = start; idx < end; ++idx) {
-                double *block = mat->nnzValues + idx * block_size * block_size;
-                double *row_weights = weights.data() + block_size * row;
-                double value = 0.0;
+                Scalar* block = mat->nnzValues + idx * block_size * block_size;
+                Scalar* row_weights = weights.data() + block_size * row;
+                Scalar value = 0.0;
                 for (unsigned int i = 0; i < block_size; ++i) {
                     value += block[block_size * i + pressure_idx] * row_weights[i];
                 }
@@ -279,10 +287,10 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         if (recalculate_aggregates) {
             dune_coarse = std::make_unique<DuneMat>(Nb, Nb, nnzb, DuneMat::row_wise);
-            typedef DuneMat::CreateIterator Iter;
+            using Iter = typename DuneMat::CreateIterator;
             // setup sparsity pattern
-            for(Iter row = dune_coarse->createbegin(); row != dune_coarse->createend(); ++row){
+            for (Iter row = dune_coarse->createbegin(); row != dune_coarse->createend(); ++row) {
                 int start = mat->rowPointers[row.index()];
                 int end = mat->rowPointers[row.index() + 1];
                 for (int idx = start; idx < end; ++idx) {
@@ -305,7 +313,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
             Dune::Amg::SequentialInformation seqinfo;
             dune_amg = std::make_unique<DuneAmg>(dune_op, Dune::stackobject_to_shared_ptr(seqinfo));
-            Opm::PropertyTree property_tree;
+            PropertyTree property_tree;
             property_tree.put("alpha", 0.333333333333);
             // The matrix has a symmetric sparsity pattern, but the values are not symmetric
@@ -318,7 +326,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
             num_pre_smooth_steps = c.getNoPreSmoothSteps();
             num_post_smooth_steps = c.getNoPostSmoothSteps();
-            dune_amg->build<OverlapFlags>(c);
+            dune_amg->template build<OverlapFlags>(c);
             analyzeHierarchy();
             analyzeAggregateMaps();
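Not part of the diff: the `dune_amg->template build<OverlapFlags>(c)` change above, and the `typename` qualifiers added in the hunks that follow, are required because DuneAmg now depends on the Scalar template parameter, which makes its nested types and member templates dependent names. A minimal, self-contained illustration of the rule (Hierarchy and use are invented names):

#include <vector>

template<class Scalar>
struct Hierarchy {
    using List = std::vector<Scalar>;
    template<int flags> void build() {}
};

template<class Scalar>
void use(Hierarchy<Scalar>* h)
{
    typename Hierarchy<Scalar>::List levels;  // 'typename': List names a dependent type
    levels.push_back(Scalar(1));
    h->template build<0>();                   // '->template': build is a dependent member template
}

int main()
{
    Hierarchy<double> h;
    use(&h);
}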
@@ -354,10 +362,10 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
     }
 }
-template <unsigned int block_size>
-void CPR<block_size>::analyzeHierarchy() {
-    const DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::analyzeHierarchy()
+{
+    const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
     // store coarsest AMG level in umfpack format, also performs LU decomposition
     umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
@@ -375,8 +383,8 @@ void CPR<block_size>::analyzeHierarchy() {
     // matrixIter.dereference() returns MatrixAdapter
     // matrixIter.dereference().getmat() returns BCRSMat
-    DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
-    for(int level = 0; level < num_levels; ++matrixIter, ++level) {
+    typename DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
+    for (int level = 0; level < num_levels; ++matrixIter, ++level) {
         const auto& A = matrixIter.dereference().getmat();
         level_sizes[level] = A.N();
         diagIndices[level].reserve(A.N());
@@ -398,38 +406,38 @@ void CPR<block_size>::analyzeHierarchy() {
             }
         }
-        Opm::BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
+        BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers,
+                                                                    Amatrices.back().colIndices);
         // compute inverse diagonal values for current level
         invDiags.emplace_back(A.N());
         for (unsigned int row = 0; row < A.N(); ++row) {
-            invDiags.back()[row] = 1 / Amatrices.back().nnzValues[diagIndices[level][row]];
+            invDiags.back()[row] = 1.0 / Amatrices.back().nnzValues[diagIndices[level][row]];
         }
     }
 }
-template <unsigned int block_size>
-void CPR<block_size>::analyzeAggregateMaps() {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::analyzeAggregateMaps()
+{
     PcolIndices.resize(num_levels - 1);
     Rmatrices.clear();
-    const DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
-    DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
-    for(int level = 0; level < num_levels - 1; ++mapIter, ++level) {
-        DuneAmg::AggregatesMap *map = *mapIter;
+    const typename DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
+    typename DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
+    for (int level = 0; level < num_levels - 1; ++mapIter, ++level) {
+        typename DuneAmg::AggregatesMap* map = *mapIter;
         Rmatrices.emplace_back(level_sizes[level+1], level_sizes[level], level_sizes[level]);
         std::fill(Rmatrices.back().nnzValues.begin(), Rmatrices.back().nnzValues.end(), 1.0);
         // get indices for each row of P and R
-        std::vector<std::vector<unsigned> > indicesR(level_sizes[level+1]);
+        std::vector<std::vector<unsigned>> indicesR(level_sizes[level+1]);
         PcolIndices[level].resize(level_sizes[level]);
-        using AggregateIterator = DuneAmg::AggregatesMap::const_iterator;
-        for(AggregateIterator ai = map->begin(); ai != map->end(); ++ai){
+        using AggregateIterator = typename DuneAmg::AggregatesMap::const_iterator;
+        for (AggregateIterator ai = map->begin(); ai != map->end(); ++ai) {
             if (*ai != DuneAmg::AggregatesMap::ISOLATED) {
                 const long int diff = ai - map->begin();
                 PcolIndices[level][diff] = *ai;
@@ -449,20 +457,20 @@ void CPR<block_size>::analyzeAggregateMaps() {
     }
 }
-template <unsigned int block_size>
-void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
 {
-    OpenclMatrix<double>* A = &d_Amatrices[level];
-    OpenclMatrix<double>* R = &d_Rmatrices[level];
+    OpenclMatrix<Scalar>* A = &d_Amatrices[level];
+    OpenclMatrix<Scalar>* R = &d_Rmatrices[level];
     int Ncur = A->Nb;
     if (level == num_levels - 1) {
         // solve coarsest level
-        std::vector<double> h_y(Ncur), h_x(Ncur, 0);
+        std::vector<Scalar> h_y(Ncur), h_x(Ncur, 0);
         events.resize(1);
-        err = queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * Ncur, h_y.data(), nullptr, &events[0]);
+        err = queue->enqueueReadBuffer(y, CL_FALSE, 0,
+                                       sizeof(Scalar) * Ncur, h_y.data(), nullptr, &events[0]);
         cl::WaitForEvents(events);
         events.clear();
         if (err != CL_SUCCESS) {
@@ -474,7 +482,8 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
         umfpack.apply(h_x.data(), h_y.data());
         events.resize(1);
-        err = queue->enqueueWriteBuffer(x, CL_FALSE, 0, sizeof(double) * Ncur, h_x.data(), nullptr, &events[0]);
+        err = queue->enqueueWriteBuffer(x, CL_FALSE, 0,
+                                        sizeof(Scalar) * Ncur, h_x.data(), nullptr, &events[0]);
         cl::WaitForEvents(events);
         events.clear();
         if (err != CL_SUCCESS) {
@@ -490,35 +499,37 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
     cl::Buffer& u = d_u[level]; // u was 0-initialized earlier
     // presmooth
-    double jacobi_damping = 0.65; // default value in amgcl: 0.72
-    for (unsigned i = 0; i < num_pre_smooth_steps; ++i){
-        OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
-        OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
+    Scalar jacobi_damping = 0.65; // default value in amgcl: 0.72
+    for (unsigned i = 0; i < num_pre_smooth_steps; ++i) {
+        OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
+        OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
     }
     // move to coarser level
-    OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
-    OpenclKernels<double>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
+    OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
+    OpenclKernels<Scalar>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
     amg_cycle_gpu(level + 1, f, u);
-    OpenclKernels<double>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
+    OpenclKernels<Scalar>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
     // postsmooth
-    for (unsigned i = 0; i < num_post_smooth_steps; ++i){
-        OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers,
+    for (unsigned i = 0; i < num_post_smooth_steps; ++i) {
+        OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers,
                                         x, y, t, Ncur, 1);
-        OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
+        OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
     }
 }
 // x = prec(y)
-template <unsigned int block_size>
-void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
+{
     // 0-initialize u and x vectors
     events.resize(d_u.size() + 1);
-    err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0, sizeof(double) * Nb, nullptr, &events[0]);
+    err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0,
+                                   sizeof(Scalar) * Nb, nullptr, &events[0]);
     for (unsigned int i = 0; i < d_u.size(); ++i) {
-        err |= queue->enqueueFillBuffer(d_u[i], 0, 0, sizeof(double) * Rmatrices[i].N, nullptr, &events[i + 1]);
+        err |= queue->enqueueFillBuffer(d_u[i], 0, 0,
+                                        sizeof(Scalar) * Rmatrices[i].N, nullptr, &events[i + 1]);
     }
     cl::WaitForEvents(events);
     events.clear();
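Not part of the diff: each pre- and post-smoothing pass above is a damped Jacobi sweep, a residual evaluation followed by a scaled multiply with the stored inverse diagonal. The host-side, Scalar-templated sketch below shows the textbook update x += omega * D^{-1} (y - A x) on a scalar CSR matrix; it is only an illustration and does not claim to reproduce the exact accumulate-versus-overwrite semantics of the OpenclKernels vmul kernel, which this diff does not show.

#include <cstdio>
#include <vector>

// One damped-Jacobi sweep on a CSR matrix: x += omega * invDiag .* (y - A*x).
template<class Scalar>
void jacobi_sweep(const std::vector<Scalar>& vals,
                  const std::vector<int>& cols,
                  const std::vector<int>& rowPtr,
                  const std::vector<Scalar>& invDiag,
                  const std::vector<Scalar>& y,
                  std::vector<Scalar>& x,
                  Scalar omega)
{
    const int n = static_cast<int>(rowPtr.size()) - 1;
    std::vector<Scalar> t(n);
    for (int r = 0; r < n; ++r) {                 // t = y - A*x  (the 'residual' step)
        Scalar s = y[r];
        for (int idx = rowPtr[r]; idx < rowPtr[r+1]; ++idx)
            s -= vals[idx] * x[cols[idx]];
        t[r] = s;
    }
    for (int r = 0; r < n; ++r)                   // damped correction with the inverse diagonal
        x[r] += omega * invDiag[r] * t[r];
}

int main()
{
    // 2x2 example: A = [[2,1],[1,2]], y = [3,3]; the exact solution is x = [1,1].
    std::vector<double> vals{2, 1, 1, 2};
    std::vector<int> cols{0, 1, 0, 1}, rowPtr{0, 2, 4};
    std::vector<double> invDiag{0.5, 0.5}, y{3, 3}, x{0, 0};
    for (int it = 0; it < 25; ++it)
        jacobi_sweep(vals, cols, rowPtr, invDiag, y, x, 0.65);
    std::printf("x = %g %g\n", x[0], x[1]);       // approaches 1 1
}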
@@ -527,17 +538,18 @@ void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
         OPM_THROW(std::logic_error, "CPR OpenCL enqueueWriteBuffer error");
     }
-    OpenclKernels<double>::residual(d_mat->nnzValues, d_mat->colIndices,
+    OpenclKernels<Scalar>::residual(d_mat->nnzValues, d_mat->colIndices,
                                     d_mat->rowPointers, x, y, *d_rs, Nb, block_size);
-    OpenclKernels<double>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
+    OpenclKernels<Scalar>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
     amg_cycle_gpu(0, *d_coarse_y, *d_coarse_x);
-    OpenclKernels<double>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
+    OpenclKernels<Scalar>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
 }
-template <unsigned int block_size>
-void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
+{
     Dune::Timer t_bilu0;
     bilu0->apply(y, x);
     if (verbosity >= 4) {
@@ -555,19 +567,14 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
     }
 }
-#define INSTANTIATE_BDA_FUNCTIONS(n) \
-template class CPR<n>;
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-#undef INSTANTIATE_BDA_FUNCTIONS
+#define INSTANCE_TYPE(T) \
+    template class CPR<T,1>; \
+    template class CPR<T,2>; \
+    template class CPR<T,3>; \
+    template class CPR<T,4>; \
+    template class CPR<T,5>; \
+    template class CPR<T,6>;
+INSTANCE_TYPE(double)
 } // namespace Opm::Accelerator
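For reference (not part of the diff): the member definitions stay in the .cpp file, so the explicit instantiations at the bottom decide which combinations are actually compiled and linkable. INSTANCE_TYPE(double) expands to the six class instantiations below; supporting another scalar type later would presumably just mean adding another INSTANCE_TYPE line (for example float), which this commit does not do.

// expansion of INSTANCE_TYPE(double):
template class CPR<double,1>;
template class CPR<double,2>;
template class CPR<double,3>;
template class CPR<double,4>;
template class CPR<double,5>;
template class CPR<double,6>;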

View File

@@ -38,10 +38,10 @@ namespace Opm::Accelerator {
 template<class Scalar> class BlockedMatrix;
 /// This class implements a Constrained Pressure Residual (CPR) preconditioner
-template <unsigned int block_size>
-class CPR : public Preconditioner<double,block_size>
+template<class Scalar, unsigned int block_size>
+class CPR : public Preconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<double,block_size>;
+    using Base = Preconditioner<Scalar,block_size>;
     using Base::N;
     using Base::Nb;
@@ -55,25 +55,25 @@ class CPR : public Preconditioner<double,block_size>
 private:
     int num_levels;
-    std::vector<double> weights, coarse_vals, coarse_x, coarse_y;
-    std::vector<Matrix<double>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
-    std::vector<OpenclMatrix<double>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
+    std::vector<Scalar> weights, coarse_vals, coarse_x, coarse_y;
+    std::vector<Matrix<Scalar>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
+    std::vector<OpenclMatrix<Scalar>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
     std::vector<std::vector<int> > PcolIndices; // prolongation does not need a full matrix, only store colIndices
     std::vector<cl::Buffer> d_PcolIndices;
-    std::vector<std::vector<double> > invDiags; // inverse of diagonal of Amatrices
+    std::vector<std::vector<Scalar>> invDiags; // inverse of diagonal of Amatrices
     std::vector<cl::Buffer> d_invDiags;
     std::vector<cl::Buffer> d_t, d_f, d_u; // intermediate vectors used during amg cycle
     std::unique_ptr<cl::Buffer> d_rs; // use before extracting the pressure
     std::unique_ptr<cl::Buffer> d_weights; // the quasiimpes weights, used to extract pressure
-    std::unique_ptr<OpenclMatrix<double>> d_mat; // stores blocked matrix
+    std::unique_ptr<OpenclMatrix<Scalar>> d_mat; // stores blocked matrix
     std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
     std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
-    std::unique_ptr<BILU0<double,block_size>> bilu0; // Blocked ILU0 preconditioner
-    BlockedMatrix<double>* mat = nullptr; // input matrix, blocked
-    using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
-    using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
+    std::unique_ptr<BILU0<Scalar,block_size>> bilu0; // Blocked ILU0 preconditioner
+    BlockedMatrix<Scalar>* mat = nullptr; // input matrix, blocked
+    using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<Scalar, 1, 1> >;
+    using DuneVec = Dune::BlockVector<Dune::FieldVector<Scalar, 1> >;
     using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
     using DuneAmg = Dune::Amg::MatrixHierarchy<MatrixOperator, Dune::Amg::SequentialInformation>;
     std::unique_ptr<DuneAmg> dune_amg;
@@ -109,31 +109,33 @@
     void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
-    void create_preconditioner_amg(BlockedMatrix<double>* mat);
+    void create_preconditioner_amg(BlockedMatrix<Scalar>* mat);
 public:
     CPR(bool opencl_ilu_parallel, int verbosity);
-    bool analyze_matrix(BlockedMatrix<double>* mat) override;
-    bool analyze_matrix(BlockedMatrix<double>* mat,
-                        BlockedMatrix<double>* jacMat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat,
+                        BlockedMatrix<Scalar>* jacMat) override;
     // set own Opencl variables, but also that of the bilu0 preconditioner
-    void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
+    void setOpencl(std::shared_ptr<cl::Context>& context,
+                   std::shared_ptr<cl::CommandQueue>& queue) override;
     // applies blocked ilu0
     // also applies amg for pressure component
     void apply(const cl::Buffer& y, cl::Buffer& x) override;
-    bool create_preconditioner(BlockedMatrix<double>* mat) override;
-    bool create_preconditioner(BlockedMatrix<double>* mat,
-                               BlockedMatrix<double>* jacMat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat,
+                               BlockedMatrix<Scalar>* jacMat) override;
 };
 // solve A^T * x = b
 // A should represent a 3x3 matrix
 // x and b are vectors with 3 elements
-void solve_transposed_3x3(const double *A, const double *b, double *x);
+template<class Scalar>
+void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x);
 } // namespace Opm::Accelerator
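Not part of the diff: because the base class Preconditioner<Scalar,block_size> now depends on the template parameters, its members are no longer found by plain unqualified lookup inside CPR, which is what the `using Base::N;` style declarations near the top of the class take care of. A self-contained illustration with invented names:

template<class Scalar, unsigned int block_size>
struct Base {
    int N = 0;
    int Nb = 0;
};

template<class Scalar, unsigned int block_size>
struct Derived : public Base<Scalar, block_size> {
    using B = Base<Scalar, block_size>;
    using B::N;                              // without this, plain 'N' below would not compile

    int dofs() const { return N; }           // found via the using-declaration
    int blocks() const { return this->Nb; }  // the explicit 'this->' spelling also works
};

int main()
{
    Derived<double, 3> d;
    return d.dofs() + d.blocks();
}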

View File

@@ -49,7 +49,7 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
     case Type::BILU0:
         return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     case Type::CPR:
-        return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity);
+        return std::make_unique<CPR<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     case Type::BISAI:
         return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     }
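Not part of the diff: the factory hunk above is what lets the caller's Scalar propagate into the CPR object it builds. A self-contained sketch of the same dispatch pattern, with invented names (Type, Precond, CprImpl, create) rather than the OPM classes:

#include <memory>

enum class Type { BILU0, CPR, BISAI };

template<class Scalar, unsigned int block_size>
struct Precond {
    virtual ~Precond() = default;
};

template<class Scalar, unsigned int block_size>
struct CprImpl : Precond<Scalar, block_size> {
    CprImpl(bool /*opencl_ilu_parallel*/, int /*verbosity*/) {}
};

// Scalar and block_size are fixed by the factory's own template parameters,
// so every branch forwards them into the concrete preconditioner type.
template<class Scalar, unsigned int block_size>
std::unique_ptr<Precond<Scalar, block_size>>
create(Type type, bool opencl_ilu_parallel, int verbosity)
{
    switch (type) {
    case Type::CPR:
        return std::make_unique<CprImpl<Scalar, block_size>>(opencl_ilu_parallel, verbosity);
    default:
        return nullptr;   // the remaining cases are elided in this sketch
    }
}

int main()
{
    auto p = create<double, 3>(Type::CPR, true, 0);   // a float caller would write create<float, 3>(...)
    return p ? 0 : 1;
}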