Mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)
CPR: template Scalar type

parent 1f39e6a9a9
commit 7e1f4629ed
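This commit templates the OpenCL CPR preconditioner on its scalar value type: CPR<block_size> becomes CPR<Scalar,block_size>, the free helpers get_absmax and solve_transposed_3x3 become function templates, hard-coded double buffer sizes and kernel calls are replaced by sizeof(Scalar) and OpenclKernels<Scalar>, dependent type names gain the now-required typename/template keywords, and the per-block-size INSTANTIATE_BDA_FUNCTIONS macro gives way to INSTANCE_TYPE(double), which instantiates CPR<double,1> through CPR<double,6>.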
@@ -34,33 +34,31 @@
 #include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
 
 namespace Opm::Accelerator {
 
 using Opm::OpmLog;
 using Dune::Timer;
 
-template <unsigned int block_size>
-CPR<block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
+template<class Scalar, unsigned int block_size>
+CPR<Scalar,block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
     : Base(verbosity_)
     , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
-    bilu0 = std::make_unique<BILU0<double,block_size> >(opencl_ilu_parallel, verbosity_);
+    bilu0 = std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity_);
     diagIndices.resize(1);
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::
+setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
+{
     context = context_;
     queue = queue_;
 
     bilu0->setOpencl(context, queue);
 }
 
-template <unsigned int block_size>
-bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
 {
     this->Nb = mat_->Nb;
     this->nnzb = mat_->nnzbs;
@@ -72,9 +70,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
     return success;
 }
 
-template <unsigned int block_size>
-bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
-                                     BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+analyze_matrix(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
     this->Nb = mat_->Nb;
     this->nnzb = mat_->nnzbs;
@@ -87,10 +85,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
     return success;
 }
 
-template <unsigned int block_size>
-bool CPR<block_size>::
-create_preconditioner(BlockedMatrix<double>* mat_,
-                      BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
     Dune::Timer t_bilu0;
     bool result = bilu0->create_preconditioner(mat_, jacMat);
@@ -110,8 +107,9 @@ bool CPR<block_size>::
     return result;
 }
 
-template <unsigned int block_size>
-bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+bool CPR<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat_)
 {
     Dune::Timer t_bilu0;
     bool result = bilu0->create_preconditioner(mat_);
@@ -131,25 +129,29 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
     return result;
 }
 
 // return the absolute value of the N elements for which the absolute value is highest
-double get_absmax(const double *data, const int N) {
-    return std::abs(*std::max_element(data, data + N, [](double a, double b){return std::fabs(a) < std::fabs(b);}));
+template<class Scalar>
+Scalar get_absmax(const Scalar* data, const int N)
+{
+    return std::abs(*std::max_element(data, data + N,
+                                      [](Scalar a, Scalar b)
+                                      { return std::fabs(a) < std::fabs(b); }));
 }
 
 // solve A^T * x = b
-void solve_transposed_3x3(const double *A, const double *b, double *x) {
+template<class Scalar>
+void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x)
+{
     const int B = 3;
     // from dune-common/densematrix.hh, but transposed, so replace [r*B+c] with [r+c*B]
-    double t4 = A[0+0*B] * A[1+1*B];
-    double t6 = A[0+0*B] * A[1+2*B];
-    double t8 = A[0+1*B] * A[1+0*B];
-    double t10 = A[0+2*B] * A[1+0*B];
-    double t12 = A[0+1*B] * A[2+0*B];
-    double t14 = A[0+2*B] * A[2+0*B];
+    Scalar t4 = A[0+0*B] * A[1+1*B];
+    Scalar t6 = A[0+0*B] * A[1+2*B];
+    Scalar t8 = A[0+1*B] * A[1+0*B];
+    Scalar t10 = A[0+2*B] * A[1+0*B];
+    Scalar t12 = A[0+1*B] * A[2+0*B];
+    Scalar t14 = A[0+2*B] * A[2+0*B];
 
-    double d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
+    Scalar d = (t4*A[2+2*B]-t6*A[2+1*B]-t8*A[2+2*B]+
                 t10*A[2+1*B]+t12*A[1+2*B]-t14*A[1+1*B]); // determinant
 
     x[0] = (b[0]*A[1+1*B]*A[2+2*B] - b[0]*A[2+1*B]*A[1+2*B]
@@ -165,44 +167,49 @@ void solve_transposed_3x3(const double *A, const double *b, double *x) {
             + A[2+0*B] *A[0+1*B]*b[1] - A[2+0*B]*A[1+1*B]*b[0]) / d;
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::init_opencl_buffers() {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar, block_size>::init_opencl_buffers()
+{
     d_Amatrices.reserve(num_levels);
     d_Rmatrices.reserve(num_levels - 1);
     d_invDiags.reserve(num_levels - 1);
-    for (Matrix<double>& m : Amatrices) {
+    for (Matrix<Scalar>& m : Amatrices) {
         d_Amatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
     }
-    for (Matrix<double>& m : Rmatrices) {
+    for (Matrix<Scalar>& m : Rmatrices) {
         d_Rmatrices.emplace_back(context.get(), m.N, m.M, m.nnzs, 1);
-        d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
-        d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.N);
+        d_f.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
+        d_u.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.N);
 
         d_PcolIndices.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(int) * m.M);
-        d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M); // create a cl::Buffer
-        d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(double) * m.M);
+        d_invDiags.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M); // create a cl::Buffer
+        d_t.emplace_back(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * m.M);
     }
-    d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
-    d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
-    d_mat = std::make_unique<OpenclMatrix<double>>(context.get(), Nb, Nb, nnzb, block_size);
-    d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
-    d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
+    d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
+    d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * N);
+    d_mat = std::make_unique<OpenclMatrix<Scalar>>(context.get(), Nb, Nb, nnzb, block_size);
+    d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
+    d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(Scalar) * Nb);
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::opencl_upload() {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::opencl_upload()
+{
     d_mat->upload(queue.get(), mat);
 
     err = CL_SUCCESS;
     events.resize(2 * Rmatrices.size() + 1);
-    err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0, sizeof(double) * N, weights.data(), nullptr, &events[0]);
+    err |= queue->enqueueWriteBuffer(*d_weights, CL_FALSE, 0,
+                                     sizeof(Scalar) * N, weights.data(), nullptr, &events[0]);
     for (unsigned int i = 0; i < Rmatrices.size(); ++i) {
         d_Amatrices[i].upload(queue.get(), &Amatrices[i]);
 
-        err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0, sizeof(double) * Amatrices[i].N, invDiags[i].data(), nullptr, &events[2*i+1]);
-        err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0, sizeof(int) * Amatrices[i].N, PcolIndices[i].data(), nullptr, &events[2*i+2]);
+        err |= queue->enqueueWriteBuffer(d_invDiags[i], CL_FALSE, 0,
                                         sizeof(Scalar) * Amatrices[i].N, invDiags[i].data(),
+                                         nullptr, &events[2*i+1]);
+        err |= queue->enqueueWriteBuffer(d_PcolIndices[i], CL_FALSE, 0,
+                                         sizeof(int) * Amatrices[i].N, PcolIndices[i].data(),
+                                         nullptr, &events[2*i+2]);
    }
     cl::WaitForEvents(events);
     events.clear();
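For reference, the get_absmax helper templated above can be exercised host-side without any OpenCL setup. A minimal sketch — the main() harness and sample values are invented for illustration, while the helper body is copied from the hunk:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Copied from the hunk above: largest absolute value among the first N entries.
    template<class Scalar>
    Scalar get_absmax(const Scalar* data, const int N)
    {
        return std::abs(*std::max_element(data, data + N,
                                          [](Scalar a, Scalar b)
                                          { return std::fabs(a) < std::fabs(b); }));
    }

    int main()
    {
        // Hypothetical weight row, as solve_transposed_3x3() might produce.
        double w[3] = {0.5, -2.0, 1.25};

        // Normalize by the largest magnitude, as create_preconditioner_amg() does.
        const double abs_max = get_absmax(w, 3);
        for (double& v : w) { v /= abs_max; }

        std::printf("%g %g %g\n", w[0], w[1], w[2]); // prints: 0.25 -1 0.625
        return 0;
    }

The same source now compiles unchanged for any Scalar the rest of the stack instantiates.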
@@ -215,8 +222,9 @@ void CPR<block_size>::opencl_upload() {
     }
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::
+create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
 {
     this->mat = mat_;
 
@@ -226,7 +234,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
     weights.resize(N);
 
     try {
-        double rhs[] = {0, 0, 0};
+        Scalar rhs[] = {0, 0, 0};
         rhs[pressure_idx] = 1;
 
         // find diagonal index for each row
@@ -244,11 +252,11 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         // calculate weights for each row
         for (int row = 0; row < Nb; ++row) {
             // solve to find weights
-            double *row_weights = weights.data() + block_size * row; // weights for this row
+            Scalar* row_weights = weights.data() + block_size * row; // weights for this row
             solve_transposed_3x3(mat->nnzValues + block_size * block_size * diagIndices[0][row], rhs, row_weights);
 
             // normalize weights for this row
-            double abs_max = get_absmax(row_weights, block_size);
+            Scalar abs_max = get_absmax(row_weights, block_size);
             for (unsigned int i = 0; i < block_size; i++) {
                 row_weights[i] /= abs_max;
             }
@@ -260,9 +268,9 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
             int start = mat->rowPointers[row];
             int end = mat->rowPointers[row + 1];
             for (int idx = start; idx < end; ++idx) {
-                double *block = mat->nnzValues + idx * block_size * block_size;
-                double *row_weights = weights.data() + block_size * row;
-                double value = 0.0;
+                Scalar* block = mat->nnzValues + idx * block_size * block_size;
+                Scalar* row_weights = weights.data() + block_size * row;
+                Scalar value = 0.0;
                 for (unsigned int i = 0; i < block_size; ++i) {
                     value += block[block_size * i + pressure_idx] * row_weights[i];
                 }
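In the two hunks above, the quasi-IMPES weight construction runs entirely in the Scalar type. Per row r with diagonal block D_r it computes

    w_r = (D_r^T)^{-1} e_p,    w_r <- w_r / max_i |(w_r)_i|,

via solve_transposed_3x3 and get_absmax (e_p is the unit vector selecting pressure_idx), and each entry of the scalar pressure matrix is then the weighted pressure column of the corresponding block, value = sum_i block[i][pressure_idx] * w_r[i], exactly as the inner loop shows.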
@@ -279,7 +287,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         if (recalculate_aggregates) {
             dune_coarse = std::make_unique<DuneMat>(Nb, Nb, nnzb, DuneMat::row_wise);
 
-            typedef DuneMat::CreateIterator Iter;
+            using Iter = typename DuneMat::CreateIterator;
 
             // setup sparsity pattern
             for (Iter row = dune_coarse->createbegin(); row != dune_coarse->createend(); ++row) {
@@ -305,7 +313,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         Dune::Amg::SequentialInformation seqinfo;
         dune_amg = std::make_unique<DuneAmg>(dune_op, Dune::stackobject_to_shared_ptr(seqinfo));
 
-        Opm::PropertyTree property_tree;
+        PropertyTree property_tree;
         property_tree.put("alpha", 0.333333333333);
 
         // The matrix has a symmetric sparsity pattern, but the values are not symmetric
@@ -318,7 +326,7 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
         num_pre_smooth_steps = c.getNoPreSmoothSteps();
         num_post_smooth_steps = c.getNoPostSmoothSteps();
 
-        dune_amg->build<OverlapFlags>(c);
+        dune_amg->template build<OverlapFlags>(c);
 
         analyzeHierarchy();
         analyzeAggregateMaps();
@@ -354,10 +362,10 @@ void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
     }
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::analyzeHierarchy() {
-    const DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::analyzeHierarchy()
+{
+    const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
 
     // store coarsest AMG level in umfpack format, also performs LU decomposition
     umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
@@ -375,7 +383,7 @@ void CPR<block_size>::analyzeHierarchy() {
 
     // matrixIter.dereference() returns MatrixAdapter
     // matrixIter.dereference().getmat() returns BCRSMat
-    DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
+    typename DuneAmg::ParallelMatrixHierarchy::ConstIterator matrixIter = matrixHierarchy.finest();
     for (int level = 0; level < num_levels; ++matrixIter, ++level) {
         const auto& A = matrixIter.dereference().getmat();
         level_sizes[level] = A.N();
@@ -398,28 +406,28 @@ void CPR<block_size>::analyzeHierarchy() {
             }
         }
 
-        Opm::BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
+        BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers,
+                                                                    Amatrices.back().colIndices);
 
         // compute inverse diagonal values for current level
         invDiags.emplace_back(A.N());
         for (unsigned int row = 0; row < A.N(); ++row) {
-            invDiags.back()[row] = 1 / Amatrices.back().nnzValues[diagIndices[level][row]];
+            invDiags.back()[row] = 1.0 / Amatrices.back().nnzValues[diagIndices[level][row]];
        }
     }
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::analyzeAggregateMaps() {
-
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::analyzeAggregateMaps()
+{
     PcolIndices.resize(num_levels - 1);
     Rmatrices.clear();
 
-    const DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
+    const typename DuneAmg::AggregatesMapList& aggregatesMaps = dune_amg->aggregatesMaps();
 
-    DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
+    typename DuneAmg::AggregatesMapList::const_iterator mapIter = aggregatesMaps.begin();
     for (int level = 0; level < num_levels - 1; ++mapIter, ++level) {
-        DuneAmg::AggregatesMap *map = *mapIter;
+        typename DuneAmg::AggregatesMap* map = *mapIter;
 
         Rmatrices.emplace_back(level_sizes[level+1], level_sizes[level], level_sizes[level]);
         std::fill(Rmatrices.back().nnzValues.begin(), Rmatrices.back().nnzValues.end(), 1.0);
@@ -428,7 +436,7 @@ void CPR<block_size>::analyzeAggregateMaps() {
         std::vector<std::vector<unsigned>> indicesR(level_sizes[level+1]);
         PcolIndices[level].resize(level_sizes[level]);
 
-        using AggregateIterator = DuneAmg::AggregatesMap::const_iterator;
+        using AggregateIterator = typename DuneAmg::AggregatesMap::const_iterator;
         for (AggregateIterator ai = map->begin(); ai != map->end(); ++ai) {
             if (*ai != DuneAmg::AggregatesMap::ISOLATED) {
                 const long int diff = ai - map->begin();
@@ -449,20 +457,20 @@ void CPR<block_size>::analyzeAggregateMaps() {
         }
     }
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
 {
-    OpenclMatrix<double>* A = &d_Amatrices[level];
-    OpenclMatrix<double>* R = &d_Rmatrices[level];
+    OpenclMatrix<Scalar>* A = &d_Amatrices[level];
+    OpenclMatrix<Scalar>* R = &d_Rmatrices[level];
     int Ncur = A->Nb;
 
     if (level == num_levels - 1) {
         // solve coarsest level
-        std::vector<double> h_y(Ncur), h_x(Ncur, 0);
+        std::vector<Scalar> h_y(Ncur), h_x(Ncur, 0);
 
         events.resize(1);
-        err = queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * Ncur, h_y.data(), nullptr, &events[0]);
+        err = queue->enqueueReadBuffer(y, CL_FALSE, 0,
+                                       sizeof(Scalar) * Ncur, h_y.data(), nullptr, &events[0]);
         cl::WaitForEvents(events);
         events.clear();
         if (err != CL_SUCCESS) {
@@ -474,7 +482,8 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
         umfpack.apply(h_x.data(), h_y.data());
 
         events.resize(1);
-        err = queue->enqueueWriteBuffer(x, CL_FALSE, 0, sizeof(double) * Ncur, h_x.data(), nullptr, &events[0]);
+        err = queue->enqueueWriteBuffer(x, CL_FALSE, 0,
+                                        sizeof(Scalar) * Ncur, h_x.data(), nullptr, &events[0]);
         cl::WaitForEvents(events);
         events.clear();
         if (err != CL_SUCCESS) {
@@ -490,35 +499,37 @@ void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer&
     cl::Buffer& u = d_u[level]; // u was 0-initialized earlier
 
     // presmooth
-    double jacobi_damping = 0.65; // default value in amgcl: 0.72
+    Scalar jacobi_damping = 0.65; // default value in amgcl: 0.72
     for (unsigned i = 0; i < num_pre_smooth_steps; ++i) {
-        OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
-        OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
+        OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
+        OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
     }
 
     // move to coarser level
-    OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
-    OpenclKernels<double>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
+    OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, x, y, t, Ncur, 1);
+    OpenclKernels<Scalar>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t, f, Nnext, 1, true);
     amg_cycle_gpu(level + 1, f, u);
-    OpenclKernels<double>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
+    OpenclKernels<Scalar>::prolongate_vector(u, x, d_PcolIndices[level], Ncur);
 
     // postsmooth
     for (unsigned i = 0; i < num_post_smooth_steps; ++i) {
-        OpenclKernels<double>::residual(A->nnzValues, A->colIndices, A->rowPointers,
+        OpenclKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers,
                                         x, y, t, Ncur, 1);
-        OpenclKernels<double>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
+        OpenclKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level], t, x, Ncur);
     }
 }
 
 // x = prec(y)
-template <unsigned int block_size>
-void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
+{
     // 0-initialize u and x vectors
     events.resize(d_u.size() + 1);
-    err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0, sizeof(double) * Nb, nullptr, &events[0]);
+    err = queue->enqueueFillBuffer(*d_coarse_x, 0, 0,
+                                   sizeof(Scalar) * Nb, nullptr, &events[0]);
     for (unsigned int i = 0; i < d_u.size(); ++i) {
-        err |= queue->enqueueFillBuffer(d_u[i], 0, 0, sizeof(double) * Rmatrices[i].N, nullptr, &events[i + 1]);
+        err |= queue->enqueueFillBuffer(d_u[i], 0, 0,
+                                        sizeof(Scalar) * Rmatrices[i].N, nullptr, &events[i + 1]);
     }
     cl::WaitForEvents(events);
     events.clear();
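The hunk above is the core of the templated AMG V-cycle: damped-Jacobi pre- and post-smoothing around a recursive coarse-grid correction, with the smoother expressed as a residual kernel followed by vmul. Below is a self-contained host-side sketch of one such smoothing sweep; the CSR storage, toy system, and all names are illustrative, and it assumes vmul performs x += damping * invDiag .* t, which matches its use here:

    #include <cstdio>
    #include <vector>

    // One damped-Jacobi sweep: t = y - A*x (the "residual" kernel),
    // then x += damping * invDiag .* t (the "vmul" kernel).
    template<class Scalar>
    void jacobi_sweep(const std::vector<Scalar>& vals,
                      const std::vector<int>& cols,
                      const std::vector<int>& rows,
                      const std::vector<Scalar>& invDiag,
                      const std::vector<Scalar>& y,
                      std::vector<Scalar>& x,
                      Scalar damping)
    {
        const int N = static_cast<int>(invDiag.size());
        std::vector<Scalar> t(N);
        for (int r = 0; r < N; ++r) {      // t = y - A*x
            Scalar sum = 0;
            for (int j = rows[r]; j < rows[r + 1]; ++j)
                sum += vals[j] * x[cols[j]];
            t[r] = y[r] - sum;
        }
        for (int r = 0; r < N; ++r)        // damped diagonal correction
            x[r] += damping * invDiag[r] * t[r];
    }

    int main()
    {
        // Toy 2x2 diagonally dominant system: A = [[4,1],[1,3]], y = (1,2).
        std::vector<double> vals{4, 1, 1, 3};
        std::vector<int>    cols{0, 1, 0, 1};
        std::vector<int>    rows{0, 2, 4};
        std::vector<double> invDiag{0.25, 1.0 / 3.0};
        std::vector<double> y{1, 2}, x{0, 0};

        for (int i = 0; i < 50; ++i)
            jacobi_sweep(vals, cols, rows, invDiag, y, x, 0.65); // same damping as the diff
        std::printf("x = (%f, %f)\n", x[0], x[1]); // approaches (1/11, 7/11)
        return 0;
    }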
@@ -527,17 +538,18 @@ void CPR<block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x) {
         OPM_THROW(std::logic_error, "CPR OpenCL enqueueWriteBuffer error");
     }
 
-    OpenclKernels<double>::residual(d_mat->nnzValues, d_mat->colIndices,
+    OpenclKernels<Scalar>::residual(d_mat->nnzValues, d_mat->colIndices,
                                     d_mat->rowPointers, x, y, *d_rs, Nb, block_size);
-    OpenclKernels<double>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
+    OpenclKernels<Scalar>::full_to_pressure_restriction(*d_rs, *d_weights, *d_coarse_y, Nb);
 
     amg_cycle_gpu(0, *d_coarse_y, *d_coarse_x);
 
-    OpenclKernels<double>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
+    OpenclKernels<Scalar>::add_coarse_pressure_correction(*d_coarse_x, x, pressure_idx, Nb);
 }
 
-template <unsigned int block_size>
-void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
+template<class Scalar, unsigned int block_size>
+void CPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
+{
     Dune::Timer t_bilu0;
     bilu0->apply(y, x);
     if (verbosity >= 4) {
@@ -555,19 +567,14 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
     }
 }
 
-#define INSTANTIATE_BDA_FUNCTIONS(n) \
-template class CPR<n>;
-
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
+#define INSTANCE_TYPE(T)     \
+    template class CPR<T,1>; \
+    template class CPR<T,2>; \
+    template class CPR<T,3>; \
+    template class CPR<T,4>; \
+    template class CPR<T,5>; \
+    template class CPR<T,6>;
+
+INSTANCE_TYPE(double)
 
 } // namespace Opm::Accelerator
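The new INSTANCE_TYPE macro groups the explicit instantiations by scalar type rather than by block size, so INSTANCE_TYPE(double) expands to template class CPR<double,1>; through template class CPR<double,6>;. Adding another precision would then be a single extra line — hypothetical here, since it only links once the rest of the BDA stack (BILU0<float,n>, OpenclKernels<float>, ...) is also instantiated for float:

    // Hypothetical single-precision instantiation alongside the double one:
    INSTANCE_TYPE(float)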
@@ -38,10 +38,10 @@ namespace Opm::Accelerator {
 template<class Scalar> class BlockedMatrix;
 
 /// This class implements a Constrained Pressure Residual (CPR) preconditioner
-template <unsigned int block_size>
-class CPR : public Preconditioner<double,block_size>
+template<class Scalar, unsigned int block_size>
+class CPR : public Preconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<double,block_size>;
+    using Base = Preconditioner<Scalar,block_size>;
 
     using Base::N;
     using Base::Nb;
@@ -55,25 +55,25 @@ class CPR : public Preconditioner<double,block_size>
 
 private:
     int num_levels;
-    std::vector<double> weights, coarse_vals, coarse_x, coarse_y;
-    std::vector<Matrix<double>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
-    std::vector<OpenclMatrix<double>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
+    std::vector<Scalar> weights, coarse_vals, coarse_x, coarse_y;
+    std::vector<Matrix<Scalar>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
+    std::vector<OpenclMatrix<Scalar>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
     std::vector<std::vector<int> > PcolIndices; // prolongation does not need a full matrix, only store colIndices
     std::vector<cl::Buffer> d_PcolIndices;
-    std::vector<std::vector<double> > invDiags; // inverse of diagonal of Amatrices
+    std::vector<std::vector<Scalar>> invDiags; // inverse of diagonal of Amatrices
     std::vector<cl::Buffer> d_invDiags;
     std::vector<cl::Buffer> d_t, d_f, d_u; // intermediate vectors used during amg cycle
     std::unique_ptr<cl::Buffer> d_rs; // use before extracting the pressure
     std::unique_ptr<cl::Buffer> d_weights; // the quasiimpes weights, used to extract pressure
-    std::unique_ptr<OpenclMatrix<double>> d_mat; // stores blocked matrix
+    std::unique_ptr<OpenclMatrix<Scalar>> d_mat; // stores blocked matrix
     std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
     std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
 
-    std::unique_ptr<BILU0<double,block_size>> bilu0; // Blocked ILU0 preconditioner
-    BlockedMatrix<double>* mat = nullptr; // input matrix, blocked
+    std::unique_ptr<BILU0<Scalar,block_size>> bilu0; // Blocked ILU0 preconditioner
+    BlockedMatrix<Scalar>* mat = nullptr; // input matrix, blocked
 
-    using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
-    using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
+    using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<Scalar, 1, 1> >;
+    using DuneVec = Dune::BlockVector<Dune::FieldVector<Scalar, 1> >;
     using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
     using DuneAmg = Dune::Amg::MatrixHierarchy<MatrixOperator, Dune::Amg::SequentialInformation>;
     std::unique_ptr<DuneAmg> dune_amg;
@@ -109,31 +109,33 @@ private:
 
     void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
 
-    void create_preconditioner_amg(BlockedMatrix<double>* mat);
+    void create_preconditioner_amg(BlockedMatrix<Scalar>* mat);
 
 public:
     CPR(bool opencl_ilu_parallel, int verbosity);
 
-    bool analyze_matrix(BlockedMatrix<double>* mat) override;
-    bool analyze_matrix(BlockedMatrix<double>* mat,
-                        BlockedMatrix<double>* jacMat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat,
+                        BlockedMatrix<Scalar>* jacMat) override;
 
     // set own Opencl variables, but also that of the bilu0 preconditioner
-    void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
+    void setOpencl(std::shared_ptr<cl::Context>& context,
+                   std::shared_ptr<cl::CommandQueue>& queue) override;
 
     // applies blocked ilu0
     // also applies amg for pressure component
     void apply(const cl::Buffer& y, cl::Buffer& x) override;
 
-    bool create_preconditioner(BlockedMatrix<double>* mat) override;
-    bool create_preconditioner(BlockedMatrix<double>* mat,
-                               BlockedMatrix<double>* jacMat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat,
+                               BlockedMatrix<Scalar>* jacMat) override;
 };
 
 // solve A^T * x = b
 // A should represent a 3x3 matrix
 // x and b are vectors with 3 elements
-void solve_transposed_3x3(const double *A, const double *b, double *x);
+template<class Scalar>
+void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x);
 
 } // namespace Opm::Accelerator
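For orientation, the public interface above implies a call sequence like the following from a driving solver. A minimal sketch, not taken from the repository: it assumes a valid context/queue pair, an assembled BlockedMatrix<double> A, and device buffers d_y/d_x; the order shown is one plausible sequence, error handling omitted:

    using namespace Opm::Accelerator;

    CPR<double, 3> cpr(/*opencl_ilu_parallel=*/true, /*verbosity=*/1);

    cpr.analyze_matrix(&A);        // record sizes and sparsity (Nb, nnzb)
    cpr.setOpencl(context, queue); // share context/queue, forwarded to bilu0
    cpr.create_preconditioner(&A); // BILU0 factorization + AMG hierarchy
    cpr.apply(d_y, d_x);           // x = M^-1 y: blocked ILU0 plus pressure AMG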
@@ -49,7 +49,7 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
     case Type::BILU0:
         return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     case Type::CPR:
-        return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity);
+        return std::make_unique<CPR<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     case Type::BISAI:
         return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     }
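With the factory forwarding the scalar type, callers now pick precision and block size through the Preconditioner template parameters. A sketch, assuming Type is the nested enum used in the switch above and that create returns std::unique_ptr<Preconditioner<Scalar,block_size>>:

    // Illustrative only: obtain the templated CPR through the factory.
    using P = Opm::Accelerator::Preconditioner<double, 3>;
    auto prec = P::create(P::Type::CPR,
                          /*opencl_ilu_parallel=*/true,
                          /*verbosity=*/1);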