BISAI: template Scalar type

This commit is contained in:
Arne Morten Kvarving 2024-04-15 22:38:04 +02:00
parent 8ea523fa68
commit 1f39e6a9a9
3 changed files with 157 additions and 102 deletions

View File

@ -39,18 +39,20 @@ namespace Opm::Accelerator {
using Opm::OpmLog; using Opm::OpmLog;
using Dune::Timer; using Dune::Timer;
// BISAI constructor. Each line of this diff view shows the old revision
// followed by the new revision.
// - Forwards verbosity_ to the Base preconditioner.
// - Throws std::logic_error when the CHOW_PATEL preprocessor condition holds.
// - Creates the internal BILU0 preconditioner with the same
//   opencl_ilu_parallel_ / verbosity_ arguments; the new revision
//   instantiates it with Scalar instead of the hard-coded double.
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
BISAI<block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_) BISAI<Scalar,block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_)
: Base(verbosity_) : Base(verbosity_)
{ {
#if CHOW_PATEL #if CHOW_PATEL
OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n"); OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n");
#endif #endif
bilu0 = std::make_unique<BILU0<double,block_size>>(opencl_ilu_parallel_, verbosity_); bilu0 = std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel_, verbosity_);
} }
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
void BISAI<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_) void BISAI<Scalar,block_size>::
setOpencl(std::shared_ptr<cl::Context>& context_,
std::shared_ptr<cl::CommandQueue>& queue_)
{ {
context = context_; context = context_;
queue = queue_; queue = queue_;
@ -58,7 +60,9 @@ void BISAI<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::s
bilu0->setOpencl(context, queue); bilu0->setOpencl(context, queue);
} }
std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices){ std::vector<int>
buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices)
{
std::vector<int> aux(colPointers); // colPointers must be copied to this vector std::vector<int> aux(colPointers); // colPointers must be copied to this vector
std::vector<int> csrToCscOffsetMap(rowIndices.size()); // map must have the same size as the indices vector std::vector<int> csrToCscOffsetMap(rowIndices.size()); // map must have the same size as the indices vector
@ -74,15 +78,15 @@ std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vecto
return csrToCscOffsetMap; return csrToCscOffsetMap;
} }
// Single-matrix overload of analyze_matrix: forwards to the two-argument
// overload, passing nullptr for jacMat, and returns its result.
// The new revision takes BlockedMatrix<Scalar>* instead of BlockedMatrix<double>*.
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat) bool BISAI<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
{ {
return analyze_matrix(mat, nullptr); return analyze_matrix(mat, nullptr);
} }
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat, bool BISAI<Scalar,block_size>::
BlockedMatrix<double>* jacMat) analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
{ {
const unsigned int bs = block_size; const unsigned int bs = block_size;
auto *m = mat; auto *m = mat;
@ -103,21 +107,22 @@ bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
} }
} }
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
void BISAI<block_size>::buildLowerSubsystemsStructures(){ void BISAI<Scalar,block_size>::buildLowerSubsystemsStructures()
{
lower.subsystemPointers.assign(Nb + 1, 0); lower.subsystemPointers.assign(Nb + 1, 0);
Dune::Timer t_buildLowerSubsystemsStructures; Dune::Timer t_buildLowerSubsystemsStructures;
for(int tcol = 0; tcol < Nb; tcol++){ for (int tcol = 0; tcol < Nb; tcol++) {
int frow = diagIndex[tcol] + 1; int frow = diagIndex[tcol] + 1;
int lrow = colPointers[tcol + 1]; int lrow = colPointers[tcol + 1];
int nx = lrow - frow; int nx = lrow - frow;
int nv = 0; int nv = 0;
for(int sweep = 0; sweep < nx - 1; sweep++){ for (int sweep = 0; sweep < nx - 1; sweep++) {
for(int xid = sweep + 1; xid < nx; xid++){ for (int xid = sweep + 1; xid < nx; xid++) {
for(int ptr = diagIndex[rowIndices[frow + sweep]] + 1; ptr < colPointers[rowIndices[frow + sweep + 1]]; ptr++){ for( int ptr = diagIndex[rowIndices[frow + sweep]] + 1; ptr < colPointers[rowIndices[frow + sweep + 1]]; ptr++) {
if(rowIndices[ptr] == rowIndices[frow + xid]){ if(rowIndices[ptr] == rowIndices[frow + xid]){
lower.nzIndices.push_back(csrToCscOffsetMap[ptr]); lower.nzIndices.push_back(csrToCscOffsetMap[ptr]);
lower.knownRhsIndices.push_back(csrToCscOffsetMap[frow + sweep]); lower.knownRhsIndices.push_back(csrToCscOffsetMap[frow + sweep]);
@ -131,29 +136,31 @@ void BISAI<block_size>::buildLowerSubsystemsStructures(){
lower.subsystemPointers[tcol + 1] = lower.subsystemPointers[tcol] + nv; lower.subsystemPointers[tcol + 1] = lower.subsystemPointers[tcol] + nv;
} }
if(verbosity >= 4){ if (verbosity >= 4) {
std::ostringstream out; std::ostringstream out;
out << "BISAI buildLowerSubsystemsStructures time: " << t_buildLowerSubsystemsStructures.stop() << " s"; out << "BISAI buildLowerSubsystemsStructures time: "
<< t_buildLowerSubsystemsStructures.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
} }
} }
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
void BISAI<block_size>::buildUpperSubsystemsStructures(){ void BISAI<Scalar,block_size>::buildUpperSubsystemsStructures()
{
upper.subsystemPointers.assign(Nb + 1, 0); upper.subsystemPointers.assign(Nb + 1, 0);
Dune::Timer t_buildUpperSubsystemsStructures; Dune::Timer t_buildUpperSubsystemsStructures;
for(int tcol = 0; tcol < Nb; tcol++){ for (int tcol = 0; tcol < Nb; tcol++) {
int frow = colPointers[tcol]; int frow = colPointers[tcol];
int lrow = diagIndex[tcol]; int lrow = diagIndex[tcol];
int nx = lrow - frow + 1; int nx = lrow - frow + 1;
int nv = 0; int nv = 0;
for(int sweep = 0; sweep < nx - 1; sweep++){ for (int sweep = 0; sweep < nx - 1; sweep++) {
for(int xid = 0; xid < nx; xid++){ for (int xid = 0; xid < nx; xid++) {
for(int ptr = colPointers[rowIndices[lrow - sweep]]; ptr < diagIndex[rowIndices[lrow - sweep]]; ptr++){ for (int ptr = colPointers[rowIndices[lrow - sweep]]; ptr < diagIndex[rowIndices[lrow - sweep]]; ptr++) {
if(rowIndices[ptr] == rowIndices[lrow - xid]){ if (rowIndices[ptr] == rowIndices[lrow - xid]) {
upper.nzIndices.push_back(csrToCscOffsetMap[ptr]); upper.nzIndices.push_back(csrToCscOffsetMap[ptr]);
upper.knownRhsIndices.push_back(csrToCscOffsetMap[lrow - sweep]); upper.knownRhsIndices.push_back(csrToCscOffsetMap[lrow - sweep]);
upper.unknownRhsIndices.push_back(csrToCscOffsetMap[lrow - xid]); upper.unknownRhsIndices.push_back(csrToCscOffsetMap[lrow - xid]);
@ -166,17 +173,17 @@ void BISAI<block_size>::buildUpperSubsystemsStructures(){
upper.subsystemPointers[tcol + 1] = upper.subsystemPointers[tcol] + nv; upper.subsystemPointers[tcol + 1] = upper.subsystemPointers[tcol] + nv;
} }
if(verbosity >= 4){ if (verbosity >= 4) {
std::ostringstream out; std::ostringstream out;
out << "BISAI buildUpperSubsystemsStructures time: " << t_buildUpperSubsystemsStructures.stop() << " s"; out << "BISAI buildUpperSubsystemsStructures time: "
<< t_buildUpperSubsystemsStructures.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
} }
} }
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
bool BISAI<block_size>:: bool BISAI<Scalar,block_size>::
create_preconditioner(BlockedMatrix<double>* mat, create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
BlockedMatrix<double>* jacMat)
{ {
const unsigned int bs = block_size; const unsigned int bs = block_size;
@ -199,48 +206,93 @@ create_preconditioner(BlockedMatrix<double>* mat,
buildLowerSubsystemsStructures(); buildLowerSubsystemsStructures();
buildUpperSubsystemsStructures(); buildUpperSubsystemsStructures();
d_colPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * colPointers.size()); d_colPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_rowIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * rowIndices.size()); sizeof(int) * colPointers.size());
d_csrToCscOffsetMap = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * csrToCscOffsetMap.size()); d_rowIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * diagIndex.size()); sizeof(int) * rowIndices.size());
d_invLvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs); d_csrToCscOffsetMap = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_invUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs); sizeof(int) * csrToCscOffsetMap.size());
d_invL_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb * bs); d_diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_lower.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.subsystemPointers.size()); sizeof(int) * diagIndex.size());
d_upper.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.subsystemPointers.size()); d_invLvals = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(Scalar) * nnzb * bs * bs);
d_invUvals = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(Scalar) * nnzb * bs * bs);
d_invL_x = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(Scalar) * Nb * bs);
d_lower.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * lower.subsystemPointers.size());
d_upper.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * upper.subsystemPointers.size());
if(!lower.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty if (!lower.nzIndices.empty()) { // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
d_lower.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.nzIndices.size()); d_lower.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_lower.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.knownRhsIndices.size()); sizeof(int) * lower.nzIndices.size());
d_lower.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.unknownRhsIndices.size()); d_lower.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * lower.knownRhsIndices.size());
d_lower.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * lower.unknownRhsIndices.size());
} }
if(!upper.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty if (!upper.nzIndices.empty()) { // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
d_upper.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.nzIndices.size()); d_upper.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
d_upper.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.knownRhsIndices.size()); sizeof(int) * upper.nzIndices.size());
d_upper.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.unknownRhsIndices.size()); d_upper.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * upper.knownRhsIndices.size());
d_upper.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
sizeof(int) * upper.unknownRhsIndices.size());
} }
events.resize(6); events.resize(6);
err = queue->enqueueWriteBuffer(d_colPointers, CL_FALSE, 0, colPointers.size() * sizeof(int), colPointers.data(), nullptr, &events[0]); err = queue->enqueueWriteBuffer(d_colPointers, CL_FALSE, 0,
err |= queue->enqueueWriteBuffer(d_rowIndices, CL_FALSE, 0, rowIndices.size() * sizeof(int), rowIndices.data(), nullptr, &events[1]); colPointers.size() * sizeof(int),
err |= queue->enqueueWriteBuffer(d_csrToCscOffsetMap, CL_FALSE, 0, csrToCscOffsetMap.size() * sizeof(int), csrToCscOffsetMap.data(), nullptr, &events[2]); colPointers.data(), nullptr, &events[0]);
err |= queue->enqueueWriteBuffer(d_diagIndex, CL_FALSE, 0, diagIndex.size() * sizeof(int), diagIndex.data(), nullptr, &events[3]); err |= queue->enqueueWriteBuffer(d_rowIndices, CL_FALSE, 0,
err |= queue->enqueueWriteBuffer(d_lower.subsystemPointers, CL_FALSE, 0, sizeof(int) * lower.subsystemPointers.size(), lower.subsystemPointers.data(), nullptr, &events[4]); rowIndices.size() * sizeof(int),
err |= queue->enqueueWriteBuffer(d_upper.subsystemPointers, CL_FALSE, 0, sizeof(int) * upper.subsystemPointers.size(), upper.subsystemPointers.data(), nullptr, &events[5]); rowIndices.data(), nullptr, &events[1]);
err |= queue->enqueueWriteBuffer(d_csrToCscOffsetMap, CL_FALSE, 0,
csrToCscOffsetMap.size() * sizeof(int),
csrToCscOffsetMap.data(), nullptr, &events[2]);
err |= queue->enqueueWriteBuffer(d_diagIndex, CL_FALSE, 0,
diagIndex.size() * sizeof(int),
diagIndex.data(), nullptr, &events[3]);
err |= queue->enqueueWriteBuffer(d_lower.subsystemPointers, CL_FALSE, 0,
sizeof(int) * lower.subsystemPointers.size(),
lower.subsystemPointers.data(), nullptr, &events[4]);
err |= queue->enqueueWriteBuffer(d_upper.subsystemPointers, CL_FALSE, 0,
sizeof(int) * upper.subsystemPointers.size(),
upper.subsystemPointers.data(), nullptr, &events[5]);
if(!lower.nzIndices.empty()){ if (!lower.nzIndices.empty()) {
events.resize(events.size() + 3); events.resize(events.size() + 3);
err |= queue->enqueueWriteBuffer(d_lower.nzIndices, CL_FALSE, 0, sizeof(int) * lower.nzIndices.size(), lower.nzIndices.data(), nullptr, &events[events.size() - 3]); err |= queue->enqueueWriteBuffer(d_lower.nzIndices, CL_FALSE, 0,
err |= queue->enqueueWriteBuffer(d_lower.knownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.knownRhsIndices.size(), lower.knownRhsIndices.data(), nullptr, &events[events.size() - 2]); sizeof(int) * lower.nzIndices.size(),
err |= queue->enqueueWriteBuffer(d_lower.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.unknownRhsIndices.size(), lower.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]); lower.nzIndices.data(), nullptr,
&events[events.size() - 3]);
err |= queue->enqueueWriteBuffer(d_lower.knownRhsIndices, CL_FALSE, 0,
sizeof(int) * lower.knownRhsIndices.size(),
lower.knownRhsIndices.data(), nullptr,
&events[events.size() - 2]);
err |= queue->enqueueWriteBuffer(d_lower.unknownRhsIndices, CL_FALSE, 0,
sizeof(int) * lower.unknownRhsIndices.size(),
lower.unknownRhsIndices.data(), nullptr,
&events[events.size() - 1]);
} }
if(!upper.nzIndices.empty()){ if (!upper.nzIndices.empty()) {
events.resize(events.size() + 3); events.resize(events.size() + 3);
err |= queue->enqueueWriteBuffer(d_upper.nzIndices, CL_FALSE, 0, sizeof(int) * upper.nzIndices.size(), upper.nzIndices.data(), nullptr, &events[events.size() - 3]); err |= queue->enqueueWriteBuffer(d_upper.nzIndices, CL_FALSE,
err |= queue->enqueueWriteBuffer(d_upper.knownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.knownRhsIndices.size(), upper.knownRhsIndices.data(), nullptr, &events[events.size() - 2]); 0, sizeof(int) * upper.nzIndices.size(),
err |= queue->enqueueWriteBuffer(d_upper.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.unknownRhsIndices.size(), upper.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]); upper.nzIndices.data(), nullptr,
&events[events.size() - 3]);
err |= queue->enqueueWriteBuffer(d_upper.knownRhsIndices, CL_FALSE, 0,
sizeof(int) * upper.knownRhsIndices.size(),
upper.knownRhsIndices.data(), nullptr,
&events[events.size() - 2]);
err |= queue->enqueueWriteBuffer(d_upper.unknownRhsIndices, CL_FALSE, 0,
sizeof(int) * upper.unknownRhsIndices.size(),
upper.unknownRhsIndices.data(), nullptr,
&events[events.size() - 1]);
} }
cl::WaitForEvents(events); cl::WaitForEvents(events);
@ -255,12 +307,14 @@ create_preconditioner(BlockedMatrix<double>* mat,
std::tie(d_LUvals, d_invDiagVals) = bilu0->get_preconditioner_data(); std::tie(d_LUvals, d_invDiagVals) = bilu0->get_preconditioner_data();
events.resize(2); events.resize(2);
err = queue->enqueueFillBuffer(d_invLvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[0]); err = queue->enqueueFillBuffer(d_invLvals, 0, 0,
err |= queue->enqueueFillBuffer(d_invUvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[1]); sizeof(Scalar) * nnzb * bs * bs, nullptr, &events[0]);
err |= queue->enqueueFillBuffer(d_invUvals, 0, 0,
sizeof(Scalar) * nnzb * bs * bs, nullptr, &events[1]);
cl::WaitForEvents(events); cl::WaitForEvents(events);
events.clear(); events.clear();
OpenclKernels<double>::isaiL(d_diagIndex, d_colPointers, d_csrToCscOffsetMap, OpenclKernels<Scalar>::isaiL(d_diagIndex, d_colPointers, d_csrToCscOffsetMap,
d_lower.subsystemPointers, d_lower.nzIndices, d_lower.subsystemPointers, d_lower.nzIndices,
d_lower.unknownRhsIndices, d_lower.knownRhsIndices, d_lower.unknownRhsIndices, d_lower.knownRhsIndices,
d_LUvals, d_invLvals, Nb); d_LUvals, d_invLvals, Nb);
@ -270,7 +324,7 @@ create_preconditioner(BlockedMatrix<double>* mat,
d_upper.knownRhsIndices, d_LUvals, d_upper.knownRhsIndices, d_LUvals,
d_invDiagVals, d_invUvals, Nb); d_invDiagVals, d_invUvals, Nb);
if(verbosity >= 4){ if (verbosity >= 4) {
std::ostringstream out; std::ostringstream out;
out << "BISAI createPreconditioner time: " << t_preconditioner.stop() << " s"; out << "BISAI createPreconditioner time: " << t_preconditioner.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
@ -279,34 +333,34 @@ create_preconditioner(BlockedMatrix<double>* mat,
return true; return true;
} }
// Single-matrix overload of create_preconditioner: forwards to the
// two-argument overload, passing nullptr for jacMat, and returns its result.
// The new revision takes BlockedMatrix<Scalar>* instead of BlockedMatrix<double>*.
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
bool BISAI<block_size>::create_preconditioner(BlockedMatrix<double>* mat) bool BISAI<Scalar,block_size>::
create_preconditioner(BlockedMatrix<Scalar>* mat)
{ {
return create_preconditioner(mat, nullptr); return create_preconditioner(mat, nullptr);
} }
// Applies the preconditioner using two consecutive spmv kernel calls:
//   1. d_invLvals with input x, result stored in the intermediate d_invL_x;
//   2. d_invUvals with input d_invL_x, result stored in y.
// Both calls use the same d_rowIndices / d_colPointers sparsity arrays,
// with Nb and bs (= block_size) as size arguments.
// NOTE(review): the two trailing `true` flags on the first call presumably
// enable the "with addition" behaviour described in the inline comment;
// confirm against the OpenclKernels::spmv signature.
// The new revision dispatches to OpenclKernels<Scalar> instead of OpenclKernels<double>.
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
void BISAI<block_size>::apply(const cl::Buffer& x, cl::Buffer& y){ void BISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
{
const unsigned int bs = block_size; const unsigned int bs = block_size;
OpenclKernels<double>::spmv(d_invLvals, d_rowIndices, d_colPointers, OpenclKernels<Scalar>::spmv(d_invLvals, d_rowIndices, d_colPointers,
x, d_invL_x, Nb, bs, true, true); // application of isaiL is a simple spmv with addition x, d_invL_x, Nb, bs, true, true); // application of isaiL is a simple spmv with addition
// (to compensate for the unitary diagonal that is not // (to compensate for the unitary diagonal that is not
// included in isaiL, for simplicity) // included in isaiL, for simplicity)
OpenclKernels<double>::spmv(d_invUvals, d_rowIndices, d_colPointers, OpenclKernels<Scalar>::spmv(d_invUvals, d_rowIndices, d_colPointers,
d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv
} }
// Explicit template instantiations of BISAI.
// Old revision: INSTANTIATE_BDA_FUNCTIONS(n) instantiates BISAI<n> and is
// invoked once per block size 1..6, then #undef'd.
// New revision: INSTANCE_TYPE(T) instantiates BISAI<T,1> .. BISAI<T,6> in a
// single macro and is invoked once, for T = double.
#define INSTANTIATE_BDA_FUNCTIONS(n) \ #define INSTANCE_TYPE(T) \
template class BISAI<n>; template class BISAI<T,1>; \
template class BISAI<T,2>; \
template class BISAI<T,3>; \
template class BISAI<T,4>; \
template class BISAI<T,5>; \
template class BISAI<T,6>;
INSTANTIATE_BDA_FUNCTIONS(1); INSTANCE_TYPE(double)
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
} // namespace Opm::Accelerator } // namespace Opm::Accelerator

View File

@ -32,10 +32,10 @@ template<class Scalar> class BlockedMatrix;
/// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner. /// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
/// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et al. /// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et al.
template <unsigned int block_size> template<class Scalar, unsigned int block_size>
class BISAI : public Preconditioner<double,block_size> class BISAI : public Preconditioner<Scalar,block_size>
{ {
using Base = Preconditioner<double,block_size>; using Base = Preconditioner<Scalar,block_size>;
using Base::N; using Base::N;
using Base::Nb; using Base::Nb;
@ -54,8 +54,8 @@ private:
std::vector<int> rowIndices; std::vector<int> rowIndices;
std::vector<int> diagIndex; std::vector<int> diagIndex;
std::vector<int> csrToCscOffsetMap; std::vector<int> csrToCscOffsetMap;
std::vector<double> invLvals; std::vector<Scalar> invLvals;
std::vector<double> invUvals; std::vector<Scalar> invUvals;
cl::Buffer d_colPointers; cl::Buffer d_colPointers;
cl::Buffer d_rowIndices; cl::Buffer d_rowIndices;
@ -68,10 +68,10 @@ private:
cl::Buffer d_invL_x; cl::Buffer d_invL_x;
bool opencl_ilu_parallel; bool opencl_ilu_parallel;
std::unique_ptr<BILU0<double,block_size>> bilu0; std::unique_ptr<BILU0<Scalar,block_size>> bilu0;
/// Struct that holds the structure of the small subsystems for each column /// Struct that holds the structure of the small subsystems for each column
typedef struct{ struct subsystemStructure {
/// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem. /// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem.
/// Works similarly to row and column pointers for the CSR and CSC matrix representations. /// Works similarly to row and column pointers for the CSR and CSC matrix representations.
std::vector<int> subsystemPointers; std::vector<int> subsystemPointers;
@ -85,15 +85,15 @@ private:
std::vector<int> knownRhsIndices; std::vector<int> knownRhsIndices;
/// This vector holds the indices of the unknown values of the right hand sides of the subsystems. /// This vector holds the indices of the unknown values of the right hand sides of the subsystems.
std::vector<int> unknownRhsIndices; std::vector<int> unknownRhsIndices;
} subsystemStructure; };
/// GPU version of subsystemStructure /// GPU version of subsystemStructure
typedef struct{ struct subsystemStructureGPU {
cl::Buffer subsystemPointers; cl::Buffer subsystemPointers;
cl::Buffer nzIndices; cl::Buffer nzIndices;
cl::Buffer knownRhsIndices; cl::Buffer knownRhsIndices;
cl::Buffer unknownRhsIndices; cl::Buffer unknownRhsIndices;
} subsystemStructureGPU; } ;
subsystemStructure lower, upper; subsystemStructure lower, upper;
subsystemStructureGPU d_lower, d_upper; subsystemStructureGPU d_lower, d_upper;
@ -110,17 +110,18 @@ public:
BISAI(bool opencl_ilu_parallel, int verbosity); BISAI(bool opencl_ilu_parallel, int verbosity);
// set own Opencl variables, but also that of the bilu0 preconditioner // set own Opencl variables, but also that of the bilu0 preconditioner
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override; void setOpencl(std::shared_ptr<cl::Context>& context,
std::shared_ptr<cl::CommandQueue>& queue) override;
// analysis, extract parallelism // analysis, extract parallelism
bool analyze_matrix(BlockedMatrix<double>* mat) override; bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
bool analyze_matrix(BlockedMatrix<double>* mat, bool analyze_matrix(BlockedMatrix<Scalar>* mat,
BlockedMatrix<double>* jacMat) override; BlockedMatrix<Scalar>* jacMat) override;
// ilu_decomposition // ilu_decomposition
bool create_preconditioner(BlockedMatrix<double>* mat) override; bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
bool create_preconditioner(BlockedMatrix<double>* mat, bool create_preconditioner(BlockedMatrix<Scalar>* mat,
BlockedMatrix<double>* jacMat) override; BlockedMatrix<Scalar>* jacMat) override;
// apply preconditioner, x = prec(y) // apply preconditioner, x = prec(y)
void apply(const cl::Buffer& y, cl::Buffer& x) override; void apply(const cl::Buffer& y, cl::Buffer& x) override;

View File

@ -47,11 +47,11 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
{ {
switch (type ) { switch (type ) {
case Type::BILU0: case Type::BILU0:
return std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity); return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
case Type::CPR: case Type::CPR:
return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity); return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity);
case Type::BISAI: case Type::BISAI:
return std::make_unique<BISAI<block_size> >(opencl_ilu_parallel, verbosity); return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
} }
OPM_THROW(std::logic_error, OPM_THROW(std::logic_error,