Mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)

BISAI: template Scalar type

This commit is contained in:
parent 8ea523fa68
commit 1f39e6a9a9
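
The whole change follows one pattern: BISAI (and the BILU0 it owns) gains a Scalar template parameter, every hard-coded double inside the class becomes Scalar, and the per-block-size explicit instantiations are now generated per value type through an INSTANCE_TYPE macro, currently invoked only for double. A minimal, self-contained sketch of that pattern (stub Preconditioner class and trimmed members, for illustration only; not the actual OPM headers):

#include <vector>

// Stub standing in for Opm::Accelerator::Preconditioner (illustration only).
template<class Scalar, unsigned int block_size>
class Preconditioner
{
public:
    virtual ~Preconditioner() = default;
};

// Before this commit: template<unsigned int block_size> class BISAI
//                     : public Preconditioner<double, block_size>.
// After: the value type is a template parameter and is forwarded everywhere.
template<class Scalar, unsigned int block_size>
class BISAI : public Preconditioner<Scalar, block_size>
{
    std::vector<Scalar> invLvals; // was std::vector<double>
    std::vector<Scalar> invUvals; // was std::vector<double>
};

// Explicit instantiation per value type replaces the old per-block-size macro.
#define INSTANCE_TYPE(T) \
    template class BISAI<T,1>; \
    template class BISAI<T,2>; \
    template class BISAI<T,3>; \
    template class BISAI<T,4>; \
    template class BISAI<T,5>; \
    template class BISAI<T,6>;

INSTANCE_TYPE(double)
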
@@ -39,18 +39,20 @@ namespace Opm::Accelerator {
 using Opm::OpmLog;
 using Dune::Timer;

-template <unsigned int block_size>
-BISAI<block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_)
+template<class Scalar, unsigned int block_size>
+BISAI<Scalar,block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_)
     : Base(verbosity_)
 {
 #if CHOW_PATEL
     OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n");
 #endif
-    bilu0 = std::make_unique<BILU0<double,block_size>>(opencl_ilu_parallel_, verbosity_);
+    bilu0 = std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel_, verbosity_);
 }

-template <unsigned int block_size>
-void BISAI<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
+template<class Scalar, unsigned int block_size>
+void BISAI<Scalar,block_size>::
+setOpencl(std::shared_ptr<cl::Context>& context_,
+          std::shared_ptr<cl::CommandQueue>& queue_)
 {
     context = context_;
     queue = queue_;
@@ -58,7 +60,9 @@ void BISAI<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::s
     bilu0->setOpencl(context, queue);
 }

-std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices){
+std::vector<int>
+buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices)
+{
     std::vector<int> aux(colPointers); // colPointers must be copied to this vector
     std::vector<int> csrToCscOffsetMap(rowIndices.size()); // map must have the same size as the indices vector

@@ -74,15 +78,15 @@ std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vecto
     return csrToCscOffsetMap;
 }

-template <unsigned int block_size>
-bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat)
+template<class Scalar, unsigned int block_size>
+bool BISAI<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
 {
     return analyze_matrix(mat, nullptr);
 }

-template <unsigned int block_size>
-bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
-                                       BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool BISAI<Scalar,block_size>::
+analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
     const unsigned int bs = block_size;
     auto *m = mat;
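
buildCsrToCscOffsetMap above maps each nonzero's offset in one storage order to its offset in the transposed (CSC-like) order; only its first two lines fall inside the hunks shown. As a generic illustration of the idea, not the OPM routine, a CSR-to-CSC position map can be built like this (all names here are illustrative):

#include <vector>

// For a CSR pattern (rowPtr, colIdx), return for every CSR nonzero position p
// the position of the same (row, column) entry in the CSC layout.
std::vector<int> csrToCscPositions(const std::vector<int>& rowPtr,
                                   const std::vector<int>& colIdx,
                                   int numCols)
{
    const int nnz = static_cast<int>(colIdx.size());
    std::vector<int> colPtr(numCols + 1, 0);
    for (int p = 0; p < nnz; ++p)
        ++colPtr[colIdx[p] + 1];            // count nonzeros per column
    for (int c = 0; c < numCols; ++c)
        colPtr[c + 1] += colPtr[c];         // prefix sum -> CSC column pointers

    std::vector<int> next(colPtr.begin(), colPtr.end() - 1); // next free slot per column
    std::vector<int> map(nnz);
    const int numRows = static_cast<int>(rowPtr.size()) - 1;
    for (int r = 0; r < numRows; ++r)
        for (int p = rowPtr[r]; p < rowPtr[r + 1]; ++p)
            map[p] = next[colIdx[p]]++;     // CSC slot of the CSR nonzero at p
    return map;
}
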
@@ -103,21 +107,22 @@ bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
     }
 }

-template <unsigned int block_size>
-void BISAI<block_size>::buildLowerSubsystemsStructures(){
+template<class Scalar, unsigned int block_size>
+void BISAI<Scalar,block_size>::buildLowerSubsystemsStructures()
+{
     lower.subsystemPointers.assign(Nb + 1, 0);

     Dune::Timer t_buildLowerSubsystemsStructures;

-    for(int tcol = 0; tcol < Nb; tcol++){
+    for (int tcol = 0; tcol < Nb; tcol++) {
         int frow = diagIndex[tcol] + 1;
         int lrow = colPointers[tcol + 1];
         int nx = lrow - frow;
         int nv = 0;

-        for(int sweep = 0; sweep < nx - 1; sweep++){
-            for(int xid = sweep + 1; xid < nx; xid++){
-                for(int ptr = diagIndex[rowIndices[frow + sweep]] + 1; ptr < colPointers[rowIndices[frow + sweep + 1]]; ptr++){
+        for (int sweep = 0; sweep < nx - 1; sweep++) {
+            for (int xid = sweep + 1; xid < nx; xid++) {
+                for( int ptr = diagIndex[rowIndices[frow + sweep]] + 1; ptr < colPointers[rowIndices[frow + sweep + 1]]; ptr++) {
                     if(rowIndices[ptr] == rowIndices[frow + xid]){
                         lower.nzIndices.push_back(csrToCscOffsetMap[ptr]);
                         lower.knownRhsIndices.push_back(csrToCscOffsetMap[frow + sweep]);
@@ -131,29 +136,31 @@ void BISAI<block_size>::buildLowerSubsystemsStructures(){
         lower.subsystemPointers[tcol + 1] = lower.subsystemPointers[tcol] + nv;
     }

-    if(verbosity >= 4){
+    if (verbosity >= 4) {
         std::ostringstream out;
-        out << "BISAI buildLowerSubsystemsStructures time: " << t_buildLowerSubsystemsStructures.stop() << " s";
+        out << "BISAI buildLowerSubsystemsStructures time: "
+            << t_buildLowerSubsystemsStructures.stop() << " s";
         OpmLog::info(out.str());
     }
 }

-template <unsigned int block_size>
-void BISAI<block_size>::buildUpperSubsystemsStructures(){
+template<class Scalar, unsigned int block_size>
+void BISAI<Scalar,block_size>::buildUpperSubsystemsStructures()
+{
     upper.subsystemPointers.assign(Nb + 1, 0);

     Dune::Timer t_buildUpperSubsystemsStructures;

-    for(int tcol = 0; tcol < Nb; tcol++){
+    for (int tcol = 0; tcol < Nb; tcol++) {
         int frow = colPointers[tcol];
         int lrow = diagIndex[tcol];
         int nx = lrow - frow + 1;
         int nv = 0;

-        for(int sweep = 0; sweep < nx - 1; sweep++){
-            for(int xid = 0; xid < nx; xid++){
-                for(int ptr = colPointers[rowIndices[lrow - sweep]]; ptr < diagIndex[rowIndices[lrow - sweep]]; ptr++){
-                    if(rowIndices[ptr] == rowIndices[lrow - xid]){
+        for (int sweep = 0; sweep < nx - 1; sweep++) {
+            for (int xid = 0; xid < nx; xid++) {
+                for (int ptr = colPointers[rowIndices[lrow - sweep]]; ptr < diagIndex[rowIndices[lrow - sweep]]; ptr++) {
+                    if (rowIndices[ptr] == rowIndices[lrow - xid]) {
                         upper.nzIndices.push_back(csrToCscOffsetMap[ptr]);
                         upper.knownRhsIndices.push_back(csrToCscOffsetMap[lrow - sweep]);
                         upper.unknownRhsIndices.push_back(csrToCscOffsetMap[lrow - xid]);
@@ -166,17 +173,17 @@ void BISAI<block_size>::buildUpperSubsystemsStructures(){
         upper.subsystemPointers[tcol + 1] = upper.subsystemPointers[tcol] + nv;
     }

-    if(verbosity >= 4){
+    if (verbosity >= 4) {
         std::ostringstream out;
-        out << "BISAI buildUpperSubsystemsStructures time: " << t_buildUpperSubsystemsStructures.stop() << " s";
+        out << "BISAI buildUpperSubsystemsStructures time: "
+            << t_buildUpperSubsystemsStructures.stop() << " s";
         OpmLog::info(out.str());
     }
 }

-template <unsigned int block_size>
-bool BISAI<block_size>::
-create_preconditioner(BlockedMatrix<double>* mat,
-                      BlockedMatrix<double>* jacMat)
+template<class Scalar, unsigned int block_size>
+bool BISAI<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
     const unsigned int bs = block_size;

@@ -199,48 +206,93 @@ create_preconditioner(BlockedMatrix<double>* mat,
     buildLowerSubsystemsStructures();
     buildUpperSubsystemsStructures();

-    d_colPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * colPointers.size());
-    d_rowIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * rowIndices.size());
-    d_csrToCscOffsetMap = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * csrToCscOffsetMap.size());
-    d_diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * diagIndex.size());
-    d_invLvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs);
-    d_invUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs);
-    d_invL_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb * bs);
-    d_lower.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.subsystemPointers.size());
-    d_upper.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.subsystemPointers.size());
+    d_colPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * colPointers.size());
+    d_rowIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * rowIndices.size());
+    d_csrToCscOffsetMap = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * csrToCscOffsetMap.size());
+    d_diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * diagIndex.size());
+    d_invLvals = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(Scalar) * nnzb * bs * bs);
+    d_invUvals = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(Scalar) * nnzb * bs * bs);
+    d_invL_x = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(Scalar) * Nb * bs);
+    d_lower.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * lower.subsystemPointers.size());
+    d_upper.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE,
+        sizeof(int) * upper.subsystemPointers.size());

-    if(!lower.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
-        d_lower.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.nzIndices.size());
-        d_lower.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.knownRhsIndices.size());
-        d_lower.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.unknownRhsIndices.size());
+    if (!lower.nzIndices.empty()) { // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
+        d_lower.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * lower.nzIndices.size());
+        d_lower.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * lower.knownRhsIndices.size());
+        d_lower.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * lower.unknownRhsIndices.size());
     }

-    if(!upper.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
-        d_upper.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.nzIndices.size());
-        d_upper.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.knownRhsIndices.size());
-        d_upper.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.unknownRhsIndices.size());
+    if (!upper.nzIndices.empty()) { // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty
+        d_upper.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * upper.nzIndices.size());
+        d_upper.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * upper.knownRhsIndices.size());
+        d_upper.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE,
+            sizeof(int) * upper.unknownRhsIndices.size());
     }

     events.resize(6);
-    err = queue->enqueueWriteBuffer(d_colPointers, CL_FALSE, 0, colPointers.size() * sizeof(int), colPointers.data(), nullptr, &events[0]);
-    err |= queue->enqueueWriteBuffer(d_rowIndices, CL_FALSE, 0, rowIndices.size() * sizeof(int), rowIndices.data(), nullptr, &events[1]);
-    err |= queue->enqueueWriteBuffer(d_csrToCscOffsetMap, CL_FALSE, 0, csrToCscOffsetMap.size() * sizeof(int), csrToCscOffsetMap.data(), nullptr, &events[2]);
-    err |= queue->enqueueWriteBuffer(d_diagIndex, CL_FALSE, 0, diagIndex.size() * sizeof(int), diagIndex.data(), nullptr, &events[3]);
-    err |= queue->enqueueWriteBuffer(d_lower.subsystemPointers, CL_FALSE, 0, sizeof(int) * lower.subsystemPointers.size(), lower.subsystemPointers.data(), nullptr, &events[4]);
-    err |= queue->enqueueWriteBuffer(d_upper.subsystemPointers, CL_FALSE, 0, sizeof(int) * upper.subsystemPointers.size(), upper.subsystemPointers.data(), nullptr, &events[5]);
+    err = queue->enqueueWriteBuffer(d_colPointers, CL_FALSE, 0,
+        colPointers.size() * sizeof(int),
+        colPointers.data(), nullptr, &events[0]);
+    err |= queue->enqueueWriteBuffer(d_rowIndices, CL_FALSE, 0,
+        rowIndices.size() * sizeof(int),
+        rowIndices.data(), nullptr, &events[1]);
+    err |= queue->enqueueWriteBuffer(d_csrToCscOffsetMap, CL_FALSE, 0,
+        csrToCscOffsetMap.size() * sizeof(int),
+        csrToCscOffsetMap.data(), nullptr, &events[2]);
+    err |= queue->enqueueWriteBuffer(d_diagIndex, CL_FALSE, 0,
+        diagIndex.size() * sizeof(int),
+        diagIndex.data(), nullptr, &events[3]);
+    err |= queue->enqueueWriteBuffer(d_lower.subsystemPointers, CL_FALSE, 0,
+        sizeof(int) * lower.subsystemPointers.size(),
+        lower.subsystemPointers.data(), nullptr, &events[4]);
+    err |= queue->enqueueWriteBuffer(d_upper.subsystemPointers, CL_FALSE, 0,
+        sizeof(int) * upper.subsystemPointers.size(),
+        upper.subsystemPointers.data(), nullptr, &events[5]);

-    if(!lower.nzIndices.empty()){
+    if (!lower.nzIndices.empty()) {
         events.resize(events.size() + 3);
-        err |= queue->enqueueWriteBuffer(d_lower.nzIndices, CL_FALSE, 0, sizeof(int) * lower.nzIndices.size(), lower.nzIndices.data(), nullptr, &events[events.size() - 3]);
-        err |= queue->enqueueWriteBuffer(d_lower.knownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.knownRhsIndices.size(), lower.knownRhsIndices.data(), nullptr, &events[events.size() - 2]);
-        err |= queue->enqueueWriteBuffer(d_lower.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.unknownRhsIndices.size(), lower.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]);
+        err |= queue->enqueueWriteBuffer(d_lower.nzIndices, CL_FALSE, 0,
+            sizeof(int) * lower.nzIndices.size(),
+            lower.nzIndices.data(), nullptr,
+            &events[events.size() - 3]);
+        err |= queue->enqueueWriteBuffer(d_lower.knownRhsIndices, CL_FALSE, 0,
+            sizeof(int) * lower.knownRhsIndices.size(),
+            lower.knownRhsIndices.data(), nullptr,
+            &events[events.size() - 2]);
+        err |= queue->enqueueWriteBuffer(d_lower.unknownRhsIndices, CL_FALSE, 0,
+            sizeof(int) * lower.unknownRhsIndices.size(),
+            lower.unknownRhsIndices.data(), nullptr,
+            &events[events.size() - 1]);
     }

-    if(!upper.nzIndices.empty()){
+    if (!upper.nzIndices.empty()) {
         events.resize(events.size() + 3);
-        err |= queue->enqueueWriteBuffer(d_upper.nzIndices, CL_FALSE, 0, sizeof(int) * upper.nzIndices.size(), upper.nzIndices.data(), nullptr, &events[events.size() - 3]);
-        err |= queue->enqueueWriteBuffer(d_upper.knownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.knownRhsIndices.size(), upper.knownRhsIndices.data(), nullptr, &events[events.size() - 2]);
-        err |= queue->enqueueWriteBuffer(d_upper.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.unknownRhsIndices.size(), upper.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]);
+        err |= queue->enqueueWriteBuffer(d_upper.nzIndices, CL_FALSE,
+            0, sizeof(int) * upper.nzIndices.size(),
+            upper.nzIndices.data(), nullptr,
+            &events[events.size() - 3]);
+        err |= queue->enqueueWriteBuffer(d_upper.knownRhsIndices, CL_FALSE, 0,
+            sizeof(int) * upper.knownRhsIndices.size(),
+            upper.knownRhsIndices.data(), nullptr,
+            &events[events.size() - 2]);
+        err |= queue->enqueueWriteBuffer(d_upper.unknownRhsIndices, CL_FALSE, 0,
+            sizeof(int) * upper.unknownRhsIndices.size(),
+            upper.unknownRhsIndices.data(), nullptr,
+            &events[events.size() - 1]);
     }

     cl::WaitForEvents(events);
@@ -255,12 +307,14 @@ create_preconditioner(BlockedMatrix<double>* mat,
     std::tie(d_LUvals, d_invDiagVals) = bilu0->get_preconditioner_data();

     events.resize(2);
-    err = queue->enqueueFillBuffer(d_invLvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[0]);
-    err |= queue->enqueueFillBuffer(d_invUvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[1]);
+    err = queue->enqueueFillBuffer(d_invLvals, 0, 0,
+        sizeof(Scalar) * nnzb * bs * bs, nullptr, &events[0]);
+    err |= queue->enqueueFillBuffer(d_invUvals, 0, 0,
+        sizeof(Scalar) * nnzb * bs * bs, nullptr, &events[1]);
     cl::WaitForEvents(events);
     events.clear();

-    OpenclKernels<double>::isaiL(d_diagIndex, d_colPointers, d_csrToCscOffsetMap,
+    OpenclKernels<Scalar>::isaiL(d_diagIndex, d_colPointers, d_csrToCscOffsetMap,
         d_lower.subsystemPointers, d_lower.nzIndices,
         d_lower.unknownRhsIndices, d_lower.knownRhsIndices,
         d_LUvals, d_invLvals, Nb);
@@ -270,7 +324,7 @@ create_preconditioner(BlockedMatrix<double>* mat,
         d_upper.knownRhsIndices, d_LUvals,
         d_invDiagVals, d_invUvals, Nb);

-    if(verbosity >= 4){
+    if (verbosity >= 4) {
         std::ostringstream out;
         out << "BISAI createPreconditioner time: " << t_preconditioner.stop() << " s";
         OpmLog::info(out.str());
@@ -279,34 +333,34 @@ create_preconditioner(BlockedMatrix<double>* mat,
     return true;
 }

-template <unsigned int block_size>
-bool BISAI<block_size>::create_preconditioner(BlockedMatrix<double>* mat)
+template<class Scalar, unsigned int block_size>
+bool BISAI<Scalar,block_size>::
+create_preconditioner(BlockedMatrix<Scalar>* mat)
 {
     return create_preconditioner(mat, nullptr);
 }

-template <unsigned int block_size>
-void BISAI<block_size>::apply(const cl::Buffer& x, cl::Buffer& y){
+template<class Scalar, unsigned int block_size>
+void BISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
+{
     const unsigned int bs = block_size;

-    OpenclKernels<double>::spmv(d_invLvals, d_rowIndices, d_colPointers,
+    OpenclKernels<Scalar>::spmv(d_invLvals, d_rowIndices, d_colPointers,
         x, d_invL_x, Nb, bs, true, true); // application of isaiL is a simple spmv with addition
                                           // (to compensate for the unitary diagonal that is not
                                           // included in isaiL, for simplicity)
-    OpenclKernels<double>::spmv(d_invUvals, d_rowIndices, d_colPointers,
+    OpenclKernels<Scalar>::spmv(d_invUvals, d_rowIndices, d_colPointers,
         d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv
 }

-#define INSTANTIATE_BDA_FUNCTIONS(n) \
-template class BISAI<n>;
+#define INSTANCE_TYPE(T) \
+template class BISAI<T,1>; \
+template class BISAI<T,2>; \
+template class BISAI<T,3>; \
+template class BISAI<T,4>; \
+template class BISAI<T,5>; \
+template class BISAI<T,6>;

-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
+INSTANCE_TYPE(double)

 } // namespace Opm::Accelerator
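
The apply() in the hunk above composes two sparse matrix-vector products over the same sparsity pattern: the approximate inverse of L (stored without its unit diagonal, hence the extra addition of the input vector) followed by the approximate inverse of U. A scalar (block_size == 1) CPU sketch of that composition, with illustrative names and plain CSR arrays rather than the OpenCL kernels (assumptions, not OPM code):

#include <cstddef>
#include <vector>

// out = A * in, optionally plus in itself (stands in for the implicit unit diagonal).
void spmvCsr(const std::vector<int>& rowPtr, const std::vector<int>& colIdx,
             const std::vector<double>& vals, const std::vector<double>& in,
             std::vector<double>& out, bool addInput = false)
{
    const std::size_t n = rowPtr.size() - 1;
    out.assign(n, 0.0);
    for (std::size_t i = 0; i < n; ++i) {
        double sum = addInput ? in[i] : 0.0;
        for (int p = rowPtr[i]; p < rowPtr[i + 1]; ++p)
            sum += vals[p] * in[colIdx[p]];
        out[i] = sum;
    }
}

// y = invU * (invL * x + x): the two-stage ISAI application mirrored on the CPU.
void applyIsai(const std::vector<int>& rowPtr, const std::vector<int>& colIdx,
               const std::vector<double>& invLvals, const std::vector<double>& invUvals,
               const std::vector<double>& x, std::vector<double>& y)
{
    std::vector<double> tmp;
    spmvCsr(rowPtr, colIdx, invLvals, x, tmp, /*addInput=*/true);
    spmvCsr(rowPtr, colIdx, invUvals, tmp, y);
}
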
@@ -32,10 +32,10 @@ template<class Scalar> class BlockedMatrix;

 /// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
 /// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al.
-template <unsigned int block_size>
-class BISAI : public Preconditioner<double,block_size>
+template<class Scalar, unsigned int block_size>
+class BISAI : public Preconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<double,block_size>;
+    using Base = Preconditioner<Scalar,block_size>;

     using Base::N;
     using Base::Nb;
@@ -54,8 +54,8 @@ private:
     std::vector<int> rowIndices;
     std::vector<int> diagIndex;
     std::vector<int> csrToCscOffsetMap;
-    std::vector<double> invLvals;
-    std::vector<double> invUvals;
+    std::vector<Scalar> invLvals;
+    std::vector<Scalar> invUvals;

     cl::Buffer d_colPointers;
     cl::Buffer d_rowIndices;
@@ -68,10 +68,10 @@ private:
     cl::Buffer d_invL_x;

     bool opencl_ilu_parallel;
-    std::unique_ptr<BILU0<double,block_size>> bilu0;
+    std::unique_ptr<BILU0<Scalar,block_size>> bilu0;

     /// Struct that holds the structure of the small subsystems for each column
-    typedef struct{
+    struct subsystemStructure {
         /// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem.
         /// Works similarly to row and column pointers for the CSR and CSC matrix representations.
         std::vector<int> subsystemPointers;
@@ -85,15 +85,15 @@ private:
         std::vector<int> knownRhsIndices;
         /// This vector holds the indices of the unknown values of the right hand sides of the subsystems.
         std::vector<int> unknownRhsIndices;
-    } subsystemStructure;
+    };

     /// GPU version of subsystemStructure
-    typedef struct{
+    struct subsystemStructureGPU {
         cl::Buffer subsystemPointers;
         cl::Buffer nzIndices;
         cl::Buffer knownRhsIndices;
         cl::Buffer unknownRhsIndices;
-    } subsystemStructureGPU;
+    } ;

     subsystemStructure lower, upper;
     subsystemStructureGPU d_lower, d_upper;
@@ -110,17 +110,18 @@ public:
     BISAI(bool opencl_ilu_parallel, int verbosity);

     // set own Opencl variables, but also that of the bilu0 preconditioner
-    void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
+    void setOpencl(std::shared_ptr<cl::Context>& context,
+                   std::shared_ptr<cl::CommandQueue>& queue) override;

     // analysis, extract parallelism
-    bool analyze_matrix(BlockedMatrix<double>* mat) override;
-    bool analyze_matrix(BlockedMatrix<double>* mat,
-                        BlockedMatrix<double>* jacMat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
+    bool analyze_matrix(BlockedMatrix<Scalar>* mat,
+                        BlockedMatrix<Scalar>* jacMat) override;

     // ilu_decomposition
-    bool create_preconditioner(BlockedMatrix<double>* mat) override;
-    bool create_preconditioner(BlockedMatrix<double>* mat,
-                               BlockedMatrix<double>* jacMat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
+    bool create_preconditioner(BlockedMatrix<Scalar>* mat,
+                               BlockedMatrix<Scalar>* jacMat) override;

     // apply preconditioner, x = prec(y)
     void apply(const cl::Buffer& y, cl::Buffer& x) override;
@@ -47,11 +47,11 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
 {
     switch (type ) {
     case Type::BILU0:
-        return std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity);
+        return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     case Type::CPR:
         return std::make_unique<CPR<block_size> >(opencl_ilu_parallel, verbosity);
     case Type::BISAI:
-        return std::make_unique<BISAI<block_size> >(opencl_ilu_parallel, verbosity);
+        return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
     }

     OPM_THROW(std::logic_error,