mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
157 lines
5.5 KiB
C++
157 lines
5.5 KiB
C++
/*
|
|
Copyright 2022 Equinor ASA
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef BISAI_HPP
|
|
#define BISAI_HPP
|
|
|
|
#include <mutex>
|
|
|
|
#include <opm/simulators/linalg/bda/opencl/BILU0.hpp>
|
|
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
|
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
|
|
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
|
|
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
|
|
|
namespace Opm
|
|
{
|
|
namespace Accelerator
|
|
{
|
|
|
|
template <unsigned int block_size>
|
|
class openclSolverBackend;
|
|
|
|
class BlockedMatrix;
|
|
|
|
/// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
|
|
/// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al.
|
|
template <unsigned int block_size>
|
|
class BISAI : public Preconditioner<block_size>
|
|
{
|
|
typedef Preconditioner<block_size> Base;
|
|
|
|
using Base::N;
|
|
using Base::Nb;
|
|
using Base::nnz;
|
|
using Base::nnzb;
|
|
using Base::verbosity;
|
|
using Base::context;
|
|
using Base::queue;
|
|
using Base::events;
|
|
using Base::err;
|
|
|
|
private:
|
|
std::once_flag initialize;
|
|
|
|
std::vector<int> colPointers;
|
|
std::vector<int> rowIndices;
|
|
std::vector<int> diagIndex;
|
|
std::vector<int> csrToCscOffsetMap;
|
|
std::vector<double> invLvals;
|
|
std::vector<double> invUvals;
|
|
|
|
cl::Buffer d_colPointers;
|
|
cl::Buffer d_rowIndices;
|
|
cl::Buffer d_csrToCscOffsetMap;
|
|
cl::Buffer d_diagIndex;
|
|
cl::Buffer d_LUvals;
|
|
cl::Buffer d_invDiagVals;
|
|
cl::Buffer d_invLvals;
|
|
cl::Buffer d_invUvals;
|
|
cl::Buffer d_invL_x;
|
|
|
|
ILUReorder opencl_ilu_reorder;
|
|
std::unique_ptr<BILU0<block_size> > bilu0;
|
|
|
|
/// Struct that holds the structure of the small subsystems for each column
|
|
typedef struct{
|
|
/// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem.
|
|
/// Works similarly to row and column pointers for the CSR and CSC matrix representations.
|
|
std::vector<int> subsystemPointers;
|
|
/// This vector holds the indices of the non-zero blocks for the target subsystem. These blocks are
|
|
/// the ones that are present in the shadow set of the non-zero blocks of column j of the main matrix,
|
|
/// as described in section 2.3 of the paper. The amount of non-zero blocks for j-th subsystem is
|
|
/// given by subsystemPointers[j+1] - subsystemPointers[j].
|
|
std::vector<int> nzIndices;
|
|
/// This vector holds the indices of the already known values of the right hand sides of the subsystems.
|
|
/// Its purpose is to aid in the parallel solution of the subsystems.
|
|
std::vector<int> knownRhsIndices;
|
|
/// This vector holds the indices of the unknown values of the right hand sides of the subsystems.
|
|
std::vector<int> unknownRhsIndices;
|
|
} subsystemStructure;
|
|
|
|
/// GPU version of subsystemStructure
|
|
typedef struct{
|
|
cl::Buffer subsystemPointers;
|
|
cl::Buffer nzIndices;
|
|
cl::Buffer knownRhsIndices;
|
|
cl::Buffer unknownRhsIndices;
|
|
} subsystemStructureGPU;
|
|
|
|
subsystemStructure lower, upper;
|
|
subsystemStructureGPU d_lower, d_upper;
|
|
|
|
/// An approximate inverse for L is computed by solving a small lower triangular system for each column of the main matrix.
|
|
/// This function finds the structure of each of these subsystems and fills the 'lower' struct.
|
|
void buildLowerSubsystemsStructures();
|
|
|
|
/// An approximate inverse for U is computed by solving a small upper triangular system for each column of the main matrix.
|
|
/// This function finds the structure of each of theses subsystems and fills the 'upper' struct.
|
|
void buildUpperSubsystemsStructures();
|
|
|
|
public:
|
|
BISAI(ILUReorder opencl_ilu_reorder, int verbosity);
|
|
|
|
// set own Opencl variables, but also that of the bilu0 preconditioner
|
|
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
|
|
|
// analysis, find reordering if specified
|
|
bool analyze_matrix(BlockedMatrix *mat) override;
|
|
|
|
// ilu_decomposition
|
|
bool create_preconditioner(BlockedMatrix *mat) override;
|
|
|
|
// apply preconditioner, x = prec(y)
|
|
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
|
|
|
int* getToOrder() override
|
|
{
|
|
return bilu0->getToOrder();
|
|
}
|
|
|
|
int* getFromOrder() override
|
|
{
|
|
return bilu0->getFromOrder();
|
|
}
|
|
|
|
BlockedMatrix* getRMat() override
|
|
{
|
|
return bilu0->getRMat();
|
|
}
|
|
};
|
|
|
|
/// Similar function to csrPatternToCsc. It gives an offset map from CSR to CSC instead of the full CSR to CSC conversion.
|
|
/// The map works as follows: if an element 'e' of the matrix is in the i-th position in the CSR representation, it will be
|
|
/// in the csrToCscOffsetMap[i]-th position in the CSC representation.
|
|
std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices);
|
|
|
|
} // namespace Accelerator
|
|
} // namespace Opm
|
|
|
|
#endif
|