Rename bda to gpu for classes

This commit is contained in:
Razvan Nane
2024-11-04 12:31:34 +01:00
parent 7c4bafe936
commit baf94f76d2
37 changed files with 220 additions and 220 deletions

View File

@@ -5,7 +5,7 @@ set (opm-simulators_CONFIG_VAR
HAVE_EWOMS
HAVE_MPI
HAVE_PETSC
COMPILE_BDA_BRIDGE
COMPILE_GPU_BRIDGE
HAVE_CUDA
HAVE_OPENCL
HAVE_OPENCL_HPP

View File

@@ -38,8 +38,8 @@
#include <opm/simulators/linalg/extractMatrix.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/ISTLSolverBda.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/ISTLSolverGpu.hpp>
#else
#include <opm/simulators/linalg/ISTLSolver.hpp>
#endif

View File

@@ -67,7 +67,7 @@ void FlowLinearSolverParameters::init(bool cprRequestedInDataFile)
}
accelerator_mode_ = Parameters::Get<Parameters::AcceleratorMode>();
bda_device_id_ = Parameters::Get<Parameters::BdaDeviceId>();
gpu_device_id_ = Parameters::Get<Parameters::GpuDeviceId>();
opencl_platform_id_ = Parameters::Get<Parameters::OpenclPlatformId>();
opencl_ilu_parallel_ = Parameters::Get<Parameters::OpenclIluParallel>();
}
@@ -142,7 +142,7 @@ void FlowLinearSolverParameters::registerParameters()
Parameters::Register<Parameters::AcceleratorMode>
("Choose a linear solver, usage: "
"'--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'");
Parameters::Register<Parameters::BdaDeviceId>
Parameters::Register<Parameters::GpuDeviceId>
("Choose device ID for cusparseSolver or openclSolver, "
"use 'nvidia-smi' or 'clinfo' to determine valid IDs");
Parameters::Register<Parameters::OpenclPlatformId>
@@ -175,7 +175,7 @@ void FlowLinearSolverParameters::reset()
cpr_reuse_setup_ = 4;
cpr_reuse_interval_ = 30;
accelerator_mode_ = "none";
bda_device_id_ = 0;
gpu_device_id_ = 0;
opencl_platform_id_ = 0;
opencl_ilu_parallel_ = true;
}

View File

@@ -32,7 +32,7 @@
namespace Opm {
template <class TypeTag>
class ISTLSolverBda;
class ISTLSolverGpu;
template <class TypeTag>
class ISTLSolver;
@@ -51,8 +51,8 @@ struct FlowIstlSolverParams {};
template<class TypeTag>
struct LinearSolverBackend<TypeTag, TTag::FlowIstlSolverParams>
{
#if COMPILE_BDA_BRIDGE
using type = ISTLSolverBda<TypeTag>;
#if COMPILE_GPU_BRIDGE
using type = ISTLSolverGpu<TypeTag>;
#else
using type = ISTLSolver<TypeTag>;
#endif
@@ -82,7 +82,7 @@ struct LinearSolverPrintJsonDefinition { static constexpr auto value = true; };
struct CprReuseSetup { static constexpr int value = 4; };
struct CprReuseInterval { static constexpr int value = 30; };
struct AcceleratorMode { static constexpr auto value = "none"; };
struct BdaDeviceId { static constexpr int value = 0; };
struct GpuDeviceId { static constexpr int value = 0; };
struct OpenclPlatformId { static constexpr int value = 0; };
struct OpenclIluParallel { static constexpr bool value = true; }; // note: false should only be used in debug
@@ -112,7 +112,7 @@ struct FlowLinearSolverParameters
int cpr_reuse_setup_;
int cpr_reuse_interval_;
std::string accelerator_mode_;
int bda_device_id_;
int gpu_device_id_;
int opencl_platform_id_;
bool opencl_ilu_parallel_;

View File

@@ -33,9 +33,9 @@
#include <fmt/format.h>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
namespace Opm {

View File

@@ -53,8 +53,8 @@ std::shared_ptr<std::thread> copyThread;
namespace Opm::detail {
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::
BdaSolverInfo(const std::string& accelerator_mode,
GpuSolverInfo<Matrix,Vector>::
GpuSolverInfo(const std::string& accelerator_mode,
const int linear_solver_verbosity,
const int maxit,
const Scalar tolerance,
@@ -70,11 +70,11 @@ BdaSolverInfo(const std::string& accelerator_mode,
{}
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::~BdaSolverInfo() = default;
GpuSolverInfo<Matrix,Vector>::~GpuSolverInfo() = default;
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
prepare(const Grid& grid,
const Dune::CartesianIndexMapper<Grid>& cartMapper,
const std::vector<Well>& wellsForConn,
@@ -94,7 +94,7 @@ prepare(const Grid& grid,
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
bool GpuSolverInfo<Matrix,Vector>::
apply(Vector& rhs,
const bool useWellConn,
[[maybe_unused]] WellContribFunc getContribs,
@@ -148,9 +148,9 @@ apply(Vector& rhs,
return true;
} else {
// warn about CPU fallback
// BdaBridge might have disabled its BdaSolver for this simulation due to some error
// in that case the BdaBridge is disabled and flexibleSolver is always used
// or maybe the BdaSolver did not converge in time, then it will be used next linear solve
// GpuBridge might have disabled its GpuSolver for this simulation due to some error
// in that case the GpuBridge is disabled and flexibleSolver is always used
// or maybe the GpuSolver did not converge in time, then it will be used next linear solve
if (rank == 0) {
OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
}
@@ -161,7 +161,7 @@ apply(Vector& rhs,
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
bool GpuSolverInfo<Matrix,Vector>::
gpuActive()
{
return bridge_->getUseGpu();
@@ -169,7 +169,7 @@ gpuActive()
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
blockJacobiAdjacency(const Grid& grid,
const std::vector<int>& cell_part,
std::size_t nonzeroes)
@@ -217,7 +217,7 @@ blockJacobiAdjacency(const Grid& grid,
}
template<class Matrix, class Vector>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
{
auto rbegin = blockJac.begin();
@@ -242,7 +242,7 @@ template<class Scalar, int Dim>
using BV = Dune::BlockVector<Dune::FieldVector<Scalar,Dim>>;
#define INSTANTIATE_GRID(T, Dim, Grid) \
template void BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>:: \
template void GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>:: \
prepare(const Grid&, \
const Dune::CartesianIndexMapper<Grid>&, \
const std::vector<Well>&, \
@@ -257,13 +257,13 @@ using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>;
using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridNoComm>;
#endif //HAVE_MPI
#define INSTANTIATE(T,Dim) \
template struct BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
template struct GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
INSTANTIATE_GRID(T,Dim,Dune::CpGrid) \
INSTANTIATE_GRID(T,Dim,ALUGrid3CN) \
INSTANTIATE_GRID(T,Dim,PolyHedralGrid3D)
#else
#define INSTANTIATE(T,Dim) \
template struct BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
template struct GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
INSTANTIATE_GRID(T,Dim,Dune::CpGrid) \
INSTANTIATE_GRID(T,Dim,PolyHedralGrid3D)
#endif

View File

@@ -19,8 +19,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#define OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#ifndef OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED
#define OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED
#include <opm/simulators/linalg/ISTLSolver.hpp>
@@ -34,18 +34,18 @@ namespace Opm {
class Well;
template<class Matrix, class Vector, int block_size> class BdaBridge;
template<class Matrix, class Vector, int block_size> class GpuBridge;
template<class Scalar> class WellContributions;
namespace detail {
template<class Matrix, class Vector>
struct BdaSolverInfo
struct GpuSolverInfo
{
using Scalar = typename Vector::field_type;
using WellContribFunc = std::function<void(WellContributions<Scalar>&)>;
using Bridge = BdaBridge<Matrix,Vector,Matrix::block_type::rows>;
using Bridge = GpuBridge<Matrix,Vector,Matrix::block_type::rows>;
BdaSolverInfo(const std::string& accelerator_mode,
GpuSolverInfo(const std::string& accelerator_mode,
const int linear_solver_verbosity,
const int maxit,
const Scalar tolerance,
@@ -54,7 +54,7 @@ struct BdaSolverInfo
const bool opencl_ilu_parallel,
const std::string& linsolver);
~BdaSolverInfo();
~GpuSolverInfo();
template<class Grid>
void prepare(const Grid& grid,
@@ -100,7 +100,7 @@ private:
/// as a block-structured matrix (one block for all cell variables) for a fixed
/// number of cell variables np .
template <class TypeTag>
class ISTLSolverBda : public ISTLSolver<TypeTag>
class ISTLSolverGpu : public ISTLSolver<TypeTag>
{
protected:
using ParentType = ISTLSolver<TypeTag>;
@@ -134,23 +134,23 @@ public:
/// \param[in] simulator The opm-models simulator object
/// \param[in] parameters Explicit parameters for solver setup, do not
/// read them from command line parameters.
ISTLSolverBda(const Simulator& simulator, const FlowLinearSolverParameters& parameters)
ISTLSolverGpu(const Simulator& simulator, const FlowLinearSolverParameters& parameters)
: ParentType(simulator, parameters)
{
initializeBda();
initializeGpu();
}
/// Construct a system solver.
/// \param[in] simulator The opm-models simulator object
explicit ISTLSolverBda(const Simulator& simulator)
explicit ISTLSolverGpu(const Simulator& simulator)
: ParentType(simulator)
{
initializeBda();
initializeGpu();
}
void initializeBda()
void initializeGpu()
{
OPM_TIMEBLOCK(initializeBda);
OPM_TIMEBLOCK(initializeGpu);
std::string accelerator_mode = Parameters::Get<Parameters::AcceleratorMode>();
// Force accelerator mode to none if using MPI.
@@ -166,15 +166,15 @@ public:
return;
}
// Initialize the BdaBridge
// Initialize the GpuBridge
const int platformID = Parameters::Get<Parameters::OpenclPlatformId>();
const int deviceID = Parameters::Get<Parameters::BdaDeviceId>();
const int deviceID = Parameters::Get<Parameters::GpuDeviceId>();
const int maxit = Parameters::Get<Parameters::LinearSolverMaxIter>();
const double tolerance = Parameters::Get<Parameters::LinearSolverReduction>();
const bool opencl_ilu_parallel = Parameters::Get<Parameters::OpenclIluParallel>();
const int linear_solver_verbosity = this->parameters_[0].linear_solver_verbosity_;
std::string linsolver = Parameters::Get<Parameters::LinearSolver>();
bdaBridge_ = std::make_unique<detail::BdaSolverInfo<Matrix,Vector>>(accelerator_mode,
gpuBridge_ = std::make_unique<detail::GpuSolverInfo<Matrix,Vector>>(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,
@@ -191,7 +191,7 @@ public:
// Avoid performing the decomposition on CPU when we also do it on GPU,
// but we do need to initialize the pointers.
if (bdaBridge_) {
if (gpuBridge_) {
ParentType::initPrepare(M,b);
} else {
ParentType::prepare(M,b);
@@ -199,13 +199,13 @@ public:
#if HAVE_OPENCL || HAVE_ROCSPARSE || HAVE_CUDA
// update matrix entries for solvers.
if (firstcall && bdaBridge_) {
if (firstcall && gpuBridge_) {
// model will not change the matrix object. Hence simply store a pointer
// to the original one with a deleter that does nothing.
// Ouch! We need to be able to scale the linear system! Hence const_cast
// setup sparsity pattern for jacobi matrix for preconditioner (only used for openclSolver)
bdaBridge_->numJacobiBlocks_ = Parameters::Get<Parameters::NumJacobiBlocks>();
bdaBridge_->prepare(this->simulator_.vanguard().grid(),
gpuBridge_->numJacobiBlocks_ = Parameters::Get<Parameters::NumJacobiBlocks>();
gpuBridge_->prepare(this->simulator_.vanguard().grid(),
this->simulator_.vanguard().cartesianIndexMapper(),
this->simulator_.vanguard().schedule().getWellsatEnd(),
this->simulator_.vanguard().schedule().getPossibleFutureConnections(),
@@ -233,11 +233,11 @@ public:
bool solve(Vector& x)
{
if (!bdaBridge_) {
if (!gpuBridge_) {
return ParentType::solve(x);
}
OPM_TIMEBLOCK(istlSolverBdaSolve);
OPM_TIMEBLOCK(istlSolverGpuSolve);
this->solveCount_ += 1;
// Write linear system if asked for.
const int verbosity = this->prm_[this->activeSolverNum_].template get<int>("verbosity", 0);
@@ -257,13 +257,13 @@ public:
{
this->simulator_.problem().wellModel().getWellContributions(w);
};
if (!bdaBridge_->apply(*(this->rhs_), this->useWellConn_, getContribs,
if (!gpuBridge_->apply(*(this->rhs_), this->useWellConn_, getContribs,
this->simulator_.gridView().comm().rank(),
const_cast<Matrix&>(this->getMatrix()),
x, result))
{
if(bdaBridge_->gpuActive()){
// bda solve fails use istl solver setup need to be done since it is not setup in prepare
if(gpuBridge_->gpuActive()){
// the gpu solve failed, so the istl solver is used; its setup needs to be done here since it is not set up in prepare
ParentType::prepareFlexibleSolver();
}
assert(this->flexibleSolver_[this->activeSolverNum_].solver_);
@@ -277,9 +277,9 @@ public:
}
protected:
std::unique_ptr<detail::BdaSolverInfo<Matrix, Vector>> bdaBridge_;
std::unique_ptr<detail::GpuSolverInfo<Matrix, Vector>> gpuBridge_;
}; // end ISTLSolver
} // namespace Opm
#endif // OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#endif // OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED

View File

@@ -232,7 +232,7 @@ analyzeHierarchy()
}
}
Opm::BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
Opm::GpuBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
// compute inverse diagonal values for current level
invDiags.emplace_back(A.N());

View File

@@ -56,13 +56,13 @@ using InverseOperatorResult = Dune::InverseOperatorResult;
namespace Opm {
using Accelerator::BdaResult;
using Accelerator::BdaSolver;
using Accelerator::GpuResult;
using Accelerator::GpuSolver;
using Accelerator::SolverStatus;
template<class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::
BdaBridge(std::string accelerator_mode_,
GpuBridge<BridgeMatrix, BridgeVector, block_size>::
GpuBridge(std::string accelerator_mode_,
int linear_solver_verbosity,
[[maybe_unused]] int maxit,
[[maybe_unused]] Scalar tolerance,
@@ -176,7 +176,7 @@ int replaceZeroDiagonal(BridgeMatrix& mat,
// sparsity pattern should stay the same
// this could be removed if Dune::BCRSMatrix features an API call that returns colIndices and rowPointers
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
copySparsityPatternFromISTL(const BridgeMatrix& mat,
std::vector<int>& h_rows,
std::vector<int>& h_cols)
@@ -197,7 +197,7 @@ copySparsityPatternFromISTL(const BridgeMatrix& mat,
if (static_cast<unsigned int>(h_rows[mat.N()]) != mat.nonzeroes()) {
OPM_THROW(std::logic_error,
"Error size of rows do not sum to number of nonzeroes "
"in BdaBridge::copySparsityPatternFromISTL()");
"in GpuBridge::copySparsityPatternFromISTL()");
}
}
@@ -225,7 +225,7 @@ void checkMemoryContiguous(const BridgeMatrix& mat)
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
solve_system(BridgeMatrix* bridgeMat,
BridgeMatrix* jacMat,
int numJacobiBlocks,
@@ -234,14 +234,14 @@ solve_system(BridgeMatrix* bridgeMat,
InverseOperatorResult& res)
{
if (use_gpu) {
BdaResult result;
GpuResult result;
result.converged = false;
const int dim = (*bridgeMat)[0][0].N();
const int Nb = bridgeMat->N();
const int nnzb = bridgeMat->nonzeroes();
if (dim != 3) {
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
OpmLog::warning("GpuSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = false;
return;
}
@@ -294,27 +294,27 @@ solve_system(BridgeMatrix* bridgeMat,
/////////////////////////
// actually solve
// assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
// assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen GpuSolver is expected to perform undefined behaviour
SolverStatus status = backend->solve_system(matrix,
static_cast<Scalar*>(&(b[0][0])),
jacMatrix, wellContribs, result);
switch (status) {
case SolverStatus::BDA_SOLVER_SUCCESS:
//OpmLog::info("BdaSolver converged");
case SolverStatus::GPU_SOLVER_SUCCESS:
//OpmLog::info("GpuSolver converged");
break;
case SolverStatus::BDA_SOLVER_ANALYSIS_FAILED:
OpmLog::warning("BdaSolver could not analyse level information of matrix, "
case SolverStatus::GPU_SOLVER_ANALYSIS_FAILED:
OpmLog::warning("GpuSolver could not analyse level information of matrix, "
"perhaps there is still a 0.0 on the diagonal of a "
"block on the diagonal");
break;
case SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
OpmLog::warning("BdaSolver could not create preconditioner, "
case SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED:
OpmLog::warning("GpuSolver could not create preconditioner, "
"perhaps there is still a 0.0 on the diagonal "
"of a block on the diagonal");
break;
default:
OpmLog::warning("BdaSolver returned unknown status code");
OpmLog::warning("GpuSolver returned unknown status code");
}
res.iterations = result.iterations;
@@ -328,7 +328,7 @@ solve_system(BridgeMatrix* bridgeMat,
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
get_result([[maybe_unused]] BridgeVector& x)
{
if (use_gpu) {
@@ -337,7 +337,7 @@ get_result([[maybe_unused]] BridgeVector& x)
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
[[maybe_unused]] unsigned N)
{
@@ -356,19 +356,19 @@ initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
}
// the tests use Dune::FieldMatrix, Flow uses Opm::MatrixBlock
#define INSTANTIATE_BDA_FUNCTIONS(T,n) \
template class BdaBridge<Dune::BCRSMatrix<MatrixBlock<T,n,n>>, \
#define INSTANTIATE_GPU_FUNCTIONS(T,n) \
template class GpuBridge<Dune::BCRSMatrix<MatrixBlock<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>; \
template class BdaBridge<Dune::BCRSMatrix<Dune::FieldMatrix<T,n,n>>, \
template class GpuBridge<Dune::BCRSMatrix<Dune::FieldMatrix<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>;
#define INSTANTIATE_TYPE(T) \
INSTANTIATE_BDA_FUNCTIONS(T,1) \
INSTANTIATE_BDA_FUNCTIONS(T,2) \
INSTANTIATE_BDA_FUNCTIONS(T,3) \
INSTANTIATE_BDA_FUNCTIONS(T,4) \
INSTANTIATE_BDA_FUNCTIONS(T,5) \
INSTANTIATE_BDA_FUNCTIONS(T,6)
INSTANTIATE_GPU_FUNCTIONS(T,1) \
INSTANTIATE_GPU_FUNCTIONS(T,2) \
INSTANTIATE_GPU_FUNCTIONS(T,3) \
INSTANTIATE_GPU_FUNCTIONS(T,4) \
INSTANTIATE_GPU_FUNCTIONS(T,5) \
INSTANTIATE_GPU_FUNCTIONS(T,6)
INSTANTIATE_TYPE(double)

View File

@@ -17,8 +17,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BDABRIDGE_HEADER_INCLUDED
#define BDABRIDGE_HEADER_INCLUDED
#ifndef GPUBRIDGE_HEADER_INCLUDED
#define GPUBRIDGE_HEADER_INCLUDED
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
@@ -31,16 +31,16 @@ template<class Scalar> class WellContributions;
typedef Dune::InverseOperatorResult InverseOperatorResult;
/// BdaBridge acts as interface between opm-simulators with the BdaSolvers
/// GpuBridge acts as interface between opm-simulators and the GpuSolvers
template <class BridgeMatrix, class BridgeVector, int block_size>
class BdaBridge
class GpuBridge
{
private:
using Scalar = typename BridgeVector::field_type;
int verbosity = 0;
bool use_gpu = false;
std::string accelerator_mode;
std::unique_ptr<Accelerator::BdaSolver<Scalar,block_size>> backend;
std::unique_ptr<Accelerator::GpuSolver<Scalar,block_size>> backend;
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::vector<int> h_rows, h_cols; // store the sparsity pattern of the matrix
@@ -49,16 +49,16 @@ private:
std::vector<typename BridgeMatrix::size_type> jacDiagIndices; // same but for jacMatrix
public:
/// Construct a BdaBridge
/// Construct a GpuBridge
/// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'
/// \param[in] linear_solver_verbosity verbosity of BdaSolver
/// \param[in] maxit maximum number of iterations for BdaSolver
/// \param[in] tolerance required relative tolerance for BdaSolver
/// \param[in] linear_solver_verbosity verbosity of GpuSolver
/// \param[in] maxit maximum number of iterations for GpuSolver
/// \param[in] tolerance required relative tolerance for GpuSolver
/// \param[in] platformID the OpenCL platform ID to be used
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
BdaBridge(std::string accelerator_mode,
GpuBridge(std::string accelerator_mode,
int linear_solver_verbosity,
int maxit,
Scalar tolerance,
@@ -87,8 +87,8 @@ public:
/// \param[inout] x vector x, should be of type Dune::BlockVector
void get_result(BridgeVector &x);
/// Return whether the BdaBridge will use the GPU or not
/// return whether the BdaBridge will use the GPU or not
/// Return whether the GpuBridge will use the GPU or not
/// return whether the GpuBridge will use the GPU or not
bool getUseGpu()
{
return use_gpu;
@@ -113,7 +113,7 @@ public:
{
return accelerator_mode;
}
}; // end class BdaBridge
}; // end class GpuBridge
}

View File

@@ -17,8 +17,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BDARESULT_HEADER_INCLUDED
#define BDARESULT_HEADER_INCLUDED
#ifndef GPURESULT_HEADER_INCLUDED
#define GPURESULT_HEADER_INCLUDED
namespace Opm
{
@@ -27,7 +27,7 @@ namespace Accelerator
/// This class is based on InverseOperatorResult struct from dune/istl/solver.hh
/// It is needed to prevent a compile error in basearray.hh, the nvcc compiler might not support all features in there
class BdaResult
class GpuResult
{
public:
@@ -39,7 +39,7 @@ public:
// Dune 2.6 has a member 'double condition_estimate = -1' in InverseOperatorResult
}; // end class BdaResult
}; // end class GpuResult
} // namespace Accelerator
} // namespace Opm

View File

@@ -17,8 +17,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
#define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
#ifndef OPM_GPUSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_GPUSOLVER_BACKEND_HEADER_INCLUDED
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
@@ -33,16 +33,16 @@ template<class Scalar> class WellContributions;
namespace Accelerator {
enum class SolverStatus {
BDA_SOLVER_SUCCESS,
BDA_SOLVER_ANALYSIS_FAILED,
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
BDA_SOLVER_UNKNOWN_ERROR
GPU_SOLVER_SUCCESS,
GPU_SOLVER_ANALYSIS_FAILED,
GPU_SOLVER_CREATE_PRECONDITIONER_FAILED,
GPU_SOLVER_UNKNOWN_ERROR
};
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
/// This class is abstract, no instantiations of it can be made, only of its children
template<class Scalar, unsigned int block_size>
class BdaSolver
class GpuSolver
{
protected:
// verbosity
@@ -66,24 +66,24 @@ protected:
bool initialized = false;
public:
/// Construct a BdaSolver
/// Construct a GpuSolver
/// \param[in] linear_solver_verbosity verbosity of solver
/// \param[in] maxit maximum number of iterations for solver
/// \param[in] tolerance required relative tolerance for solver
/// \param[in] platformID the OpenCL platform to be used, only used in openclSolver
/// \param[in] deviceID the device to be used
BdaSolver(int linear_solver_verbosity, int max_it, Scalar tolerance_)
GpuSolver(int linear_solver_verbosity, int max_it, Scalar tolerance_)
: verbosity(linear_solver_verbosity)
, maxit(max_it)
, tolerance(tolerance_)
{}
BdaSolver(int linear_solver_verbosity, int max_it,
GpuSolver(int linear_solver_verbosity, int max_it,
Scalar tolerance_, unsigned int deviceID_)
: verbosity(linear_solver_verbosity)
, maxit(max_it)
, tolerance(tolerance_)
, deviceID(deviceID_) {};
BdaSolver(int linear_solver_verbosity, int max_it,
GpuSolver(int linear_solver_verbosity, int max_it,
double tolerance_, unsigned int platformID_,
unsigned int deviceID_)
: verbosity(linear_solver_verbosity)
@@ -94,17 +94,17 @@ public:
{}
/// Define virtual destructor, so that the derived class destructor will be called
virtual ~BdaSolver() = default;
virtual ~GpuSolver() = default;
/// Define as pure virtual functions, so the derived class must implement them
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) = 0;
GpuResult& res) = 0;
virtual void get_result(Scalar* x) = 0;
}; // end class BdaSolver
}; // end class GpuSolver
} // namespace Accelerator
} // namespace Opm

View File

@@ -35,7 +35,7 @@ template<class Scalar> class MultisegmentWellContribution;
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver or openclSolver.
/// If the --matrix-add-well-contributions commandline parameter is true, this class should still be used, but be empty.
/// StandardWell and MultisegmentWell are supported for both cusparseSolver and openclSolver.
/// A single instance (or pointer) of this class is passed to the BdaSolver.
/// A single instance (or pointer) of this class is passed to the GpuSolver.
/// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class.
/// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))

View File

@@ -283,7 +283,7 @@ void solve_vexcl(const AIJInfo& A,
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::
solve_system(Scalar* b, BdaResult& res)
solve_system(Scalar* b, GpuResult& res)
{
Timer t;
@@ -417,7 +417,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
[[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix->Nb, matrix->nnzbs);
@@ -425,7 +425,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
}
convert_data(matrix->nnzValues, matrix->rowPointers);
solve_system(b, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -46,9 +46,9 @@ namespace Opm::Accelerator {
/// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
/// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
template<class Scalar, unsigned int block_size>
class amgclSolverBackend : public BdaSolver<Scalar,block_size>
class amgclSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -115,7 +115,7 @@ private:
/// Solve linear system
/// \param[in] b pointer to b vector
/// \param[inout] res summary of solver result
void solve_system(Scalar* b, BdaResult& res);
void solve_system(Scalar* b, GpuResult& res);
public:
/// Construct an amgcl solver
@@ -142,7 +142,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -38,7 +38,7 @@ inline void __cudaCheckError(const char *file, const int line, const char *msg){
if (cudaSuccess != err){
std::ostringstream out;
out << cudaGetErrorString(err) << "\n";
out << "BDA error message: " << msg << "\n";
out << "GPU error message: " << msg << "\n";
OPM_THROW(std::logic_error, out.str());
}
}

View File

@@ -94,7 +94,7 @@ cusparseSolverBackend<Scalar,block_size>::~cusparseSolverBackend()
template<class Scalar, unsigned int block_size>
void cusparseSolverBackend<Scalar,block_size>::
gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
int n = N;
@@ -666,7 +666,7 @@ bool cusparseSolverBackend<Scalar,block_size>::create_preconditioner()
template<class Scalar, unsigned int block_size>
void cusparseSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
// actually solve
gpu_pbicgstab(wellContribs, res);
@@ -697,7 +697,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
@@ -707,15 +707,15 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
}
if (analysis_done == false) {
if (!analyse_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
if (create_preconditioner()) {
solve_system(wellContribs, res);
} else {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -32,9 +32,9 @@ namespace Opm::Accelerator {
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class cusparseSolverBackend : public BdaSolver<Scalar,block_size>
class cusparseSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -73,7 +73,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix for spmv
@@ -113,7 +113,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult &res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult &res);
public:
/// Construct a cusparseSolver
@@ -138,7 +138,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get resulting vector x after linear solve, also includes post processing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -983,7 +983,7 @@ void ChowPatelIlu<block_size>::gpu_decomposition(
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
#define INSTANTIATE_GPU_FUNCTIONS(n) \
template void ChowPatelIlu<n>::decomposition( \
cl::CommandQueue *queue, cl::Context *context, \
BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat, \
@@ -992,14 +992,14 @@ template void ChowPatelIlu<n>::decomposition( \
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows, \
cl::Buffer& d_Uvals, cl::Buffer& d_Ucols, cl::Buffer& d_Urows);
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
INSTANTIATE_GPU_FUNCTIONS(1);
INSTANTIATE_GPU_FUNCTIONS(2);
INSTANTIATE_GPU_FUNCTIONS(3);
INSTANTIATE_GPU_FUNCTIONS(4);
INSTANTIATE_GPU_FUNCTIONS(5);
INSTANTIATE_GPU_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
#undef INSTANTIATE_GPU_FUNCTIONS
} // namespace Accelerator
} // namespace Opm

View File

@@ -18,7 +18,7 @@
*/
/// This file includes the relevant OpenCL header(s)
/// All bda files using OpenCL declarations should include this header
/// All gpu files using OpenCL declarations should include this header
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_TARGET_OPENCL_VERSION 120 // indicate OpenCL 1.2 is used

View File

@@ -248,7 +248,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
template<class Scalar, unsigned int block_size>
void openclSolverBackend<Scalar,block_size>::
gpu_pbicgstab(WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
float it;
Scalar rho, rhop, beta, alpha, omega, tmp1, tmp2;
@@ -633,7 +633,7 @@ create_preconditioner()
template<class Scalar, unsigned int block_size>
void openclSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t;
@@ -682,29 +682,29 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
if (analysis_done == false) {
if (!analyze_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
update_system(matrix->nnzValues, b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
copy_system_to_gpu();
} else {
update_system(matrix->nnzValues, b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
update_system_on_gpu();
}
solve_system(wellContribs, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -31,9 +31,9 @@ namespace Opm::Accelerator {
/// This class implements an opencl-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class openclSolverBackend : public BdaSolver<Scalar,block_size>
class openclSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -62,7 +62,7 @@ private:
std::unique_ptr<openclPreconditioner<Scalar,block_size>> prec;
// can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
bool is_root; // allow for nested solvers, the root solver is called by GpuBridge
bool analysis_done = false;
std::shared_ptr<BlockedMatrix<Scalar>> mat{}; // original matrix
std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
@@ -73,7 +73,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
@@ -104,7 +104,7 @@ private:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// could be empty
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res);
public:
std::shared_ptr<cl::Context> context{};
@@ -138,11 +138,11 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
/// Data is already on the GPU
// SolverStatus solve_system(BdaResult &res);
// SolverStatus solve_system(GpuResult &res);
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -157,7 +157,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
[[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix.get());
@@ -234,7 +234,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
OpmLog::info(out.str());
}
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -36,9 +36,9 @@ namespace Opm::Accelerator {
/// This class implements a rocalution-based linear solver on GPU
/// It uses ilu0-bicgstab
template<class Scalar, unsigned int block_size>
class rocalutionSolverBackend : public BdaSolver<Scalar,block_size>
class rocalutionSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -95,7 +95,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -133,7 +133,7 @@ rocsparseSolverBackend<Scalar,block_size>::~rocsparseSolverBackend()
template<class Scalar, unsigned int block_size>
void rocsparseSolverBackend<Scalar,block_size>::
gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
float it = 0.5;
Scalar rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
@@ -638,7 +638,7 @@ create_preconditioner()
template<class Scalar, unsigned int block_size>
void rocsparseSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t;
@@ -678,28 +678,28 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
copy_system_to_gpu(b);
if (analysis_done == false) {
if (!analyze_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
} else {
update_system_on_gpu(b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
}
solve_system(wellContribs, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -37,9 +37,9 @@ namespace Opm::Accelerator {
/// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class rocsparseSolverBackend : public BdaSolver<Scalar,block_size>
class rocsparseSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -82,7 +82,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
@@ -109,7 +109,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res);
public:
/// Construct a rocsparseSolver
@@ -143,7 +143,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -88,7 +88,7 @@ struct EnableTerminalOutput { static constexpr bool value = true; };
namespace Opm {
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar> class WellContributions;
#endif
@@ -304,7 +304,7 @@ template<class Scalar> class WellContributions;
void applyDomain(const BVector& x, BVector& Ax, const int domainIndex) const;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
// accumulate the contributions of all Wells in the WellContributions object
void getWellContributions(WellContributions<Scalar>& x) const;
#endif

View File

@@ -49,8 +49,8 @@
#include <opm/simulators/utils/MPIPacker.hpp>
#include <opm/simulators/utils/phaseUsageFromDeck.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
#if HAVE_MPI
@@ -1785,7 +1785,7 @@ namespace Opm {
}
}
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<typename TypeTag>
void
BlackoilWellModel<TypeTag>::

View File

@@ -29,8 +29,8 @@
#include <opm/input/eclipse/Schedule/MSW/WellSegments.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
@@ -206,7 +206,7 @@ recoverSolutionWell(const BVector& x, BVectorWell& xw) const
xw = mswellhelpers::applyUMFPack(*duneDSolver_, resWell);
}
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar, int numWellEq, int numEq>
void MultisegmentWellEquations<Scalar,numWellEq,numEq>::
extract(WellContributions<Scalar>& wellContribs) const

View File

@@ -38,7 +38,7 @@ namespace Opm
template<class Scalar, int numWellEq, int numEq> class MultisegmentWellEquationAccess;
template<class Scalar> class MultisegmentWellGeneric;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar> class WellContributions;
#endif
template<class Scalar> class WellInterfaceGeneric;
@@ -101,7 +101,7 @@ public:
//! \details xw = inv(D)*(rw - C*x)
void recoverSolutionWell(const BVector& x, BVectorWell& xw) const;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
//! \brief Add the matrices of this well to the WellContributions object.
void extract(WellContributions<Scalar>& wellContribs) const;
#endif

View File

@@ -46,8 +46,8 @@
#include <cstddef>
#include <string>
#if COMPILE_BDA_BRIDGE && (HAVE_CUDA || HAVE_OPENCL)
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE && (HAVE_CUDA || HAVE_OPENCL)
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
namespace Opm

View File

@@ -24,8 +24,8 @@
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/wells/StandardWellEquations.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
@@ -192,7 +192,7 @@ recoverSolutionWell(const BVector& x, BVectorWell& xw) const
invDuneD_.mv(resWell, xw);
}
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar, int numEq>
void StandardWellEquations<Scalar,numEq>::
extract(const int numStaticWellEq,

View File

@@ -36,7 +36,7 @@ namespace Opm
template<class Scalar> class ParallelWellInfo;
template<class Scalar, int numEq> class StandardWellEquationAccess;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar> class WellContributions;
#endif
template<class Scalar> class WellInterfaceGeneric;
@@ -97,7 +97,7 @@ public:
//! \details xw = inv(D)*(rw - C*x)
void recoverSolutionWell(const BVector& x, BVectorWell& xw) const;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
//! \brief Add the matrices of this well to the WellContributions object.
void extract(const int numStaticWellEq,
WellContributions<Scalar>& wellContribs) const;

View File

@@ -23,8 +23,8 @@
#define BOOST_TEST_MODULE OPM_test_cusparseSolver
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
@@ -88,7 +88,7 @@ getDuneSolution(Matrix<bz>& matrix, Vector<bz>& rhs)
template <int bz>
void
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
{
const int linear_solver_verbosity = prm.get<int>("verbosity");
const int maxit = prm.get<int>("maxiter");
@@ -100,7 +100,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
const std::string linsolver("ilu0");
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
bridge = std::make_unique<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,
@@ -119,7 +119,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testCusparseSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testCusparseSolver(Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -130,11 +130,11 @@ testCusparseSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz
bridge.get_result(x);
return x;
}
}
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testCusparseSolverJacobi(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testCusparseSolverJacobi(Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -156,7 +156,7 @@ void test3(const pt::ptree& prm)
const int bz = 3;
Matrix<bz> matrix;
Vector<bz> rhs;
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs);
Vector<bz> rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector
auto duneSolution = getDuneSolution<bz>(matrix, rhs);

View File

@@ -23,8 +23,8 @@
#define BOOST_TEST_MODULE OPM_test_openclSolver
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
@@ -88,7 +88,7 @@ getDuneSolution(Matrix<bz>& matrix, Vector<bz>& rhs)
template <int bz>
void
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
{
const int linear_solver_verbosity = prm.get<int>("verbosity");
const int maxit = prm.get<int>("maxiter");
@@ -100,7 +100,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
const std::string linsolver("ilu0");
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
bridge = std::make_unique<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,
@@ -116,7 +116,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testOpenclSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testOpenclSolver(Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -131,7 +131,7 @@ testOpenclSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>&
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testOpenclSolverJacobi(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testOpenclSolverJacobi(Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -153,7 +153,7 @@ void test3(const pt::ptree& prm)
const int bz = 3;
Matrix<bz> matrix;
Vector<bz> rhs;
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs);
Vector<bz> rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector
auto duneSolution = getDuneSolution<bz>(matrix, rhs);

View File

@@ -23,8 +23,8 @@
#define BOOST_TEST_MODULE OPM_test_rocalutionSolver
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <rocalution.hpp>
#include <dune/common/fvector.hh>
@@ -97,9 +97,9 @@ testRocalutionSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix,
Vector<bz> x(rhs.size());
auto wellContribs = Opm::WellContributions<double>::create(accelerator_mode, true);
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
bridge = std::make_unique<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,

View File

@@ -27,8 +27,8 @@
#define BOOST_TEST_MODULE OPM_test_rocsparseSolver
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
@@ -92,7 +92,7 @@ getDuneSolution(Matrix<bz>& matrix, Vector<bz>& rhs)
template <int bz>
void
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
{
const int linear_solver_verbosity = prm.get<int>("verbosity");
const int maxit = prm.get<int>("maxiter");
@@ -104,7 +104,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
const std::string linsolver("ilu0");
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
bridge = std::make_unique<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,
@@ -123,7 +123,7 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testRocsparseSolver(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testRocsparseSolver(std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -138,7 +138,7 @@ testRocsparseSolver(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >
template <int bz>
Dune::BlockVector<Dune::FieldVector<double, bz>>
testRocsparseSolverJacobi(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
testRocsparseSolverJacobi(std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
{
Dune::InverseOperatorResult result;
Vector<bz> x(rhs.size());
@@ -169,7 +169,7 @@ void test3(const pt::ptree& prm)
// if not present, no memory is allocated, and subsequent calls
// with a jacobi matrix will cause nans
{
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
createBridge(prm, bridge); // create bridge with rocsparseSolver
// test rocsparseSolver without Jacobi matrix
@@ -183,7 +183,7 @@ void test3(const pt::ptree& prm)
}
{
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
std::unique_ptr<Opm::GpuBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
createBridge(prm, bridge); // create bridge with rocsparseSolver
// test rocsparseSolver with Jacobi matrix