Merge pull request #5556 from akva2/float_support5

Float support in simulators: Batch 5
This commit is contained in:
Atgeirr Flø Rasmussen 2024-08-30 15:06:35 +02:00 committed by GitHub
commit 58ce7cbc7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 641 additions and 189 deletions

View File

@ -473,7 +473,8 @@ private:
bool oscillate = false;
bool stagnate = false;
const int numPhases = convergence_history.front().size();
detail::detectOscillations(convergence_history, iter, numPhases, 0.2, 1, oscillate, stagnate);
detail::detectOscillations(convergence_history, iter, numPhases,
Scalar{0.2}, 1, oscillate, stagnate);
if (oscillate) {
damping_factor *= 0.85;
logger.debug(fmt::format("| Damping factor is now {}", damping_factor));

View File

@ -464,4 +464,8 @@ void FlowGenericVanguard::registerParameters_()
template void FlowGenericVanguard::registerParameters_<double>();
#if FLOW_INSTANTIATE_FLOAT
template void FlowGenericVanguard::registerParameters_<float>();
#endif
} // namespace Opm

View File

@ -278,4 +278,8 @@ using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>;
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::detail

View File

@ -50,7 +50,9 @@
#include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp>
#endif
typedef Dune::InverseOperatorResult InverseOperatorResult;
#include <type_traits>
using InverseOperatorResult = Dune::InverseOperatorResult;
namespace Opm {
@ -95,10 +97,14 @@ BdaBridge(std::string accelerator_mode_,
#endif
} else if (accelerator_mode.compare("amgcl") == 0) {
#if HAVE_AMGCL
use_gpu = true; // should be replaced by a 'use_bridge' boolean
using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>;
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
tolerance, platformID, deviceID);
if constexpr (std::is_same_v<Scalar,float>) {
OPM_THROW(std::logic_error, "Error amgclSolver disabled with float Scalar");
} else {
use_gpu = true; // should be replaced by a 'use_bridge' boolean
using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>;
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
tolerance, platformID, deviceID);
}
#else
OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
#endif
@ -366,4 +372,8 @@ initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm

View File

@ -89,10 +89,14 @@ void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_si
}
}
#define INSTANCE_TYPE(T) \
template void blockMultSub(double*, double*, double*, unsigned int); \
template void blockMult(double*, double*, double*, unsigned int);
#define INSTANTIATE_TYPE(T) \
template void blockMultSub(T*, T*, T*, unsigned int); \
template void blockMult(T*, T*, T*, unsigned int);
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -190,7 +190,11 @@ analyzeHierarchy()
const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
// store coarsest AMG level in umfpack format, also performs LU decomposition
umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
if constexpr (std::is_same_v<Scalar,float>) {
OPM_THROW(std::runtime_error, "Cannot use CPR with float Scalar due to UMFPACK");
} else {
umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
}
num_levels = dune_amg->levels();
level_sizes.resize(num_levels);
@ -280,7 +284,7 @@ analyzeAggregateMaps()
}
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class CprCreation<T,1>; \
template class CprCreation<T,2>; \
template class CprCreation<T,3>; \
@ -288,7 +292,11 @@ analyzeAggregateMaps()
template class CprCreation<T,5>; \
template class CprCreation<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm

View File

@ -20,7 +20,6 @@
#ifndef OPM_CPRCREATION_HPP
#define OPM_CPRCREATION_HPP
#include <mutex>
#include <dune/istl/paamg/matrixhierarchy.hh>
#include <dune/istl/umfpack.hh>
@ -28,6 +27,8 @@
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
#include <type_traits>
namespace Opm::Accelerator {
template<class Scalar> class BlockedMatrix;
@ -63,7 +64,8 @@ protected:
std::shared_ptr<MatrixOperator> dune_op; // operator, input to Dune AMG
std::vector<int> level_sizes; // size of each level in the AMG hierarchy
std::vector<std::vector<int> > diagIndices; // index of diagonal value for each level
Dune::UMFPack<DuneMat> umfpack; // dune/istl/umfpack object used to solve the coarsest level of AMG
std::conditional_t<std::is_same_v<Scalar,double>,
Dune::UMFPack<DuneMat>, int> umfpack; // dune/istl/umfpack object used to solve the coarsest level of AMG
bool always_recalculate_aggregates = false; // OPM always reuses the aggregates by default
bool recalculate_aggregates = true; // only recalculate aggregates if true
const int pressure_idx = 1; // hardcoded to mimic OPM

View File

@ -1,8 +1,10 @@
#include <cmath>
#include <algorithm>
#include <config.h>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <cmath>
#include <algorithm>
namespace Opm::Accelerator {
// divide A by B, and round up: return (int)ceil(A/B)
@ -59,4 +61,8 @@ void solve_transposed_3x3(const Scalar* A,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
}

View File

@ -39,6 +39,8 @@
#define COPY_ROW_BY_ROW 0
#include <thread>
#include <type_traits>
extern std::shared_ptr<std::thread> copyThread;
#if HAVE_OPENMP
@ -109,13 +111,27 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).setCudaStream(stream);
}
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
if constexpr (std::is_same_v<Scalar,float>) {
cusparseSbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one,
descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
} else {
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one,
descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
}
cublasDscal(cublasHandle, n, &mone, d_r, 1);
cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1);
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSscal(cublasHandle, n, &mone, d_r, 1);
cublasSaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
cublasScopy(cublasHandle, n, d_r, 1, d_rw, 1);
cublasScopy(cublasHandle, n, d_r, 1, d_p, 1);
cublasSnrm2(cublasHandle, n, d_r, 1, &norm_0);
} else {
cublasDscal(cublasHandle, n, &mone, d_r, 1);
cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1);
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
}
if (verbosity > 1) {
std::ostringstream out;
@ -125,40 +141,80 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
for (it = 0.5; it < maxit; it += 0.5) {
rhop = rho;
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
} else {
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
}
if (it > 1) {
beta = (rho / rhop) * (alpha / omega);
nomega = -omega;
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
cublasDscal(cublasHandle, n, &beta, d_p, 1);
cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
cublasSscal(cublasHandle, n, &beta, d_p, 1);
cublasSaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
} else {
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
cublasDscal(cublasHandle, n, &beta, d_p, 1);
cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
}
}
// apply ilu0
cusparseDbsrsv2_solve(cusparseHandle, order, \
operation, Nb, nnzbs_prec, &one, \
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_p, d_t, policy, d_buffer);
cusparseDbsrsv2_solve(cusparseHandle, order, \
operation, Nb, nnzbs_prec, &one, \
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_pw, policy, d_buffer);
// spmv
cusparseDbsrmv(cusparseHandle, order, \
operation, Nb, Nb, nnzb, \
&one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_pw, &zero, d_v);
if constexpr (std::is_same_v<Scalar,float>) {
// apply ilu0
cusparseSbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, d_p, d_t, policy, d_buffer);
cusparseSbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, d_t, d_pw, policy, d_buffer);
// spmv
cusparseSbsrmv(cusparseHandle, order,
operation, Nb, Nb, nnzb,
&one, descr_M, d_bVals, d_bRows,
d_bCols, block_size, d_pw, &zero, d_v);
} else {
// apply ilu0
cusparseDbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, d_p, d_t, policy, d_buffer);
cusparseDbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, d_t, d_pw, policy, d_buffer);
// spmv
cusparseDbsrmv(cusparseHandle, order,
operation, Nb, Nb, nnzb,
&one, descr_M, d_bVals, d_bRows, d_bCols, block_size,
d_pw, &zero, d_v);
}
// apply wellContributions
if (wellContribs.getNumWells() > 0) {
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_pw, d_v);
}
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
} else {
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
}
alpha = rho / tmp1;
nalpha = -alpha;
cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
cublasSaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
cublasSnrm2(cublasHandle, n, d_r, 1, &norm);
} else {
cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
}
if (norm < tolerance * norm_0) {
break;
@ -166,32 +222,65 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
it += 0.5;
// apply ilu0
cusparseDbsrsv2_solve(cusparseHandle, order, \
operation, Nb, nnzbs_prec, &one, \
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_r, d_t, policy, d_buffer);
cusparseDbsrsv2_solve(cusparseHandle, order, \
operation, Nb, nnzbs_prec, &one, \
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_s, policy, d_buffer);
if constexpr (std::is_same_v<Scalar,float>) {
// apply ilu0
cusparseSbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, d_r, d_t, policy, d_buffer);
// spmv
cusparseDbsrmv(cusparseHandle, order, \
operation, Nb, Nb, nnzb, &one, descr_M, \
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
cusparseSbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, d_t, d_s, policy, d_buffer);
// spmv
cusparseSbsrmv(cusparseHandle, order,
operation, Nb, Nb, nnzb, &one, descr_M,
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
} else {
// apply ilu0
cusparseDbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, d_r, d_t, policy, d_buffer);
cusparseDbsrsv2_solve(cusparseHandle, order,
operation, Nb, nnzbs_prec, &one,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, d_t, d_s, policy, d_buffer);
// spmv
cusparseDbsrmv(cusparseHandle, order,
operation, Nb, Nb, nnzb, &one, descr_M,
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
}
// apply wellContributions
if (wellContribs.getNumWells() > 0) {
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_s, d_t);
}
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
cublasSdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
} else {
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
}
omega = tmp1 / tmp2;
nomega = -omega;
cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
if constexpr (std::is_same_v<Scalar,float>) {
cublasSaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
cublasSaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
cublasSnrm2(cublasHandle, n, d_r, 1, &norm);
} else {
cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
}
if (norm < tolerance * norm_0) {
break;
@ -470,21 +559,42 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix()
cusparseCreateBsrsv2Info(&info_U);
cudaCheckLastError("Could not create analysis info");
cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
descr_M, d_mVals, d_mRows, d_mCols, block_size, info_M, &d_bufferSize_M);
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, &d_bufferSize_L);
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, &d_bufferSize_U);
cudaCheckLastError();
if constexpr (std::is_same_v<Scalar,float>) {
cusparseSbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
descr_M, d_mVals, d_mRows, d_mCols, block_size,
info_M, &d_bufferSize_M);
cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, &d_bufferSize_L);
cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, &d_bufferSize_U);
} else {
cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
descr_M, d_mVals, d_mRows, d_mCols, block_size,
info_M, &d_bufferSize_M);
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_L, d_mVals, d_mRows, d_mCols, block_size,
info_L, &d_bufferSize_L);
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
descr_U, d_mVals, d_mRows, d_mCols, block_size,
info_U, &d_bufferSize_U);
}
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
cudaMalloc((void**)&d_buffer, d_bufferSize);
// analysis of ilu LU decomposition
cusparseDbsrilu02_analysis(cusparseHandle, order, \
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols, \
block_size, info_M, policy, d_buffer);
if constexpr (std::is_same_v<Scalar,float>) {
cusparseSbsrilu02_analysis(cusparseHandle, order,
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols,
block_size, info_M, policy, d_buffer);
} else {
cusparseDbsrilu02_analysis(cusparseHandle, order,
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols,
block_size, info_M, policy, d_buffer);
}
int structural_zero;
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
@ -493,13 +603,21 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix()
}
// analysis of ilu apply
cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols, \
block_size, info_L, policy, d_buffer);
cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols, \
block_size, info_U, policy, d_buffer);
if constexpr (std::is_same_v<Scalar,float>) {
cusparseSbsrsv2_analysis(cusparseHandle, order, operation,
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols,
block_size, info_L, policy, d_buffer);
cusparseSbsrsv2_analysis(cusparseHandle, order, operation,
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols,
block_size, info_U, policy, d_buffer);
} else {
cusparseDbsrsv2_analysis(cusparseHandle, order, operation,
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols,
block_size, info_L, policy, d_buffer);
cusparseDbsrsv2_analysis(cusparseHandle, order, operation,
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols,
block_size, info_U, policy, d_buffer);
}
cudaCheckLastError("Could not analyse level information");
if (verbosity > 2) {
@ -519,9 +637,15 @@ bool cusparseSolverBackend<Scalar,block_size>::create_preconditioner()
{
Timer t;
cusparseDbsrilu02(cusparseHandle, order, \
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols, \
block_size, info_M, policy, d_buffer);
if constexpr (std::is_same_v<Scalar,float>) {
cusparseSbsrilu02(cusparseHandle, order,
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols,
block_size, info_M, policy, d_buffer);
} else {
cusparseDbsrilu02(cusparseHandle, order,
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols,
block_size, info_M, policy, d_buffer);
}
cudaCheckLastError("Could not perform ilu decomposition");
int structural_zero;
@ -604,4 +728,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -75,5 +75,9 @@ void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue, BlockedMatrix<Scalar>
template class OpenclMatrix<double>;
#if FLOW_INSTANTIATE_FLOAT
template class OpenclMatrix<float>;
#endif
} // namespace Accelerator
} // namespace Opm

View File

@ -333,7 +333,7 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
}
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class openclBILU0<T,1>; \
template class openclBILU0<T,2>; \
template class openclBILU0<T,3>; \
@ -341,6 +341,10 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
template class openclBILU0<T,5>; \
template class openclBILU0<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -353,7 +353,7 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class openclBISAI<T,1>; \
template class openclBISAI<T,2>; \
template class openclBISAI<T,3>; \
@ -361,6 +361,10 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
template class openclBISAI<T,5>; \
template class openclBISAI<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -36,6 +36,8 @@
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <type_traits>
namespace Opm::Accelerator {
using Dune::Timer;
@ -220,7 +222,11 @@ void openclCPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y,
}
// solve coarsest level using umfpack
this->umfpack.apply(h_x.data(), h_y.data());
if constexpr (std::is_same_v<Scalar,float>) {
OPM_THROW(std::runtime_error, "Cannot use CPR with floats due to UMFPACK usage");
} else {
this->umfpack.apply(h_x.data(), h_y.data());
}
events.resize(1);
err = queue->enqueueWriteBuffer(x, CL_FALSE, 0,
@ -308,7 +314,7 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
}
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class openclCPR<T,1>; \
template class openclCPR<T,2>; \
template class openclCPR<T,3>; \
@ -316,6 +322,10 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
template class openclCPR<T,5>; \
template class openclCPR<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -20,8 +20,6 @@
#ifndef OPM_OPENCLCPR_HPP
#define OPM_OPENCLCPR_HPP
#include <mutex>
#include <dune/istl/paamg/matrixhierarchy.hh>
#include <dune/istl/umfpack.hh>
@ -34,6 +32,8 @@
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
#include <type_traits>
namespace Opm::Accelerator {
template<class Scalar> class BlockedMatrix;

View File

@ -61,7 +61,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
queue = queue_;
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class openclPreconditioner<T,1>; \
template class openclPreconditioner<T,2>; \
template class openclPreconditioner<T,3>; \
@ -69,6 +69,10 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
template class openclPreconditioner<T,5>; \
template class openclPreconditioner<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -717,4 +717,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -490,4 +490,8 @@ spmv([[maybe_unused]] Scalar* vals,
template class HipKernels<double>;
#if FLOW_INSTANTIATE_FLOAT
template class HipKernels<float>;
#endif
} // namespace Opm

View File

@ -247,4 +247,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -29,8 +29,9 @@
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <sstream>
#include <thread>
#include <type_traits>
extern std::shared_ptr<std::thread> copyThread;
#if HAVE_OPENMP
@ -112,23 +113,63 @@ analyze_matrix(BlockedMatrix<Scalar>*,
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrilu0_buffer_size(this->handle, this->dir, Nb,
this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_M));
ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, descr_L,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_L));
ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, descr_U,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_U));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb,
this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_M));
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, descr_L,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_L));
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, descr_U,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info, &d_bufferSize_U));
}
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
// analysis of ilu LU decomposition
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, \
Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrilu0_analysis(this->handle, this->dir,
Nb, this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir,
Nb, this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
}
int zero_position = 0;
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
@ -138,12 +179,33 @@ analyze_matrix(BlockedMatrix<Scalar>*,
}
// analysis of ilu apply
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
Nb, this->nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
Nb, this->nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation,
Nb, this->nnzbs_prec, descr_L,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation,
Nb, this->nnzbs_prec, descr_U, d_Mvals,
d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation,
Nb, this->nnzbs_prec, descr_L,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation,
Nb, this->nnzbs_prec, descr_U, d_Mvals,
d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, d_buffer));
}
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(this->stream));
@ -168,13 +230,25 @@ create_preconditioner(BlockedMatrix<Scalar>*,
{
Timer t;
bool result = true;
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrilu0(this->handle, this->dir, Nb,
this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_solve_policy_auto, d_buffer));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb,
this->nnzbs_prec, descr_M,
d_Mvals, d_Mrows, d_Mcols,
block_size, ilu_info,
rocsparse_solve_policy_auto, d_buffer));
}
// Check for zero pivot
int zero_position = 0;
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
if(rocsparse_status_success != status)
if (rocsparse_status_success != status)
{
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
return false;
@ -257,13 +331,39 @@ apply(Scalar& y, Scalar& x) {
Timer t_apply;
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
this->operation, Nb, this->nnzbs_prec, &one, \
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &y, d_t, rocsparse_solve_policy_auto, d_buffer));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, &one,
descr_L, d_Mvals, d_Mrows,
d_Mcols, block_size, ilu_info,
&y, d_t, rocsparse_solve_policy_auto,
d_buffer));
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
this->operation, Nb, this->nnzbs_prec, &one, \
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, &x, rocsparse_solve_policy_auto, d_buffer));
ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, &one,
descr_U, d_Mvals, d_Mrows,
d_Mcols, block_size, ilu_info,
d_t, &x, rocsparse_solve_policy_auto,
d_buffer));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, &one,
descr_L, d_Mvals, d_Mrows,
d_Mcols, block_size, ilu_info,
&y, d_t, rocsparse_solve_policy_auto,
d_buffer));
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir,
this->operation, Nb,
this->nnzbs_prec, &one,
descr_U, d_Mvals, d_Mrows,
d_Mcols, block_size, ilu_info,
d_t, &x, rocsparse_solve_policy_auto,
d_buffer));
}
if (verbosity >= 3) {
std::ostringstream out;
@ -283,4 +383,8 @@ apply(Scalar& y, Scalar& x) {
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm

View File

@ -35,6 +35,8 @@
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <type_traits>
namespace Opm::Accelerator {
using Opm::OpmLog;
@ -235,8 +237,13 @@ amg_cycle_gpu(const int level,
HIP_CHECK(hipMemcpyAsync(h_y.data(), &y, sizeof(Scalar) * Ncur, hipMemcpyDeviceToHost, this->stream));
// solve coarsest level using umfpack
this->umfpack.apply(h_x.data(), h_y.data());
// The if constexpr is needed to make the code compile
// since the umfpack member is an 'int' with float Scalar.
// We will never get here with float Scalar as we throw earlier.
// Solve coarsest level using umfpack
if constexpr (std::is_same_v<Scalar,double>) {
this->umfpack.apply(h_x.data(), h_y.data());
}
HIP_CHECK(hipMemcpyAsync(&x, h_x.data(), sizeof(Scalar) * Ncur, hipMemcpyHostToDevice, this->stream));
@ -332,4 +339,8 @@ apply(Scalar& y,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm

View File

@ -103,11 +103,15 @@ upload(Scalar *vals,
HIP_CHECK(hipMemcpyAsync(nnzValues, vals, sizeof(Scalar) * size, hipMemcpyHostToDevice, stream));
}
#define INSTANCE_TYPE(T) \
template class RocmVector<T>;\
template class RocmMatrix<T>;
#define INSTANTIATE_TYPE(T) \
template class RocmVector<T>; \
template class RocmMatrix<T>;
INSTANCE_TYPE(int);
INSTANCE_TYPE(double);
INSTANTIATE_TYPE(int)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm

View File

@ -73,7 +73,7 @@ setJacMat(const BlockedMatrix<Scalar>& jMat)
this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jMat);
}
#define INSTANTIATE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class rocsparsePreconditioner<T,1>; \
template class rocsparsePreconditioner<T,2>; \
template class rocsparsePreconditioner<T,3>; \
@ -83,5 +83,9 @@ setJacMat(const BlockedMatrix<Scalar>& jMat)
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} //namespace Opm

View File

@ -51,6 +51,7 @@
#endif
#include <cstddef>
#include <type_traits>
namespace Opm::Accelerator {
@ -151,26 +152,55 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
#if HIP_VERSION >= 60000000
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
}
#elif HIP_VERSION >= 50400000
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_x, &zero, d_r));
}
#else
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_x, &zero, d_r));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_x, &zero, d_r));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_x, &zero, d_r));
}
#endif
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1));
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &mone, d_r, 1));
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_rw, 1));
ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm_0));
} else {
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1));
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0));
}
if (verbosity >= 2) {
std::ostringstream out;
@ -183,14 +213,24 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
}
for (it = 0.5; it < maxit; it += 0.5) {
rhop = rho;
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
} else {
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
}
if (it > 1) {
beta = (rho / rhop) * (alpha / omega);
nomega = -omega;
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &beta, d_p, 1));
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
} else {
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
}
}
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(stream));
@ -209,20 +249,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
// spmv
#if HIP_VERSION >= 60000000
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
}
#elif HIP_VERSION >= 50400000
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_pw, &zero, d_v));
}
#else
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_pw, &zero, d_v));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_pw, &zero, d_v));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_pw, &zero, d_v));
}
#endif
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(stream));
@ -240,12 +301,22 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
t_rest.start();
}
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
} else {
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
}
alpha = rho / tmp1;
nalpha = -alpha;
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm));
} else {
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
}
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(stream));
t_rest.stop();
@ -272,20 +343,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
// spmv
#if HIP_VERSION >= 60000000
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
}
#elif HIP_VERSION >= 50400000
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
spmv_info, d_s, &zero, d_t));
}
#else
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_s, &zero, d_t));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_s, &zero, d_t));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
Nb, Nb, nnzb, &one, descr_A,
d_Avals, d_Arows, d_Acols, block_size,
d_s, &zero, d_t));
}
#endif
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(stream));
@ -303,14 +395,25 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
t_rest.start();
}
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
} else {
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
}
omega = tmp1 / tmp2;
nomega = -omega;
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
if constexpr (std::is_same_v<Scalar,float>) {
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm));
} else {
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
}
if (verbosity >= 3) {
HIP_CHECK(hipStreamSynchronize(stream));
t_rest.stop();
@ -480,15 +583,31 @@ analyze_matrix()
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A));
#if HIP_VERSION >= 60000000
ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation,
Nb, Nb, nnzb,
descr_A, d_Avals, d_Arows, d_Acols,
block_size, spmv_info));
if constexpr (std::is_same_v<Scalar,float>) {
ROCSPARSE_CHECK(rocsparse_sbsrmv_analysis(handle, dir, operation,
Nb, Nb, nnzb,
descr_A, d_Avals, d_Arows, d_Acols,
block_size, spmv_info));
} else {
ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation,
Nb, Nb, nnzb,
descr_A, d_Avals, d_Arows, d_Acols,
block_size, spmv_info));
}
#elif HIP_VERSION >= 50400000
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation,
Nb, Nb, nnzb,
descr_A, d_Avals, d_Arows, d_Acols,
block_size, spmv_info));
// Pick the rocSPARSE BSR SpMV analysis routine whose precision prefix matches
// Scalar: 's' (single precision) for float, 'd' (double precision) otherwise.
// This mirrors the dispatch used by the HIP_VERSION >= 60000000 branch above
// and by the rocsparse_?bsrmv_ex calls made during the solve.
if constexpr (std::is_same_v<Scalar,float>) {
    ROCSPARSE_CHECK(rocsparse_sbsrmv_ex_analysis(handle, dir, operation,
                                                 Nb, Nb, nnzb,
                                                 descr_A, d_Avals,
                                                 d_Arows, d_Acols,
                                                 block_size, spmv_info));
} else {
    ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation,
                                                 Nb, Nb, nnzb,
                                                 descr_A, d_Avals,
                                                 d_Arows, d_Acols,
                                                 block_size, spmv_info));
}
#endif
if(!prec->analyze_matrix(&*mat)) {
@ -593,4 +712,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
INSTANTIATE_TYPE(double)
#if FLOW_INSTANTIATE_FLOAT
INSTANTIATE_TYPE(float)
#endif
} // namespace Opm::Accelerator

View File

@ -1351,7 +1351,7 @@ namespace Opm {
WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, min_thp, max_thp);
// Narrow down the bracket
Scalar low1, high1;
std::array<Scalar, 2> range = {0.9*min_thp, 1.1*max_thp};
std::array<Scalar, 2> range = {Scalar{0.9}*min_thp, Scalar{1.1}*max_thp};
std::optional<Scalar> appr_sol;
WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, range, low1, high1, appr_sol, 0.0, local_deferredLogger);
min_thp = low1;
@ -1362,7 +1362,8 @@ namespace Opm {
if (!autochoke_thp.has_value() || autochoke_thp.value() > nodal_pressure) {
// The bracket is based on the initial bracket or on a range based on a previous calculated group thp
std::array<Scalar, 2> range = autochoke_thp.has_value() ?
std::array<Scalar, 2>{0.9 * autochoke_thp.value(), 1.1 * autochoke_thp.value()} : range_initial;
std::array<Scalar, 2>{Scalar{0.9} * autochoke_thp.value(),
Scalar{1.1} * autochoke_thp.value()} : range_initial;
Scalar low, high;
std::optional<Scalar> approximate_solution;
const Scalar tolerance1 = thp_tolerance;