mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #5556 from akva2/float_support5
Float support in simulators: Batch 5
This commit is contained in:
commit
58ce7cbc7c
@ -473,7 +473,8 @@ private:
|
||||
bool oscillate = false;
|
||||
bool stagnate = false;
|
||||
const int numPhases = convergence_history.front().size();
|
||||
detail::detectOscillations(convergence_history, iter, numPhases, 0.2, 1, oscillate, stagnate);
|
||||
detail::detectOscillations(convergence_history, iter, numPhases,
|
||||
Scalar{0.2}, 1, oscillate, stagnate);
|
||||
if (oscillate) {
|
||||
damping_factor *= 0.85;
|
||||
logger.debug(fmt::format("| Damping factor is now {}", damping_factor));
|
||||
|
@ -464,4 +464,8 @@ void FlowGenericVanguard::registerParameters_()
|
||||
|
||||
template void FlowGenericVanguard::registerParameters_<double>();
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
template void FlowGenericVanguard::registerParameters_<float>();
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -278,4 +278,8 @@ using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>;
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::detail
|
||||
|
@ -50,7 +50,9 @@
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp>
|
||||
#endif
|
||||
|
||||
typedef Dune::InverseOperatorResult InverseOperatorResult;
|
||||
#include <type_traits>
|
||||
|
||||
using InverseOperatorResult = Dune::InverseOperatorResult;
|
||||
|
||||
namespace Opm {
|
||||
|
||||
@ -95,10 +97,14 @@ BdaBridge(std::string accelerator_mode_,
|
||||
#endif
|
||||
} else if (accelerator_mode.compare("amgcl") == 0) {
|
||||
#if HAVE_AMGCL
|
||||
use_gpu = true; // should be replaced by a 'use_bridge' boolean
|
||||
using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>;
|
||||
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
|
||||
tolerance, platformID, deviceID);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
OPM_THROW(std::logic_error, "Error amgclSolver disabled with float Scalar");
|
||||
} else {
|
||||
use_gpu = true; // should be replaced by a 'use_bridge' boolean
|
||||
using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>;
|
||||
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
|
||||
tolerance, platformID, deviceID);
|
||||
}
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
|
||||
#endif
|
||||
@ -366,4 +372,8 @@ initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -89,10 +89,14 @@ void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_si
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
template void blockMultSub(double*, double*, double*, unsigned int); \
|
||||
template void blockMult(double*, double*, double*, unsigned int);
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template void blockMultSub(T*, T*, T*, unsigned int); \
|
||||
template void blockMult(T*, T*, T*, unsigned int);
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -190,7 +190,11 @@ analyzeHierarchy()
|
||||
const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
|
||||
|
||||
// store coarsest AMG level in umfpack format, also performs LU decomposition
|
||||
umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
OPM_THROW(std::runtime_error, "Cannot use CPR with float Scalar due to UMFPACK");
|
||||
} else {
|
||||
umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat());
|
||||
}
|
||||
|
||||
num_levels = dune_amg->levels();
|
||||
level_sizes.resize(num_levels);
|
||||
@ -280,7 +284,7 @@ analyzeAggregateMaps()
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class CprCreation<T,1>; \
|
||||
template class CprCreation<T,2>; \
|
||||
template class CprCreation<T,3>; \
|
||||
@ -288,7 +292,11 @@ analyzeAggregateMaps()
|
||||
template class CprCreation<T,5>; \
|
||||
template class CprCreation<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
||||
|
@ -20,7 +20,6 @@
|
||||
#ifndef OPM_CPRCREATION_HPP
|
||||
#define OPM_CPRCREATION_HPP
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <dune/istl/paamg/matrixhierarchy.hh>
|
||||
#include <dune/istl/umfpack.hh>
|
||||
@ -28,6 +27,8 @@
|
||||
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
||||
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
@ -63,7 +64,8 @@ protected:
|
||||
std::shared_ptr<MatrixOperator> dune_op; // operator, input to Dune AMG
|
||||
std::vector<int> level_sizes; // size of each level in the AMG hierarchy
|
||||
std::vector<std::vector<int> > diagIndices; // index of diagonal value for each level
|
||||
Dune::UMFPack<DuneMat> umfpack; // dune/istl/umfpack object used to solve the coarsest level of AMG
|
||||
std::conditional_t<std::is_same_v<Scalar,double>,
|
||||
Dune::UMFPack<DuneMat>, int> umfpack; // dune/istl/umfpack object used to solve the coarsest level of AMG
|
||||
bool always_recalculate_aggregates = false; // OPM always reuses the aggregates by default
|
||||
bool recalculate_aggregates = true; // only rerecalculate if true
|
||||
const int pressure_idx = 1; // hardcoded to mimic OPM
|
||||
|
@ -1,8 +1,10 @@
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <config.h>
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
// divide A by B, and round up: return (int)ceil(A/B)
|
||||
@ -59,4 +61,8 @@ void solve_transposed_3x3(const Scalar* A,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -39,6 +39,8 @@
|
||||
#define COPY_ROW_BY_ROW 0
|
||||
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
|
||||
extern std::shared_ptr<std::thread> copyThread;
|
||||
|
||||
#if HAVE_OPENMP
|
||||
@ -109,13 +111,27 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
|
||||
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).setCudaStream(stream);
|
||||
}
|
||||
|
||||
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cusparseSbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one,
|
||||
descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
|
||||
} else {
|
||||
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one,
|
||||
descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
|
||||
}
|
||||
|
||||
cublasDscal(cublasHandle, n, &mone, d_r, 1);
|
||||
cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
|
||||
cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1);
|
||||
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSscal(cublasHandle, n, &mone, d_r, 1);
|
||||
cublasSaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
|
||||
cublasScopy(cublasHandle, n, d_r, 1, d_rw, 1);
|
||||
cublasScopy(cublasHandle, n, d_r, 1, d_p, 1);
|
||||
cublasSnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
||||
} else {
|
||||
cublasDscal(cublasHandle, n, &mone, d_r, 1);
|
||||
cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
|
||||
cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1);
|
||||
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
||||
}
|
||||
|
||||
if (verbosity > 1) {
|
||||
std::ostringstream out;
|
||||
@ -125,40 +141,80 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
|
||||
|
||||
for (it = 0.5; it < maxit; it += 0.5) {
|
||||
rhop = rho;
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
|
||||
} else {
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
|
||||
}
|
||||
|
||||
if (it > 1) {
|
||||
beta = (rho / rhop) * (alpha / omega);
|
||||
nomega = -omega;
|
||||
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
|
||||
cublasDscal(cublasHandle, n, &beta, d_p, 1);
|
||||
cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
|
||||
cublasSscal(cublasHandle, n, &beta, d_p, 1);
|
||||
cublasSaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
|
||||
} else {
|
||||
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
|
||||
cublasDscal(cublasHandle, n, &beta, d_p, 1);
|
||||
cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// apply ilu0
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_p, d_t, policy, d_buffer);
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_pw, policy, d_buffer);
|
||||
|
||||
// spmv
|
||||
cusparseDbsrmv(cusparseHandle, order, \
|
||||
operation, Nb, Nb, nnzb, \
|
||||
&one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_pw, &zero, d_v);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
// apply ilu0
|
||||
cusparseSbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, d_p, d_t, policy, d_buffer);
|
||||
cusparseSbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, d_t, d_pw, policy, d_buffer);
|
||||
// spmv
|
||||
cusparseSbsrmv(cusparseHandle, order,
|
||||
operation, Nb, Nb, nnzb,
|
||||
&one, descr_M, d_bVals, d_bRows,
|
||||
d_bCols, block_size, d_pw, &zero, d_v);
|
||||
} else {
|
||||
// apply ilu0
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, d_p, d_t, policy, d_buffer);
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, d_t, d_pw, policy, d_buffer);
|
||||
// spmv
|
||||
cusparseDbsrmv(cusparseHandle, order,
|
||||
operation, Nb, Nb, nnzb,
|
||||
&one, descr_M, d_bVals, d_bRows, d_bCols, block_size,
|
||||
d_pw, &zero, d_v);
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_pw, d_v);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
|
||||
} else {
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
|
||||
}
|
||||
|
||||
alpha = rho / tmp1;
|
||||
nalpha = -alpha;
|
||||
cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
|
||||
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
|
||||
cublasSaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
|
||||
cublasSnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
} else {
|
||||
cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
|
||||
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
}
|
||||
|
||||
if (norm < tolerance * norm_0) {
|
||||
break;
|
||||
@ -166,32 +222,65 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
|
||||
|
||||
it += 0.5;
|
||||
|
||||
// apply ilu0
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_r, d_t, policy, d_buffer);
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_s, policy, d_buffer);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
// apply ilu0
|
||||
cusparseSbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, d_r, d_t, policy, d_buffer);
|
||||
|
||||
// spmv
|
||||
cusparseDbsrmv(cusparseHandle, order, \
|
||||
operation, Nb, Nb, nnzb, &one, descr_M, \
|
||||
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
|
||||
cusparseSbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, d_t, d_s, policy, d_buffer);
|
||||
|
||||
// spmv
|
||||
cusparseSbsrmv(cusparseHandle, order,
|
||||
operation, Nb, Nb, nnzb, &one, descr_M,
|
||||
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
|
||||
} else {
|
||||
// apply ilu0
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, d_r, d_t, policy, d_buffer);
|
||||
|
||||
cusparseDbsrsv2_solve(cusparseHandle, order,
|
||||
operation, Nb, nnzbs_prec, &one,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, d_t, d_s, policy, d_buffer);
|
||||
|
||||
// spmv
|
||||
cusparseDbsrmv(cusparseHandle, order,
|
||||
operation, Nb, Nb, nnzb, &one, descr_M,
|
||||
d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_s, d_t);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
|
||||
cublasSdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
|
||||
} else {
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
|
||||
}
|
||||
|
||||
omega = tmp1 / tmp2;
|
||||
nomega = -omega;
|
||||
cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
|
||||
cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
|
||||
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cublasSaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
|
||||
cublasSaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
|
||||
cublasSnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
} else {
|
||||
cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
|
||||
cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
}
|
||||
|
||||
if (norm < tolerance * norm_0) {
|
||||
break;
|
||||
@ -470,21 +559,42 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix()
|
||||
cusparseCreateBsrsv2Info(&info_U);
|
||||
cudaCheckLastError("Could not create analysis info");
|
||||
|
||||
cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
|
||||
descr_M, d_mVals, d_mRows, d_mCols, block_size, info_M, &d_bufferSize_M);
|
||||
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, &d_bufferSize_L);
|
||||
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, &d_bufferSize_U);
|
||||
cudaCheckLastError();
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cusparseSbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
|
||||
descr_M, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_M, &d_bufferSize_M);
|
||||
cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, &d_bufferSize_L);
|
||||
cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, &d_bufferSize_U);
|
||||
} else {
|
||||
cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec,
|
||||
descr_M, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_M, &d_bufferSize_M);
|
||||
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_L, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_L, &d_bufferSize_L);
|
||||
cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec,
|
||||
descr_U, d_mVals, d_mRows, d_mCols, block_size,
|
||||
info_U, &d_bufferSize_U);
|
||||
}
|
||||
|
||||
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
|
||||
|
||||
cudaMalloc((void**)&d_buffer, d_bufferSize);
|
||||
|
||||
// analysis of ilu LU decomposition
|
||||
cusparseDbsrilu02_analysis(cusparseHandle, order, \
|
||||
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols, \
|
||||
block_size, info_M, policy, d_buffer);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cusparseSbsrilu02_analysis(cusparseHandle, order,
|
||||
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_M, policy, d_buffer);
|
||||
} else {
|
||||
cusparseDbsrilu02_analysis(cusparseHandle, order,
|
||||
Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_M, policy, d_buffer);
|
||||
}
|
||||
|
||||
int structural_zero;
|
||||
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
|
||||
@ -493,13 +603,21 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix()
|
||||
}
|
||||
|
||||
// analysis of ilu apply
|
||||
cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
|
||||
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols, \
|
||||
block_size, info_L, policy, d_buffer);
|
||||
|
||||
cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
|
||||
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols, \
|
||||
block_size, info_U, policy, d_buffer);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cusparseSbsrsv2_analysis(cusparseHandle, order, operation,
|
||||
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_L, policy, d_buffer);
|
||||
cusparseSbsrsv2_analysis(cusparseHandle, order, operation,
|
||||
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_U, policy, d_buffer);
|
||||
} else {
|
||||
cusparseDbsrsv2_analysis(cusparseHandle, order, operation,
|
||||
Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_L, policy, d_buffer);
|
||||
cusparseDbsrsv2_analysis(cusparseHandle, order, operation,
|
||||
Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_U, policy, d_buffer);
|
||||
}
|
||||
cudaCheckLastError("Could not analyse level information");
|
||||
|
||||
if (verbosity > 2) {
|
||||
@ -519,9 +637,15 @@ bool cusparseSolverBackend<Scalar,block_size>::create_preconditioner()
|
||||
{
|
||||
Timer t;
|
||||
|
||||
cusparseDbsrilu02(cusparseHandle, order, \
|
||||
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols, \
|
||||
block_size, info_M, policy, d_buffer);
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
cusparseSbsrilu02(cusparseHandle, order,
|
||||
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_M, policy, d_buffer);
|
||||
} else {
|
||||
cusparseDbsrilu02(cusparseHandle, order,
|
||||
Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols,
|
||||
block_size, info_M, policy, d_buffer);
|
||||
}
|
||||
cudaCheckLastError("Could not perform ilu decomposition");
|
||||
|
||||
int structural_zero;
|
||||
@ -604,4 +728,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -75,5 +75,9 @@ void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue, BlockedMatrix<Scalar>
|
||||
|
||||
template class OpenclMatrix<double>;
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
template class OpenclMatrix<float>;
|
||||
#endif
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
||||
|
@ -333,7 +333,7 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class openclBILU0<T,1>; \
|
||||
template class openclBILU0<T,2>; \
|
||||
template class openclBILU0<T,3>; \
|
||||
@ -341,6 +341,10 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
template class openclBILU0<T,5>; \
|
||||
template class openclBILU0<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -353,7 +353,7 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
|
||||
d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class openclBISAI<T,1>; \
|
||||
template class openclBISAI<T,2>; \
|
||||
template class openclBISAI<T,3>; \
|
||||
@ -361,6 +361,10 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
|
||||
template class openclBISAI<T,5>; \
|
||||
template class openclBISAI<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -36,6 +36,8 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
using Dune::Timer;
|
||||
@ -220,7 +222,11 @@ void openclCPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y,
|
||||
}
|
||||
|
||||
// solve coarsest level using umfpack
|
||||
this->umfpack.apply(h_x.data(), h_y.data());
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
OPM_THROW(std::runtime_error, "Cannot use CPR with floats due to UMFPACK usage");
|
||||
} else {
|
||||
this->umfpack.apply(h_x.data(), h_y.data());
|
||||
}
|
||||
|
||||
events.resize(1);
|
||||
err = queue->enqueueWriteBuffer(x, CL_FALSE, 0,
|
||||
@ -308,7 +314,7 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class openclCPR<T,1>; \
|
||||
template class openclCPR<T,2>; \
|
||||
template class openclCPR<T,3>; \
|
||||
@ -316,6 +322,10 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
template class openclCPR<T,5>; \
|
||||
template class openclCPR<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -20,8 +20,6 @@
|
||||
#ifndef OPM_OPENCLCPR_HPP
|
||||
#define OPM_OPENCLCPR_HPP
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <dune/istl/paamg/matrixhierarchy.hh>
|
||||
#include <dune/istl/umfpack.hh>
|
||||
|
||||
@ -34,6 +32,8 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
@ -61,7 +61,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
|
||||
queue = queue_;
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class openclPreconditioner<T,1>; \
|
||||
template class openclPreconditioner<T,2>; \
|
||||
template class openclPreconditioner<T,3>; \
|
||||
@ -69,6 +69,10 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
|
||||
template class openclPreconditioner<T,5>; \
|
||||
template class openclPreconditioner<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -717,4 +717,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -490,4 +490,8 @@ spmv([[maybe_unused]] Scalar* vals,
|
||||
|
||||
template class HipKernels<double>;
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
template class HipKernels<float>;
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -247,4 +247,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -29,8 +29,9 @@
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
|
||||
extern std::shared_ptr<std::thread> copyThread;
|
||||
|
||||
#if HAVE_OPENMP
|
||||
@ -112,23 +113,63 @@ analyze_matrix(BlockedMatrix<Scalar>*,
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrilu0_buffer_size(this->handle, this->dir, Nb,
|
||||
this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, descr_L,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_L));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, descr_U,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_U));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb,
|
||||
this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, descr_L,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_L));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, descr_U,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info, &d_bufferSize_U));
|
||||
}
|
||||
|
||||
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
|
||||
|
||||
// analysis of ilu LU decomposition
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, \
|
||||
Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrilu0_analysis(this->handle, this->dir,
|
||||
Nb, this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir,
|
||||
Nb, this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
}
|
||||
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
||||
@ -138,12 +179,33 @@ analyze_matrix(BlockedMatrix<Scalar>*,
|
||||
}
|
||||
|
||||
// analysis of ilu apply
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
||||
Nb, this->nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
||||
Nb, this->nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation,
|
||||
Nb, this->nnzbs_prec, descr_L,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation,
|
||||
Nb, this->nnzbs_prec, descr_U, d_Mvals,
|
||||
d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation,
|
||||
Nb, this->nnzbs_prec, descr_L,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation,
|
||||
Nb, this->nnzbs_prec, descr_U, d_Mvals,
|
||||
d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
@ -168,13 +230,25 @@ create_preconditioner(BlockedMatrix<Scalar>*,
|
||||
{
|
||||
Timer t;
|
||||
bool result = true;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrilu0(this->handle, this->dir, Nb,
|
||||
this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb,
|
||||
this->nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols,
|
||||
block_size, ilu_info,
|
||||
rocsparse_solve_policy_auto, d_buffer));
|
||||
}
|
||||
|
||||
// Check for zero pivot
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
||||
if(rocsparse_status_success != status)
|
||||
if (rocsparse_status_success != status)
|
||||
{
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
@ -257,13 +331,39 @@ apply(Scalar& y, Scalar& x) {
|
||||
|
||||
Timer t_apply;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
||||
this->operation, Nb, this->nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &y, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, &one,
|
||||
descr_L, d_Mvals, d_Mrows,
|
||||
d_Mcols, block_size, ilu_info,
|
||||
&y, d_t, rocsparse_solve_policy_auto,
|
||||
d_buffer));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
||||
this->operation, Nb, this->nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, &x, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, &one,
|
||||
descr_U, d_Mvals, d_Mrows,
|
||||
d_Mcols, block_size, ilu_info,
|
||||
d_t, &x, rocsparse_solve_policy_auto,
|
||||
d_buffer));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, &one,
|
||||
descr_L, d_Mvals, d_Mrows,
|
||||
d_Mcols, block_size, ilu_info,
|
||||
&y, d_t, rocsparse_solve_policy_auto,
|
||||
d_buffer));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir,
|
||||
this->operation, Nb,
|
||||
this->nnzbs_prec, &one,
|
||||
descr_U, d_Mvals, d_Mrows,
|
||||
d_Mcols, block_size, ilu_info,
|
||||
d_t, &x, rocsparse_solve_policy_auto,
|
||||
d_buffer));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
@ -283,4 +383,8 @@ apply(Scalar& y, Scalar& x) {
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -35,6 +35,8 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
using Opm::OpmLog;
|
||||
@ -235,8 +237,13 @@ amg_cycle_gpu(const int level,
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(h_y.data(), &y, sizeof(Scalar) * Ncur, hipMemcpyDeviceToHost, this->stream));
|
||||
|
||||
// solve coarsest level using umfpack
|
||||
this->umfpack.apply(h_x.data(), h_y.data());
|
||||
// The if constexpr is needed to make the code compile
|
||||
// since the umfpack member is an 'int' with float Scalar.
|
||||
// We will never get here with float Scalar as we throw earlier.
|
||||
// Solve coarsest level using umfpack
|
||||
if constexpr (std::is_same_v<Scalar,double>) {
|
||||
this->umfpack.apply(h_x.data(), h_y.data());
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(&x, h_x.data(), sizeof(Scalar) * Ncur, hipMemcpyHostToDevice, this->stream));
|
||||
|
||||
@ -332,4 +339,8 @@ apply(Scalar& y,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -103,11 +103,15 @@ upload(Scalar *vals,
|
||||
HIP_CHECK(hipMemcpyAsync(nnzValues, vals, sizeof(Scalar) * size, hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
template class RocmVector<T>;\
|
||||
template class RocmMatrix<T>;
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class RocmVector<T>; \
|
||||
template class RocmMatrix<T>;
|
||||
|
||||
INSTANCE_TYPE(int);
|
||||
INSTANCE_TYPE(double);
|
||||
INSTANTIATE_TYPE(int)
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -73,7 +73,7 @@ setJacMat(const BlockedMatrix<Scalar>& jMat)
|
||||
this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jMat);
|
||||
}
|
||||
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class rocsparsePreconditioner<T,1>; \
|
||||
template class rocsparsePreconditioner<T,2>; \
|
||||
template class rocsparsePreconditioner<T,3>; \
|
||||
@ -83,5 +83,9 @@ setJacMat(const BlockedMatrix<Scalar>& jMat)
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
|
@ -51,6 +51,7 @@
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
@ -151,26 +152,55 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
|
||||
// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
}
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
}
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_x, &zero, d_r));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_x, &zero, d_r));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_x, &zero, d_r));
|
||||
}
|
||||
#endif
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &mone, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_rw, 1));
|
||||
ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm_0));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0));
|
||||
}
|
||||
|
||||
if (verbosity >= 2) {
|
||||
std::ostringstream out;
|
||||
@ -183,14 +213,24 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
}
|
||||
for (it = 0.5; it < maxit; it += 0.5) {
|
||||
rhop = rho;
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
|
||||
}
|
||||
|
||||
if (it > 1) {
|
||||
beta = (rho / rhop) * (alpha / omega);
|
||||
nomega = -omega;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &beta, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
|
||||
}
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
@ -209,20 +249,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
|
||||
// spmv
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
}
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
}
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_pw, &zero, d_v));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_pw, &zero, d_v));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_pw, &zero, d_v));
|
||||
}
|
||||
#endif
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
@ -240,12 +301,22 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
|
||||
}
|
||||
alpha = rho / tmp1;
|
||||
nalpha = -alpha;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_rest.stop();
|
||||
@ -272,20 +343,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
|
||||
// spmv
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
}
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
}
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_s, &zero, d_t));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_s, &zero, d_t));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_s, &zero, d_t));
|
||||
}
|
||||
#endif
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
@ -303,14 +395,25 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
|
||||
ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
|
||||
|
||||
}
|
||||
omega = tmp1 / tmp2;
|
||||
nomega = -omega;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
|
||||
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm));
|
||||
} else {
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_rest.stop();
|
||||
@ -480,15 +583,31 @@ analyze_matrix()
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A));
|
||||
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals, d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals, d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals, d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
}
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals, d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
if constexpr (std::is_same_v<Scalar,float>) {
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals,
|
||||
d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
} else {
|
||||
ROCSPARSE_CHECK(rocsparse_sbsrmv_ex_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals,
|
||||
d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
}
|
||||
#endif
|
||||
|
||||
if(!prec->analyze_matrix(&*mat)) {
|
||||
@ -593,4 +712,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
#if FLOW_INSTANTIATE_FLOAT
|
||||
INSTANTIATE_TYPE(float)
|
||||
#endif
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
@ -1351,7 +1351,7 @@ namespace Opm {
|
||||
WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, min_thp, max_thp);
|
||||
// Narrow down the bracket
|
||||
Scalar low1, high1;
|
||||
std::array<Scalar, 2> range = {0.9*min_thp, 1.1*max_thp};
|
||||
std::array<Scalar, 2> range = {Scalar{0.9}*min_thp, Scalar{1.1}*max_thp};
|
||||
std::optional<Scalar> appr_sol;
|
||||
WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, range, low1, high1, appr_sol, 0.0, local_deferredLogger);
|
||||
min_thp = low1;
|
||||
@ -1362,7 +1362,8 @@ namespace Opm {
|
||||
if (!autochoke_thp.has_value() || autochoke_thp.value() > nodal_pressure) {
|
||||
// The bracket is based on the initial bracket or on a range based on a previous calculated group thp
|
||||
std::array<Scalar, 2> range = autochoke_thp.has_value() ?
|
||||
std::array<Scalar, 2>{0.9 * autochoke_thp.value(), 1.1 * autochoke_thp.value()} : range_initial;
|
||||
std::array<Scalar, 2>{Scalar{0.9} * autochoke_thp.value(),
|
||||
Scalar{1.1} * autochoke_thp.value()} : range_initial;
|
||||
Scalar low, high;
|
||||
std::optional<Scalar> approximate_solution;
|
||||
const Scalar tolerance1 = thp_tolerance;
|
||||
|
Loading…
Reference in New Issue
Block a user