mirror of
				https://github.com/OPM/opm-simulators.git
				synced 2025-02-25 18:55:30 -06:00 
			
		
		
		
	Merge pull request #5556 from akva2/float_support5
Float support in simulators: Batch 5
This commit is contained in:
		| @@ -473,7 +473,8 @@ private: | ||||
|                 bool oscillate = false; | ||||
|                 bool stagnate = false; | ||||
|                 const int numPhases = convergence_history.front().size(); | ||||
|                 detail::detectOscillations(convergence_history, iter, numPhases, 0.2, 1, oscillate, stagnate); | ||||
|                 detail::detectOscillations(convergence_history, iter, numPhases, | ||||
|                                            Scalar{0.2}, 1, oscillate, stagnate); | ||||
|                 if (oscillate) { | ||||
|                     damping_factor *= 0.85; | ||||
|                     logger.debug(fmt::format("| Damping factor is now {}", damping_factor)); | ||||
|   | ||||
| @@ -464,4 +464,8 @@ void FlowGenericVanguard::registerParameters_() | ||||
|  | ||||
| template void FlowGenericVanguard::registerParameters_<double>(); | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| template void FlowGenericVanguard::registerParameters_<float>(); | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -278,4 +278,8 @@ using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>; | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::detail | ||||
|   | ||||
| @@ -50,7 +50,9 @@ | ||||
| #include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp> | ||||
| #endif | ||||
|  | ||||
| typedef Dune::InverseOperatorResult InverseOperatorResult; | ||||
| #include <type_traits> | ||||
|  | ||||
| using InverseOperatorResult = Dune::InverseOperatorResult; | ||||
|  | ||||
| namespace Opm { | ||||
|  | ||||
| @@ -95,10 +97,14 @@ BdaBridge(std::string accelerator_mode_, | ||||
| #endif | ||||
|     } else if (accelerator_mode.compare("amgcl") == 0) { | ||||
| #if HAVE_AMGCL | ||||
|         use_gpu = true; // should be replaced by a 'use_bridge' boolean | ||||
|         using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>; | ||||
|         backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit, | ||||
|                                           tolerance, platformID, deviceID); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             OPM_THROW(std::logic_error, "Error amgclSolver disabled with float Scalar"); | ||||
|         } else { | ||||
|             use_gpu = true; // should be replaced by a 'use_bridge' boolean | ||||
|             using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>; | ||||
|             backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit, | ||||
|                                               tolerance, platformID, deviceID); | ||||
|         } | ||||
| #else | ||||
|         OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake"); | ||||
| #endif | ||||
| @@ -366,4 +372,8 @@ initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -89,10 +89,14 @@ void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_si | ||||
|     } | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T) \ | ||||
|     template void blockMultSub(double*, double*, double*, unsigned int); \ | ||||
|     template void blockMult(double*, double*, double*, unsigned int); | ||||
| #define INSTANTIATE_TYPE(T)                               \ | ||||
|     template void blockMultSub(T*, T*, T*, unsigned int); \ | ||||
|     template void blockMult(T*, T*, T*, unsigned int); | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -190,7 +190,11 @@ analyzeHierarchy() | ||||
|     const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices(); | ||||
|  | ||||
|     // store coarsest AMG level in umfpack format, also performs LU decomposition | ||||
|     umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat()); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         OPM_THROW(std::runtime_error, "Cannot use CPR with float Scalar due to UMFPACK"); | ||||
|     } else { | ||||
|         umfpack.setMatrix((*matrixHierarchy.coarsest()).getmat()); | ||||
|     } | ||||
|  | ||||
|     num_levels = dune_amg->levels(); | ||||
|     level_sizes.resize(num_levels); | ||||
| @@ -280,7 +284,7 @@ analyzeAggregateMaps() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)                \ | ||||
| #define INSTANTIATE_TYPE(T)          \ | ||||
|     template class CprCreation<T,1>; \ | ||||
|     template class CprCreation<T,2>; \ | ||||
|     template class CprCreation<T,3>; \ | ||||
| @@ -288,7 +292,11 @@ analyzeAggregateMaps() | ||||
|     template class CprCreation<T,5>; \ | ||||
|     template class CprCreation<T,6>; | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|  | ||||
|   | ||||
| @@ -20,7 +20,6 @@ | ||||
| #ifndef OPM_CPRCREATION_HPP | ||||
| #define OPM_CPRCREATION_HPP | ||||
|  | ||||
| #include <mutex> | ||||
|  | ||||
| #include <dune/istl/paamg/matrixhierarchy.hh> | ||||
| #include <dune/istl/umfpack.hh> | ||||
| @@ -28,6 +27,8 @@ | ||||
| #include <opm/simulators/linalg/bda/Matrix.hpp> | ||||
| #include <opm/simulators/linalg/bda/Preconditioner.hpp> | ||||
|  | ||||
| #include <type_traits> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| template<class Scalar> class BlockedMatrix; | ||||
| @@ -63,7 +64,8 @@ protected: | ||||
|     std::shared_ptr<MatrixOperator> dune_op;    // operator, input to Dune AMG | ||||
|     std::vector<int> level_sizes;               // size of each level in the AMG hierarchy | ||||
|     std::vector<std::vector<int> > diagIndices; // index of diagonal value for each level | ||||
|     Dune::UMFPack<DuneMat> umfpack;             // dune/istl/umfpack object used to solve the coarsest level of AMG | ||||
|     std::conditional_t<std::is_same_v<Scalar,double>, | ||||
|                        Dune::UMFPack<DuneMat>, int> umfpack; // dune/istl/umfpack object used to solve the coarsest level of AMG | ||||
|     bool always_recalculate_aggregates = false; // OPM always reuses the aggregates by default | ||||
|     bool recalculate_aggregates = true;         // only rerecalculate if true | ||||
|     const int pressure_idx = 1;                 // hardcoded to mimic OPM | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| #include <cmath> | ||||
| #include <algorithm> | ||||
| #include <config.h> | ||||
|  | ||||
| #include <opm/simulators/linalg/bda/Misc.hpp> | ||||
|  | ||||
| #include <cmath> | ||||
| #include <algorithm> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| // divide A by B, and round up: return (int)ceil(A/B) | ||||
| @@ -59,4 +61,8 @@ void solve_transposed_3x3(const Scalar* A, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -39,6 +39,8 @@ | ||||
| #define COPY_ROW_BY_ROW 0 | ||||
|  | ||||
| #include <thread> | ||||
| #include <type_traits> | ||||
|  | ||||
| extern std::shared_ptr<std::thread> copyThread; | ||||
|  | ||||
| #if HAVE_OPENMP | ||||
| @@ -109,13 +111,27 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res) | ||||
|         static_cast<WellContributionsCuda<Scalar>&>(wellContribs).setCudaStream(stream); | ||||
|     } | ||||
|  | ||||
|     cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cusparseSbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, | ||||
|                        descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r); | ||||
|     } else { | ||||
|         cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, | ||||
|                        descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r); | ||||
|     } | ||||
|  | ||||
|     cublasDscal(cublasHandle, n, &mone, d_r, 1); | ||||
|     cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1); | ||||
|     cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1); | ||||
|     cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1); | ||||
|     cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cublasSscal(cublasHandle, n, &mone, d_r, 1); | ||||
|         cublasSaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1); | ||||
|         cublasScopy(cublasHandle, n, d_r, 1, d_rw, 1); | ||||
|         cublasScopy(cublasHandle, n, d_r, 1, d_p, 1); | ||||
|         cublasSnrm2(cublasHandle, n, d_r, 1, &norm_0); | ||||
|     } else { | ||||
|         cublasDscal(cublasHandle, n, &mone, d_r, 1); | ||||
|         cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1); | ||||
|         cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1); | ||||
|         cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1); | ||||
|         cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0); | ||||
|     } | ||||
|  | ||||
|     if (verbosity > 1) { | ||||
|         std::ostringstream out; | ||||
| @@ -125,40 +141,80 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res) | ||||
|  | ||||
|     for (it = 0.5; it < maxit; it += 0.5) { | ||||
|         rhop = rho; | ||||
|         cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             cublasSdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho); | ||||
|         } else { | ||||
|             cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho); | ||||
|         } | ||||
|  | ||||
|         if (it > 1) { | ||||
|             beta = (rho / rhop) * (alpha / omega); | ||||
|             nomega = -omega; | ||||
|             cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1); | ||||
|             cublasDscal(cublasHandle, n, &beta, d_p, 1); | ||||
|             cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1); | ||||
|             if constexpr (std::is_same_v<Scalar,float>) { | ||||
|                 cublasSaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1); | ||||
|                 cublasSscal(cublasHandle, n, &beta, d_p, 1); | ||||
|                 cublasSaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1); | ||||
|             } else { | ||||
|                 cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1); | ||||
|                 cublasDscal(cublasHandle, n, &beta, d_p, 1); | ||||
|                 cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // apply ilu0 | ||||
|         cusparseDbsrsv2_solve(cusparseHandle, order, \ | ||||
|                               operation, Nb, nnzbs_prec, &one, \ | ||||
|                               descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_p, d_t, policy, d_buffer); | ||||
|         cusparseDbsrsv2_solve(cusparseHandle, order, \ | ||||
|                               operation, Nb, nnzbs_prec, &one, \ | ||||
|                               descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_pw, policy, d_buffer); | ||||
|  | ||||
|         // spmv | ||||
|         cusparseDbsrmv(cusparseHandle, order, \ | ||||
|                        operation, Nb, Nb, nnzb, \ | ||||
|                        &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_pw, &zero, d_v); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             // apply ilu0 | ||||
|             cusparseSbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_L, d_p, d_t, policy, d_buffer); | ||||
|             cusparseSbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_U, d_t, d_pw, policy, d_buffer); | ||||
|             // spmv | ||||
|             cusparseSbsrmv(cusparseHandle, order, | ||||
|                            operation, Nb, Nb, nnzb, | ||||
|                            &one, descr_M, d_bVals, d_bRows, | ||||
|                            d_bCols, block_size, d_pw, &zero, d_v); | ||||
|         } else { | ||||
|             // apply ilu0 | ||||
|             cusparseDbsrsv2_solve(cusparseHandle, order, | ||||
|                                        operation, Nb, nnzbs_prec, &one, | ||||
|                                        descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                        info_L, d_p, d_t, policy, d_buffer); | ||||
|             cusparseDbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_U, d_t, d_pw, policy, d_buffer); | ||||
|             // spmv | ||||
|             cusparseDbsrmv(cusparseHandle, order, | ||||
|                            operation, Nb, Nb, nnzb, | ||||
|                            &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, | ||||
|                            d_pw, &zero, d_v); | ||||
|         } | ||||
|  | ||||
|         // apply wellContributions | ||||
|         if (wellContribs.getNumWells() > 0) { | ||||
|             static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_pw, d_v); | ||||
|         } | ||||
|  | ||||
|         cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             cublasSdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1); | ||||
|         } else { | ||||
|             cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1); | ||||
|         } | ||||
|  | ||||
|         alpha = rho / tmp1; | ||||
|         nalpha = -alpha; | ||||
|         cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1); | ||||
|         cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1); | ||||
|         cublasDnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             cublasSaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1); | ||||
|             cublasSaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1); | ||||
|             cublasSnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         } else { | ||||
|             cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1); | ||||
|             cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1); | ||||
|             cublasDnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         } | ||||
|  | ||||
|         if (norm < tolerance * norm_0) { | ||||
|             break; | ||||
| @@ -166,32 +222,65 @@ gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res) | ||||
|  | ||||
|         it += 0.5; | ||||
|  | ||||
|         // apply ilu0 | ||||
|         cusparseDbsrsv2_solve(cusparseHandle, order, \ | ||||
|                               operation, Nb, nnzbs_prec, &one, \ | ||||
|                               descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_r, d_t, policy, d_buffer); | ||||
|         cusparseDbsrsv2_solve(cusparseHandle, order, \ | ||||
|                               operation, Nb, nnzbs_prec, &one, \ | ||||
|                               descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_s, policy, d_buffer); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             // apply ilu0 | ||||
|             cusparseSbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_L, d_r, d_t, policy, d_buffer); | ||||
|  | ||||
|         // spmv | ||||
|         cusparseDbsrmv(cusparseHandle, order, \ | ||||
|                        operation, Nb, Nb, nnzb, &one, descr_M, \ | ||||
|                        d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t); | ||||
|             cusparseSbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_U, d_t, d_s, policy, d_buffer); | ||||
|  | ||||
|             // spmv | ||||
|             cusparseSbsrmv(cusparseHandle, order, | ||||
|                            operation, Nb, Nb, nnzb, &one, descr_M, | ||||
|                            d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t); | ||||
|         } else { | ||||
|             // apply ilu0 | ||||
|             cusparseDbsrsv2_solve(cusparseHandle, order, | ||||
|                                        operation, Nb, nnzbs_prec, &one, | ||||
|                                        descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                        info_L, d_r, d_t, policy, d_buffer); | ||||
|  | ||||
|             cusparseDbsrsv2_solve(cusparseHandle, order, | ||||
|                                   operation, Nb, nnzbs_prec, &one, | ||||
|                                   descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                   info_U, d_t, d_s, policy, d_buffer); | ||||
|  | ||||
|             // spmv | ||||
|             cusparseDbsrmv(cusparseHandle, order, | ||||
|                            operation, Nb, Nb, nnzb, &one, descr_M, | ||||
|                            d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t); | ||||
|         } | ||||
|  | ||||
|         // apply wellContributions | ||||
|         if (wellContribs.getNumWells() > 0) { | ||||
|             static_cast<WellContributionsCuda<Scalar>&>(wellContribs).apply(d_s, d_t); | ||||
|         } | ||||
|  | ||||
|         cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1); | ||||
|         cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             cublasSdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1); | ||||
|             cublasSdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2); | ||||
|         } else { | ||||
|             cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1); | ||||
|             cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2); | ||||
|         } | ||||
|  | ||||
|         omega = tmp1 / tmp2; | ||||
|         nomega = -omega; | ||||
|         cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1); | ||||
|         cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1); | ||||
|  | ||||
|         cublasDnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             cublasSaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1); | ||||
|             cublasSaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1); | ||||
|             cublasSnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         } else { | ||||
|             cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1); | ||||
|             cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1); | ||||
|             cublasDnrm2(cublasHandle, n, d_r, 1, &norm); | ||||
|         } | ||||
|  | ||||
|         if (norm < tolerance * norm_0) { | ||||
|             break; | ||||
| @@ -470,21 +559,42 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix() | ||||
|     cusparseCreateBsrsv2Info(&info_U); | ||||
|     cudaCheckLastError("Could not create analysis info"); | ||||
|  | ||||
|     cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec, | ||||
|                                  descr_M, d_mVals, d_mRows, d_mCols, block_size, info_M, &d_bufferSize_M); | ||||
|     cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, &d_bufferSize_L); | ||||
|     cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, &d_bufferSize_U); | ||||
|     cudaCheckLastError(); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cusparseSbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec, | ||||
|                                      descr_M, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                      info_M, &d_bufferSize_M); | ||||
|         cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                    descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                    info_L, &d_bufferSize_L); | ||||
|         cusparseSbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                    descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                    info_U, &d_bufferSize_U); | ||||
|     } else { | ||||
|         cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzbs_prec, | ||||
|                                      descr_M, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                      info_M, &d_bufferSize_M); | ||||
|         cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                    descr_L, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                    info_L, &d_bufferSize_L); | ||||
|         cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzbs_prec, | ||||
|                                    descr_U, d_mVals, d_mRows, d_mCols, block_size, | ||||
|                                    info_U, &d_bufferSize_U); | ||||
|     } | ||||
|  | ||||
|     d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U)); | ||||
|  | ||||
|     cudaMalloc((void**)&d_buffer, d_bufferSize); | ||||
|  | ||||
|     // analysis of ilu LU decomposition | ||||
|     cusparseDbsrilu02_analysis(cusparseHandle, order, \ | ||||
|                                Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols, \ | ||||
|                                block_size, info_M, policy, d_buffer); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cusparseSbsrilu02_analysis(cusparseHandle, order, | ||||
|                                    Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols, | ||||
|                                    block_size, info_M, policy, d_buffer); | ||||
|     } else { | ||||
|         cusparseDbsrilu02_analysis(cusparseHandle, order, | ||||
|                                    Nb, nnzbs_prec, descr_B, d_mVals, d_mRows, d_mCols, | ||||
|                                    block_size, info_M, policy, d_buffer); | ||||
|     } | ||||
|  | ||||
|     int structural_zero; | ||||
|     cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero); | ||||
| @@ -493,13 +603,21 @@ bool cusparseSolverBackend<Scalar,block_size>::analyse_matrix() | ||||
|     } | ||||
|  | ||||
|     // analysis of ilu apply | ||||
|     cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \ | ||||
|                              Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols, \ | ||||
|                              block_size, info_L, policy, d_buffer); | ||||
|  | ||||
|     cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \ | ||||
|                              Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols, \ | ||||
|                              block_size, info_U, policy, d_buffer); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cusparseSbsrsv2_analysis(cusparseHandle, order, operation, | ||||
|                                  Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols, | ||||
|                                  block_size, info_L, policy, d_buffer); | ||||
|         cusparseSbsrsv2_analysis(cusparseHandle, order, operation, | ||||
|                                  Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols, | ||||
|                                  block_size, info_U, policy, d_buffer); | ||||
|     } else { | ||||
|         cusparseDbsrsv2_analysis(cusparseHandle, order, operation, | ||||
|                                  Nb, nnzbs_prec, descr_L, d_mVals, d_mRows, d_mCols, | ||||
|                                  block_size, info_L, policy, d_buffer); | ||||
|         cusparseDbsrsv2_analysis(cusparseHandle, order, operation, | ||||
|                                  Nb, nnzbs_prec, descr_U, d_mVals, d_mRows, d_mCols, | ||||
|                                  block_size, info_U, policy, d_buffer); | ||||
|     } | ||||
|     cudaCheckLastError("Could not analyse level information"); | ||||
|  | ||||
|     if (verbosity > 2) { | ||||
| @@ -519,9 +637,15 @@ bool cusparseSolverBackend<Scalar,block_size>::create_preconditioner() | ||||
| { | ||||
|     Timer t; | ||||
|  | ||||
|     cusparseDbsrilu02(cusparseHandle, order, \ | ||||
|                       Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols, \ | ||||
|                       block_size, info_M, policy, d_buffer); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         cusparseSbsrilu02(cusparseHandle, order, | ||||
|                           Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols, | ||||
|                           block_size, info_M, policy, d_buffer); | ||||
|     } else { | ||||
|         cusparseDbsrilu02(cusparseHandle, order, | ||||
|                           Nb, nnzbs_prec, descr_M, d_mVals, d_mRows, d_mCols, | ||||
|                           block_size, info_M, policy, d_buffer); | ||||
|     } | ||||
|     cudaCheckLastError("Could not perform ilu decomposition"); | ||||
|  | ||||
|     int structural_zero; | ||||
| @@ -604,4 +728,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -75,5 +75,9 @@ void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue, BlockedMatrix<Scalar> | ||||
|  | ||||
| template class OpenclMatrix<double>; | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| template class OpenclMatrix<float>; | ||||
| #endif | ||||
|  | ||||
| } // namespace Accelerator | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -333,7 +333,7 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)       \ | ||||
| #define INSTANTIATE_TYPE(T)          \ | ||||
|     template class openclBILU0<T,1>; \ | ||||
|     template class openclBILU0<T,2>; \ | ||||
|     template class openclBILU0<T,3>; \ | ||||
| @@ -341,6 +341,10 @@ void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x) | ||||
|     template class openclBILU0<T,5>; \ | ||||
|     template class openclBILU0<T,6>; | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -353,7 +353,7 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y) | ||||
|                                 d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)       \ | ||||
| #define INSTANTIATE_TYPE(T)          \ | ||||
|     template class openclBISAI<T,1>; \ | ||||
|     template class openclBISAI<T,2>; \ | ||||
|     template class openclBISAI<T,3>; \ | ||||
| @@ -361,6 +361,10 @@ void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y) | ||||
|     template class openclBISAI<T,5>; \ | ||||
|     template class openclBISAI<T,6>; | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -36,6 +36,8 @@ | ||||
|  | ||||
| #include <opm/simulators/linalg/bda/Misc.hpp> | ||||
|  | ||||
| #include <type_traits> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| using Dune::Timer; | ||||
| @@ -220,7 +222,11 @@ void openclCPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, | ||||
|         } | ||||
|  | ||||
|         // solve coarsest level using umfpack | ||||
|         this->umfpack.apply(h_x.data(), h_y.data()); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             OPM_THROW(std::runtime_error, "Cannot use CPR with floats due to UMFPACK usage"); | ||||
|         } else { | ||||
|             this->umfpack.apply(h_x.data(), h_y.data()); | ||||
|         } | ||||
|  | ||||
|         events.resize(1); | ||||
|         err = queue->enqueueWriteBuffer(x, CL_FALSE, 0, | ||||
| @@ -308,7 +314,7 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)     \ | ||||
| #define INSTANTIATE_TYPE(T)        \ | ||||
|     template class openclCPR<T,1>; \ | ||||
|     template class openclCPR<T,2>; \ | ||||
|     template class openclCPR<T,3>; \ | ||||
| @@ -316,6 +322,10 @@ void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x) | ||||
|     template class openclCPR<T,5>; \ | ||||
|     template class openclCPR<T,6>; | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -20,8 +20,6 @@ | ||||
| #ifndef OPM_OPENCLCPR_HPP | ||||
| #define OPM_OPENCLCPR_HPP | ||||
|  | ||||
| #include <mutex> | ||||
|  | ||||
| #include <dune/istl/paamg/matrixhierarchy.hh> | ||||
| #include <dune/istl/umfpack.hh> | ||||
|  | ||||
| @@ -34,6 +32,8 @@ | ||||
|  | ||||
| #include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp> | ||||
|  | ||||
| #include <type_traits> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| template<class Scalar> class BlockedMatrix; | ||||
|   | ||||
| @@ -61,7 +61,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_, | ||||
|     queue = queue_; | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)                \ | ||||
| #define INSTANTIATE_TYPE(T)                   \ | ||||
|     template class openclPreconditioner<T,1>; \ | ||||
|     template class openclPreconditioner<T,2>; \ | ||||
|     template class openclPreconditioner<T,3>; \ | ||||
| @@ -69,6 +69,10 @@ setOpencl(std::shared_ptr<cl::Context>& context_, | ||||
|     template class openclPreconditioner<T,5>; \ | ||||
|     template class openclPreconditioner<T,6>; | ||||
|  | ||||
| INSTANCE_TYPE(double) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -717,4 +717,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -490,4 +490,8 @@ spmv([[maybe_unused]] Scalar* vals, | ||||
|  | ||||
| template class HipKernels<double>; | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| template class HipKernels<float>; | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -247,4 +247,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -29,8 +29,9 @@ | ||||
| #include <opm/simulators/linalg/bda/Misc.hpp> | ||||
|  | ||||
| #include <sstream> | ||||
|  | ||||
| #include <thread> | ||||
| #include <type_traits> | ||||
|  | ||||
| extern std::shared_ptr<std::thread> copyThread; | ||||
|     | ||||
| #if HAVE_OPENMP | ||||
| @@ -112,23 +113,63 @@ analyze_matrix(BlockedMatrix<Scalar>*, | ||||
|     ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U)); | ||||
|     ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper)); | ||||
|     ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit)); | ||||
|      | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M)); | ||||
|      | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec, | ||||
|                                descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L)); | ||||
|  | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec, | ||||
|                                descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrilu0_buffer_size(this->handle, this->dir, Nb, | ||||
|                                                        this->nnzbs_prec, descr_M, | ||||
|                                                        d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                        block_size, ilu_info, &d_bufferSize_M)); | ||||
|  | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir, | ||||
|                                                      this->operation, Nb, | ||||
|                                                      this->nnzbs_prec, descr_L, | ||||
|                                                      d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                      block_size, ilu_info, &d_bufferSize_L)); | ||||
|  | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(this->handle, this->dir, | ||||
|                                                      this->operation, Nb, | ||||
|                                                      this->nnzbs_prec, descr_U, | ||||
|                                                      d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                      block_size, ilu_info, &d_bufferSize_U)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, | ||||
|                                                        this->nnzbs_prec, descr_M, | ||||
|                                                        d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                        block_size, ilu_info, &d_bufferSize_M)); | ||||
|  | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, | ||||
|                                                      this->operation, Nb, | ||||
|                                                      this->nnzbs_prec, descr_L, | ||||
|                                                      d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                      block_size, ilu_info, &d_bufferSize_L)); | ||||
|  | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, | ||||
|                                                      this->operation, Nb, | ||||
|                                                      this->nnzbs_prec, descr_U, | ||||
|                                                      d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                      block_size, ilu_info, &d_bufferSize_U)); | ||||
|     } | ||||
|  | ||||
|     d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U)); | ||||
|  | ||||
|     HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize)); | ||||
|  | ||||
|     // analysis of ilu LU decomposition | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, \ | ||||
|                                Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \ | ||||
|                                block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrilu0_analysis(this->handle, this->dir, | ||||
|                                                     Nb, this->nnzbs_prec, descr_M, | ||||
|                                                     d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                     block_size, ilu_info, | ||||
|                                                     rocsparse_analysis_policy_reuse, | ||||
|                                                     rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, | ||||
|                                                     Nb, this->nnzbs_prec, descr_M, | ||||
|                                                     d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                     block_size, ilu_info, | ||||
|                                                     rocsparse_analysis_policy_reuse, | ||||
|                                                     rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } | ||||
|  | ||||
|     int zero_position = 0; | ||||
|     rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position); | ||||
| @@ -138,12 +179,33 @@ analyze_matrix(BlockedMatrix<Scalar>*, | ||||
|     } | ||||
|  | ||||
|     // analysis of ilu apply | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \ | ||||
|                              Nb, this->nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \ | ||||
|                              block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \ | ||||
|                              Nb, this->nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \ | ||||
|                              block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation, | ||||
|                                                   Nb, this->nnzbs_prec, descr_L, | ||||
|                                                   d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                   block_size, ilu_info, | ||||
|                                                   rocsparse_analysis_policy_reuse, | ||||
|                                                   rocsparse_solve_policy_auto, d_buffer)); | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_analysis(this->handle, this->dir, this->operation, | ||||
|                                                   Nb, this->nnzbs_prec, descr_U, d_Mvals, | ||||
|                                                   d_Mrows, d_Mcols, | ||||
|                                                   block_size, ilu_info, | ||||
|                                                   rocsparse_analysis_policy_reuse, | ||||
|                                                   rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, | ||||
|                                                   Nb, this->nnzbs_prec, descr_L, | ||||
|                                                   d_Mvals, d_Mrows, d_Mcols, | ||||
|                                                   block_size, ilu_info, | ||||
|                                                   rocsparse_analysis_policy_reuse, | ||||
|                                                   rocsparse_solve_policy_auto, d_buffer)); | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, | ||||
|                                                   Nb, this->nnzbs_prec, descr_U, d_Mvals, | ||||
|                                                   d_Mrows, d_Mcols, | ||||
|                                                   block_size, ilu_info, | ||||
|                                                   rocsparse_analysis_policy_reuse, | ||||
|                                                   rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } | ||||
|  | ||||
|     if (verbosity >= 3) { | ||||
|     	HIP_CHECK(hipStreamSynchronize(this->stream)); | ||||
| @@ -168,13 +230,25 @@ create_preconditioner(BlockedMatrix<Scalar>*, | ||||
| { | ||||
|     Timer t; | ||||
|     bool result = true; | ||||
|      | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer)); | ||||
|  | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrilu0(this->handle, this->dir, Nb, | ||||
|                                            this->nnzbs_prec, descr_M, | ||||
|                                            d_Mvals, d_Mrows, d_Mcols, | ||||
|                                            block_size, ilu_info, | ||||
|                                            rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, | ||||
|                                            this->nnzbs_prec, descr_M, | ||||
|                                            d_Mvals, d_Mrows, d_Mcols, | ||||
|                                            block_size, ilu_info, | ||||
|                                            rocsparse_solve_policy_auto, d_buffer)); | ||||
|     } | ||||
|  | ||||
|     // Check for zero pivot | ||||
|     int zero_position = 0; | ||||
|     rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position); | ||||
|     if(rocsparse_status_success != status) | ||||
|     if (rocsparse_status_success != status) | ||||
|     { | ||||
|         printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position); | ||||
|         return false; | ||||
| @@ -257,13 +331,39 @@ apply(Scalar& y, Scalar& x) { | ||||
|  | ||||
|     Timer t_apply; | ||||
|  | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \ | ||||
|                               this->operation, Nb, this->nnzbs_prec, &one, \ | ||||
|                               descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &y, d_t, rocsparse_solve_policy_auto, d_buffer)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir, | ||||
|                                                this->operation, Nb, | ||||
|                                                this->nnzbs_prec, &one, | ||||
|                                                descr_L, d_Mvals, d_Mrows, | ||||
|                                                d_Mcols, block_size, ilu_info, | ||||
|                                                &y, d_t, rocsparse_solve_policy_auto, | ||||
|                                                d_buffer)); | ||||
|  | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \ | ||||
|                               this->operation, Nb, this->nnzbs_prec, &one, \ | ||||
|                               descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, &x, rocsparse_solve_policy_auto, d_buffer)); | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrsv_solve(this->handle, this->dir, | ||||
|                                                this->operation, Nb, | ||||
|                                                this->nnzbs_prec, &one, | ||||
|                                                descr_U, d_Mvals, d_Mrows, | ||||
|                                                d_Mcols, block_size, ilu_info, | ||||
|                                                d_t, &x, rocsparse_solve_policy_auto, | ||||
|                                                d_buffer)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, | ||||
|                                                this->operation, Nb, | ||||
|                                                this->nnzbs_prec, &one, | ||||
|                                                descr_L, d_Mvals, d_Mrows, | ||||
|                                                d_Mcols, block_size, ilu_info, | ||||
|                                                &y, d_t, rocsparse_solve_policy_auto, | ||||
|                                                d_buffer)); | ||||
|  | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, | ||||
|                                                this->operation, Nb, | ||||
|                                                this->nnzbs_prec, &one, | ||||
|                                                descr_U, d_Mvals, d_Mrows, | ||||
|                                                d_Mcols, block_size, ilu_info, | ||||
|                                                d_t, &x, rocsparse_solve_policy_auto, | ||||
|                                                d_buffer)); | ||||
|     } | ||||
|          | ||||
|     if (verbosity >= 3) { | ||||
|         std::ostringstream out; | ||||
| @@ -283,4 +383,8 @@ apply(Scalar& y, Scalar& x) { | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -35,6 +35,8 @@ | ||||
|  | ||||
| #include <opm/simulators/linalg/bda/Misc.hpp> | ||||
|  | ||||
| #include <type_traits> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| using Opm::OpmLog; | ||||
| @@ -235,8 +237,13 @@ amg_cycle_gpu(const int level, | ||||
|  | ||||
|         HIP_CHECK(hipMemcpyAsync(h_y.data(), &y, sizeof(Scalar) * Ncur, hipMemcpyDeviceToHost, this->stream)); | ||||
|          | ||||
|         // solve coarsest level using umfpack | ||||
|         this->umfpack.apply(h_x.data(), h_y.data()); | ||||
|         // The if constexpr is needed to make the code compile | ||||
|         // since the umfpack member is an 'int' with float Scalar. | ||||
|         // We will never get here with float Scalar as we throw earlier. | ||||
|         // Solve coarsest level using umfpack | ||||
|         if constexpr (std::is_same_v<Scalar,double>) { | ||||
|             this->umfpack.apply(h_x.data(), h_y.data()); | ||||
|         } | ||||
|  | ||||
|         HIP_CHECK(hipMemcpyAsync(&x, h_x.data(), sizeof(Scalar) * Ncur, hipMemcpyHostToDevice, this->stream)); | ||||
|          | ||||
| @@ -332,4 +339,8 @@ apply(Scalar& y, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -103,11 +103,15 @@ upload(Scalar *vals, | ||||
|     HIP_CHECK(hipMemcpyAsync(nnzValues, vals, sizeof(Scalar) * size, hipMemcpyHostToDevice, stream));     | ||||
| } | ||||
|  | ||||
| #define INSTANCE_TYPE(T)  \ | ||||
| template class RocmVector<T>;\ | ||||
| template class RocmMatrix<T>; | ||||
| #define INSTANTIATE_TYPE(T)       \ | ||||
|     template class RocmVector<T>; \ | ||||
|     template class RocmMatrix<T>; | ||||
|  | ||||
| INSTANCE_TYPE(int); | ||||
| INSTANCE_TYPE(double); | ||||
| INSTANTIATE_TYPE(int) | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm | ||||
|   | ||||
| @@ -73,7 +73,7 @@ setJacMat(const BlockedMatrix<Scalar>& jMat) | ||||
|     this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jMat); | ||||
| } | ||||
|  | ||||
| #define INSTANTIATE_TYPE(T)                  \ | ||||
| #define INSTANTIATE_TYPE(T)                      \ | ||||
|     template class rocsparsePreconditioner<T,1>; \ | ||||
|     template class rocsparsePreconditioner<T,2>; \ | ||||
|     template class rocsparsePreconditioner<T,3>; \ | ||||
| @@ -83,5 +83,9 @@ setJacMat(const BlockedMatrix<Scalar>& jMat) | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } //namespace Opm | ||||
|  | ||||
|   | ||||
| @@ -51,6 +51,7 @@ | ||||
| #endif | ||||
|  | ||||
| #include <cstddef> | ||||
| #include <type_traits> | ||||
|  | ||||
| namespace Opm::Accelerator { | ||||
|  | ||||
| @@ -151,26 +152,55 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|  | ||||
| // HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) | ||||
| #if HIP_VERSION >= 60000000 | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                      Nb, Nb, nnzb, &one, descr_A, | ||||
|                                      d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                      spmv_info, d_x, &zero, d_r)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                          Nb, Nb, nnzb, &one, descr_A, | ||||
|                                          d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                          spmv_info, d_x, &zero, d_r)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                          Nb, Nb, nnzb, &one, descr_A, | ||||
|                                          d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                          spmv_info, d_x, &zero, d_r)); | ||||
|     } | ||||
| #elif HIP_VERSION >= 50400000 | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                         Nb, Nb, nnzb, &one, descr_A, | ||||
|                                         d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                         spmv_info, d_x, &zero, d_r)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             spmv_info, d_x, &zero, d_r)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             spmv_info, d_x, &zero, d_r)); | ||||
|     } | ||||
| #else | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                         Nb, Nb, nnzb, &one, descr_A, | ||||
|                                         d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                         d_x, &zero, d_r)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             d_x, &zero, d_r)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             d_x, &zero, d_r)); | ||||
|     } | ||||
| #endif | ||||
|     ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1)); | ||||
|     ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1)); | ||||
|     ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1)); | ||||
|     ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1)); | ||||
|     ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &mone, d_r, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_b, 1, d_r, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_rw, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_scopy(blas_handle, N, d_r, 1, d_p, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm_0)); | ||||
|     } else { | ||||
|         ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0)); | ||||
|     } | ||||
|  | ||||
|     if (verbosity >= 2) { | ||||
|         std::ostringstream out; | ||||
| @@ -183,14 +213,24 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|     } | ||||
|     for (it = 0.5; it < maxit; it += 0.5) { | ||||
|         rhop = rho; | ||||
|         ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_r, 1, &rho)); | ||||
|         } else { | ||||
|             ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho)); | ||||
|         } | ||||
|  | ||||
|         if (it > 1) { | ||||
|             beta = (rho / rhop) * (alpha / omega); | ||||
|             nomega = -omega; | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1)); | ||||
|             if constexpr (std::is_same_v<Scalar,float>) { | ||||
|                 ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1)); | ||||
|                 ROCBLAS_CHECK(rocblas_sscal(blas_handle, N, &beta, d_p, 1)); | ||||
|                 ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &one, d_r, 1, d_p, 1)); | ||||
|             } else { | ||||
|                 ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1)); | ||||
|                 ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1)); | ||||
|                 ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1)); | ||||
|             } | ||||
|         } | ||||
|         if (verbosity >= 3) { | ||||
|             HIP_CHECK(hipStreamSynchronize(stream)); | ||||
| @@ -209,20 +249,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|  | ||||
|         // spmv | ||||
| #if HIP_VERSION >= 60000000 | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                           Nb, Nb, nnzb, &one, descr_A, | ||||
|                                           d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                           spmv_info, d_pw, &zero, d_v)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                               Nb, Nb, nnzb, &one, descr_A, | ||||
|                                               d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                               spmv_info, d_pw, &zero, d_v)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                               Nb, Nb, nnzb, &one, descr_A, | ||||
|                                               d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                               spmv_info, d_pw, &zero, d_v)); | ||||
|         } | ||||
| #elif HIP_VERSION >= 50400000 | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             spmv_info, d_pw, &zero, d_v)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 spmv_info, d_pw, &zero, d_v)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 spmv_info, d_pw, &zero, d_v)); | ||||
|         } | ||||
| #else | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             d_pw, &zero, d_v)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 d_pw, &zero, d_v)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 d_pw, &zero, d_v)); | ||||
|         } | ||||
| #endif | ||||
|         if (verbosity >= 3) { | ||||
|             HIP_CHECK(hipStreamSynchronize(stream)); | ||||
| @@ -240,12 +301,22 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|             t_rest.start(); | ||||
|         } | ||||
|  | ||||
|         ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1)); | ||||
|         } else { | ||||
|             ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1)); | ||||
|         } | ||||
|         alpha = rho / tmp1; | ||||
|         nalpha = -alpha; | ||||
|         ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         } else { | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         } | ||||
|         if (verbosity >= 3) { | ||||
|             HIP_CHECK(hipStreamSynchronize(stream)); | ||||
|             t_rest.stop(); | ||||
| @@ -272,20 +343,41 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|  | ||||
|         // spmv | ||||
| #if HIP_VERSION >= 60000000 | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                          Nb, Nb, nnzb, &one, descr_A, | ||||
|                                          d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                          spmv_info, d_s, &zero, d_t)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                              Nb, Nb, nnzb, &one, descr_A, | ||||
|                                              d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                              spmv_info, d_s, &zero, d_t)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                              Nb, Nb, nnzb, &one, descr_A, | ||||
|                                              d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                              spmv_info, d_s, &zero, d_t)); | ||||
|         } | ||||
| #elif HIP_VERSION >= 50400000 | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             spmv_info, d_s, &zero, d_t)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv_ex(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 spmv_info, d_s, &zero, d_t)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 spmv_info, d_s, &zero, d_t)); | ||||
|         } | ||||
| #else | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                             Nb, Nb, nnzb, &one, descr_A, | ||||
|                                             d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                             d_s, &zero, d_t)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCSPARSE_CHECK(rocsparse_sbsrmv(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 d_s, &zero, d_t)); | ||||
|         } else { | ||||
|             ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, &one, descr_A, | ||||
|                                                 d_Avals, d_Arows, d_Acols, block_size, | ||||
|                                                 d_s, &zero, d_t)); | ||||
|         } | ||||
| #endif | ||||
|         if (verbosity >= 3) { | ||||
|             HIP_CHECK(hipStreamSynchronize(stream)); | ||||
| @@ -303,14 +395,25 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs, | ||||
|             t_rest.start(); | ||||
|         } | ||||
|  | ||||
|         ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1)); | ||||
|         ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_r, 1, &tmp1)); | ||||
|             ROCBLAS_CHECK(rocblas_sdot(blas_handle, N, d_t, 1, d_t, 1, &tmp2)); | ||||
|         }  else { | ||||
|             ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1)); | ||||
|             ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2)); | ||||
|  | ||||
|         } | ||||
|         omega = tmp1 / tmp2; | ||||
|         nomega = -omega; | ||||
|         ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1)); | ||||
|         ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1)); | ||||
|  | ||||
|         ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         if constexpr (std::is_same_v<Scalar,float>) { | ||||
|             ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &omega, d_s, 1, d_x, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_saxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_snrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         } else { | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1)); | ||||
|             ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); | ||||
|         } | ||||
|         if (verbosity >= 3) { | ||||
|             HIP_CHECK(hipStreamSynchronize(stream)); | ||||
|             t_rest.stop(); | ||||
| @@ -480,15 +583,31 @@ analyze_matrix() | ||||
|     ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A)); | ||||
|  | ||||
| #if HIP_VERSION >= 60000000 | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation, | ||||
|                                               Nb, Nb, nnzb, | ||||
|                                               descr_A, d_Avals, d_Arows, d_Acols, | ||||
|                                               block_size, spmv_info)); | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|       ROCSPARSE_CHECK(rocsparse_sbsrmv_analysis(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, | ||||
|                                                 descr_A, d_Avals, d_Arows, d_Acols, | ||||
|                                                 block_size, spmv_info)); | ||||
|     } else { | ||||
|       ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation, | ||||
|                                                 Nb, Nb, nnzb, | ||||
|                                                 descr_A, d_Avals, d_Arows, d_Acols, | ||||
|                                                 block_size, spmv_info)); | ||||
|     } | ||||
| #elif HIP_VERSION >= 50400000 | ||||
|     ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation, | ||||
|         Nb, Nb, nnzb, | ||||
|         descr_A, d_Avals, d_Arows, d_Acols, | ||||
|         block_size, spmv_info)); | ||||
|     // Pick the rocSPARSE routine matching Scalar: 's' = float, 'd' = double. | ||||
|     if constexpr (std::is_same_v<Scalar,float>) { | ||||
|         ROCSPARSE_CHECK(rocsparse_sbsrmv_ex_analysis(handle, dir, operation, | ||||
|                                                      Nb, Nb, nnzb, | ||||
|                                                      descr_A, d_Avals, | ||||
|                                                      d_Arows, d_Acols, | ||||
|                                                      block_size, spmv_info)); | ||||
|     } else { | ||||
|         ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation, | ||||
|                                                      Nb, Nb, nnzb, | ||||
|                                                      descr_A, d_Avals, | ||||
|                                                      d_Arows, d_Acols, | ||||
|                                                      block_size, spmv_info)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     if(!prec->analyze_matrix(&*mat)) { | ||||
| @@ -593,4 +712,8 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix, | ||||
|  | ||||
| INSTANTIATE_TYPE(double) | ||||
|  | ||||
| #if FLOW_INSTANTIATE_FLOAT | ||||
| INSTANTIATE_TYPE(float) | ||||
| #endif | ||||
|  | ||||
| } // namespace Opm::Accelerator | ||||
|   | ||||
| @@ -1351,7 +1351,7 @@ namespace Opm { | ||||
|                     WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, min_thp, max_thp); | ||||
|                     // Narrow down the bracket | ||||
|                     Scalar low1, high1; | ||||
|                     std::array<Scalar, 2> range = {0.9*min_thp, 1.1*max_thp}; | ||||
|                     std::array<Scalar, 2> range = {Scalar{0.9}*min_thp, Scalar{1.1}*max_thp}; | ||||
|                     std::optional<Scalar> appr_sol; | ||||
|                     WellBhpThpCalculator<Scalar>::bruteForceBracketCommonTHP(mismatch, range, low1, high1, appr_sol, 0.0, local_deferredLogger); | ||||
|                     min_thp = low1; | ||||
| @@ -1362,7 +1362,8 @@ namespace Opm { | ||||
|                 if (!autochoke_thp.has_value() || autochoke_thp.value() > nodal_pressure) { | ||||
|                     // The bracket is based on the initial bracket or on a range based on a previous calculated group thp | ||||
|                     std::array<Scalar, 2> range = autochoke_thp.has_value() ? | ||||
|                         std::array<Scalar, 2>{0.9 * autochoke_thp.value(), 1.1 * autochoke_thp.value()} : range_initial; | ||||
|                         std::array<Scalar, 2>{Scalar{0.9} * autochoke_thp.value(), | ||||
|                                               Scalar{1.1} * autochoke_thp.value()} : range_initial; | ||||
|                     Scalar low, high; | ||||
|                     std::optional<Scalar> approximate_solution; | ||||
|                     const Scalar tolerance1 = thp_tolerance; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user