From 3eed028978b2962c98245697ecaee6dea2728086 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 16 Apr 2024 19:11:04 +0200 Subject: [PATCH] rocsparseSolverBackend: template Scalar type --- opm/simulators/linalg/bda/BdaBridge.cpp | 4 +- .../linalg/bda/rocsparseSolverBackend.cpp | 242 ++++++++++-------- .../linalg/bda/rocsparseSolverBackend.hpp | 63 +++-- 3 files changed, 165 insertions(+), 144 deletions(-) diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 5492e504e..ae24fc807 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -112,7 +112,9 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("rocsparse") == 0) { #if HAVE_ROCSPARSE use_gpu = true; // should be replaced by a 'use_bridge' boolean - backend.reset(new Opm::Accelerator::rocsparseSolverBackend(linear_solver_verbosity, maxit, tolerance, platformID, deviceID)); + using ROCS = Accelerator::rocsparseSolverBackend; + backend = std::make_unique(linear_solver_verbosity, maxit, + tolerance, platformID, deviceID); #else OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake"); #endif diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 3c688a68b..9860f5728 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -93,24 +93,20 @@ extern std::shared_ptr copyThread; #endif //HAVE_OPENMP -namespace Opm -{ -namespace Accelerator -{ +namespace Opm::Accelerator { -using Opm::OpmLog; using Dune::Timer; -template -rocsparseSolverBackend:: -rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, +template +rocsparseSolverBackend:: +rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_, unsigned int platformID_, unsigned int deviceID_) : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_) { int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); if (static_cast(deviceID) >= numDevices) { - OPM_THROW(std::runtime_error, "Error chosen too high HIP device ID"); + OPM_THROW(std::runtime_error, "Invalid HIP device ID"); } HIP_CHECK(hipSetDevice(deviceID)); @@ -130,46 +126,45 @@ rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream)); } - -template -rocsparseSolverBackend::~rocsparseSolverBackend() { +template +rocsparseSolverBackend::~rocsparseSolverBackend() +{ hipError_t hipstatus = hipStreamSynchronize(stream); - if(hipstatus != hipSuccess){ + if (hipstatus != hipSuccess) { OpmLog::error("Could not synchronize with hipStream"); } hipstatus = hipStreamDestroy(stream); - if(hipstatus != hipSuccess){ + if (hipstatus != hipSuccess) { OpmLog::error("Could not destroy hipStream"); } rocsparse_status status1 = rocsparse_destroy_handle(handle); - if(status1 != rocsparse_status_success){ + if (status1 != rocsparse_status_success) { OpmLog::error("Could not destroy rocsparse handle"); } rocblas_status status2 = rocblas_destroy_handle(blas_handle); - if(status2 != rocblas_status_success){ + if (status2 != rocblas_status_success) { OpmLog::error("Could not destroy rocblas handle"); } } - -template -void rocsparseSolverBackend:: -gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, +template +void rocsparseSolverBackend:: +gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, BdaResult& res) { float it = 0.5; - double rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2; - double norm, norm_0; - double zero = 0.0; - double one = 1.0; - double mone = -1.0; + Scalar rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2; + Scalar norm, norm_0; + Scalar zero = 0.0; + Scalar one = 1.0; + Scalar mone = -1.0; Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false); // set stream here, the WellContributions object is destroyed every linear solve // the number of wells can change every linear solve if (wellContribs.getNumWells() > 0) { - static_cast&>(wellContribs).setStream(stream); + static_cast&>(wellContribs).setStream(stream); } // HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) @@ -259,7 +254,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, // apply wellContributions if (wellContribs.getNumWells() > 0) { - static_cast&>(wellContribs).apply(d_pw, d_v); + static_cast&>(wellContribs).apply(d_pw, d_v); } if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); @@ -325,7 +320,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, // apply wellContributions if (wellContribs.getNumWells() > 0) { - static_cast&>(wellContribs).apply(d_s, d_t); + static_cast&>(wellContribs).apply(d_s, d_t); } if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); @@ -365,8 +360,11 @@ gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, if (verbosity >= 1) { std::ostringstream out; - out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \ - ", time per iteration: " << res.elapsed / it << ", iterations: " << it; + out << "=== converged: " << res.converged + << ", conv_rate: " << res.conv_rate + << ", time: " << res.elapsed << \ + ", time per iteration: " << res.elapsed / it + << ", iterations: " << it; OpmLog::info(out.str()); } if (verbosity >= 3) { @@ -380,10 +378,10 @@ gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, } } -template -void rocsparseSolverBackend:: -initialize(std::shared_ptr> matrix, - std::shared_ptr> jacMatrix) +template +void rocsparseSolverBackend:: +initialize(std::shared_ptr> matrix, + std::shared_ptr> jacMatrix) { this->Nb = matrix->Nb; this->N = Nb * block_size; @@ -397,12 +395,14 @@ initialize(std::shared_ptr> matrix, } std::ostringstream out; - out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n"; + out << "Initializing GPU, matrix size: " + << Nb << " blockrows, nnzb: " << nnzb << "\n"; if (useJacMatrix) { out << "Blocks in ILU matrix: " << jacMatrix->nnzbs << "\n"; } - out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n"; - out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n"; + out << "Maxit: " << maxit + << std::scientific << ", tolerance: " << tolerance << "\n" + << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n"; OpmLog::info(out.str()); out.str(""); out.clear(); @@ -410,26 +410,26 @@ initialize(std::shared_ptr> matrix, mat = matrix; jacMat = jacMatrix; - HIP_CHECK(hipMalloc((void**)&d_r, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_p, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_s, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_v, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_r, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_p, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_s, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_t, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_v, sizeof(Scalar) * N)); HIP_CHECK(hipMalloc((void**)&d_Arows, sizeof(rocsparse_int) * (Nb + 1))); HIP_CHECK(hipMalloc((void**)&d_Acols, sizeof(rocsparse_int) * nnzb)); - HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(double) * nnz)); - HIP_CHECK(hipMalloc((void**)&d_x, sizeof(double) * N)); - HIP_CHECK(hipMalloc((void**)&d_b, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(Scalar) * nnz)); + HIP_CHECK(hipMalloc((void**)&d_x, sizeof(Scalar) * N)); + HIP_CHECK(hipMalloc((void**)&d_b, sizeof(Scalar) * N)); if (useJacMatrix) { HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1))); HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec)); - HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size)); + HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size)); } else { // preconditioner matrix is same - HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size)); + HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size)); d_Mcols = d_Acols; d_Mrows = d_Arows; } @@ -437,26 +437,43 @@ initialize(std::shared_ptr> matrix, initialized = true; } // end initialize() -template -void rocsparseSolverBackend::copy_system_to_gpu(double *b) { +template +void rocsparseSolverBackend:: +copy_system_to_gpu(Scalar *b) +{ Timer t; - HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, sizeof(rocsparse_int) * nnzb, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream)); - HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, + sizeof(rocsparse_int) * (Nb + 1), + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, + sizeof(rocsparse_int) * nnzb, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, + sizeof(Scalar) * nnz, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream)); + HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar) * N, + hipMemcpyHostToDevice, stream)); if (useJacMatrix) { #if HAVE_OPENMP - if(omp_get_max_threads() > 1) - copyThread->join(); + if (omp_get_max_threads() > 1) { + copyThread->join(); + } #endif - HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, + sizeof(rocsparse_int) * (Nb + 1), + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, + sizeof(rocsparse_int) * nnzbs_prec, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, + sizeof(Scalar) * nnzbs_prec * block_size * block_size, + hipMemcpyHostToDevice, stream)); } else { - HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, + sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream)); } if (verbosity >= 3) { @@ -466,29 +483,36 @@ void rocsparseSolverBackend::copy_system_to_gpu(double *b) { std::ostringstream out; out << "-----rocsparseSolver::copy_system_to_gpu(): " << t.elapsed() << " s\n"; out << "---rocsparseSolver::cum copy: " << c_copy << " s"; - OpmLog::info(out.str()); + OpmLog::info(out.str()); } } // end copy_system_to_gpu() // don't copy rowpointers and colindices, they stay the same -template -void rocsparseSolverBackend::update_system_on_gpu(double *b) { +template +void rocsparseSolverBackend:: +update_system_on_gpu(Scalar* b) +{ Timer t; - HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream)); - HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(Scalar) * nnz, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream)); + HIP_CHECK(hipMemcpyAsync(d_b, b, N* sizeof(Scalar), + hipMemcpyHostToDevice, stream)); if (useJacMatrix) { #if HAVE_OPENMP - if (omp_get_max_threads() > 1) - copyThread->join(); + if (omp_get_max_threads() > 1) { + copyThread->join(); + } #endif - HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, + sizeof(Scalar) * nnzbs_prec * block_size * block_size, + hipMemcpyHostToDevice, stream)); } else { - HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, + sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream)); } - if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); @@ -500,8 +524,10 @@ void rocsparseSolverBackend::update_system_on_gpu(double *b) { } } // end update_system_on_gpu() -template -bool rocsparseSolverBackend::analyze_matrix() { +template +bool rocsparseSolverBackend:: +analyze_matrix() +{ std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize; Timer t; @@ -530,7 +556,8 @@ bool rocsparseSolverBackend::analyze_matrix() { ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U)); - d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U)); + d_bufferSize = std::max(d_bufferSize_M, + std::max(d_bufferSize_L, d_bufferSize_U)); HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize)); @@ -578,9 +605,10 @@ bool rocsparseSolverBackend::analyze_matrix() { return true; } // end analyze_matrix() - -template -bool rocsparseSolverBackend::create_preconditioner() { +template +bool rocsparseSolverBackend:: +create_preconditioner() +{ Timer t; bool result = true; @@ -605,10 +633,9 @@ bool rocsparseSolverBackend::create_preconditioner() { return result; } // end create_preconditioner() - -template -void rocsparseSolverBackend:: -solve_system(WellContributions& wellContribs, BdaResult& res) +template +void rocsparseSolverBackend:: +solve_system(WellContributions& wellContribs, BdaResult& res) { Timer t; @@ -621,17 +648,18 @@ solve_system(WellContributions& wellContribs, BdaResult& res) out << "rocsparseSolver::solve_system(): " << t.stop() << " s"; OpmLog::info(out.str()); } - } // end solve_system() - // copy result to host memory // caller must be sure that x is a valid array -template -void rocsparseSolverBackend::get_result(double *x) { +template +void rocsparseSolverBackend:: +get_result(Scalar* x) +{ Timer t; - HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(Scalar) * N, + hipMemcpyDeviceToHost, stream)); HIP_CHECK(hipStreamSynchronize(stream)); // always wait, caller might want to use x immediately if (verbosity >= 3) { @@ -641,14 +669,13 @@ void rocsparseSolverBackend::get_result(double *x) { } } // end get_result() - -template -SolverStatus rocsparseSolverBackend:: - solve_system(std::shared_ptr> matrix, - double *b, - std::shared_ptr> jacMatrix, - WellContributions& wellContribs, - BdaResult& res) +template +SolverStatus rocsparseSolverBackend:: +solve_system(std::shared_ptr> matrix, + Scalar* b, + std::shared_ptr> jacMatrix, + WellContributions& wellContribs, + BdaResult& res) { if (initialized == false) { initialize(matrix, jacMatrix); @@ -672,19 +699,14 @@ SolverStatus rocsparseSolverBackend:: return SolverStatus::BDA_SOLVER_SUCCESS; } +#define INSTANTIATE_TYPE(T) \ + template class rocsparseSolverBackend; \ + template class rocsparseSolverBackend; \ + template class rocsparseSolverBackend; \ + template class rocsparseSolverBackend; \ + template class rocsparseSolverBackend; \ + template class rocsparseSolverBackend; -#define INSTANTIATE_BDA_FUNCTIONS(n) \ -template rocsparseSolverBackend::rocsparseSolverBackend( \ - int, int, double, unsigned int, unsigned int); +INSTANTIATE_TYPE(double) -INSTANTIATE_BDA_FUNCTIONS(1); -INSTANTIATE_BDA_FUNCTIONS(2); -INSTANTIATE_BDA_FUNCTIONS(3); -INSTANTIATE_BDA_FUNCTIONS(4); -INSTANTIATE_BDA_FUNCTIONS(5); -INSTANTIATE_BDA_FUNCTIONS(6); - -#undef INSTANTIATE_BDA_FUNCTIONS - -} // namespace Accelerator -} // namespace Opm +} // namespace Opm::Accelerator diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp index b3e41ed44..e2d179b9e 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp @@ -31,16 +31,13 @@ #include -namespace Opm -{ -namespace Accelerator -{ +namespace Opm::Accelerator { /// This class implements a rocsparse-based ilu0-bicgstab solver on GPU -template -class rocsparseSolverBackend : public BdaSolver +template +class rocsparseSolverBackend : public BdaSolver { - using Base = BdaSolver; + using Base = BdaSolver; using Base::N; using Base::Nb; @@ -59,8 +56,8 @@ private: bool useJacMatrix = false; bool analysis_done = false; - std::shared_ptr> mat{}; // original matrix - std::shared_ptr> jacMat{}; // matrix for preconditioner + std::shared_ptr> mat{}; // original matrix + std::shared_ptr> jacMat{}; // matrix for preconditioner int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M rocsparse_direction dir = rocsparse_direction_row; @@ -76,32 +73,31 @@ private: rocsparse_int *d_Arows, *d_Mrows; rocsparse_int *d_Acols, *d_Mcols; - double *d_Avals, *d_Mvals; - double *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve - double *d_pw, *d_s, *d_t, *d_v; + Scalar *d_Avals, *d_Mvals; + Scalar *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve + Scalar *d_pw, *d_s, *d_t, *d_v; void *d_buffer; // buffer space, used by rocsparse ilu0 analysis int ver; char rev[64]; - /// Solve linear system using ilu0-bicgstab /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result - void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res); + void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res); /// Initialize GPU and allocate memory /// \param[in] matrix matrix A /// \param[in] jacMatrix matrix for preconditioner - void initialize(std::shared_ptr> matrix, - std::shared_ptr> jacMatrix); + void initialize(std::shared_ptr> matrix, + std::shared_ptr> jacMatrix); /// Copy linear system to GPU /// \param[in] b input vector, contains N values - void copy_system_to_gpu(double *b); + void copy_system_to_gpu(Scalar* b); /// Update linear system to GPU /// \param[in] b input vector, contains N values - void update_system_on_gpu(double *b); + void update_system_on_gpu(Scalar* b); /// Analyze sparsity pattern to extract parallelism /// \return true iff analysis was successful @@ -114,16 +110,20 @@ private: /// Solve linear system /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result - void solve_system(WellContributions& wellContribs, BdaResult& res); + void solve_system(WellContributions& wellContribs, BdaResult& res); public: - /// Construct a openclSolver - /// \param[in] linear_solver_verbosity verbosity of openclSolver - /// \param[in] maxit maximum number of iterations for openclSolver - /// \param[in] tolerance required relative tolerance for openclSolver + /// Construct a rocsparseSolver + /// \param[in] linear_solver_verbosity verbosity of rocsparseSolver + /// \param[in] maxit maximum number of iterations for rocsparseSolver + /// \param[in] tolerance required relative tolerance for rocsparseSolver /// \param[in] platformID the OpenCL platform to be used /// \param[in] deviceID the device to be used - rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID); + rocsparseSolverBackend(int linear_solver_verbosity, + int maxit, + Scalar tolerance, + unsigned int platformID, + unsigned int deviceID); /// For the CPR coarse solver // rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder); @@ -138,10 +138,10 @@ public: /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr> matrix, - double* b, - std::shared_ptr> jacMatrix, - WellContributions& wellContribs, + SolverStatus solve_system(std::shared_ptr> matrix, + Scalar* b, + std::shared_ptr> jacMatrix, + WellContributions& wellContribs, BdaResult& res) override; /// Solve scalar linear system, for example a coarse system of an AMG preconditioner @@ -150,13 +150,10 @@ public: /// Get result after linear solve, and peform postprocessing if necessary /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array - void get_result(double *x) override; + void get_result(Scalar* x) override; }; // end class rocsparseSolverBackend -} // namespace Accelerator -} // namespace Opm +} // namespace Opm::Accelerator #endif - -