diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index e05996f55..5492e504e 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -104,7 +104,8 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("rocalution") == 0) { #if HAVE_ROCALUTION use_gpu = true; // should be replaced by a 'use_bridge' boolean - backend.reset(new Opm::Accelerator::rocalutionSolverBackend(linear_solver_verbosity, maxit, tolerance)); + using ROCA = Accelerator::rocalutionSolverBackend; + backend = std::make_unique(linear_solver_verbosity, maxit, tolerance); #else OPM_THROW(std::logic_error, "Error rocalutionSolver was chosen, but rocalution was not found by CMake"); #endif diff --git a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp index bcb17b334..03ce6fcba 100644 --- a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp @@ -47,30 +47,28 @@ #undef HIP_HAVE_CUDA_DEFINED #endif -namespace Opm -{ -namespace Accelerator -{ +namespace Opm::Accelerator { -using Opm::OpmLog; using Dune::Timer; -template -rocalutionSolverBackend:: -rocalutionSolverBackend(int verbosity_, int maxit_, double tolerance_) +template +rocalutionSolverBackend:: +rocalutionSolverBackend(int verbosity_, int maxit_, Scalar tolerance_) : Base(verbosity_, maxit_, tolerance_) { rocalution::init_rocalution(); rocalution::info_rocalution(); - roc_solver = std::make_unique, rocalution::LocalVector, double> >(); - roc_prec = std::make_unique, rocalution::LocalVector, double> >(); + using BCGS = rocalution::BiCGStab; + roc_solver = std::make_unique(); + using ILU = rocalution::ILU; + roc_prec = std::make_unique(); roc_solver->Verbose(0); roc_solver->Init(/*abs_tol=*/1e-15, tolerance, /*divergence_tol=*/1e3, maxit); } - -template -rocalutionSolverBackend::~rocalutionSolverBackend() { +template +rocalutionSolverBackend::~rocalutionSolverBackend() +{ // normally, these rocalution variables are destroyed after the destructor automatically, // but sometimes it segfaults, both with test_rocalutionSolver and with an actual case // release both variables here to prevent that segfault @@ -79,8 +77,9 @@ rocalutionSolverBackend::~rocalutionSolverBackend() { rocalution::stop_rocalution(); } -template -void rocalutionSolverBackend::initialize(BlockedMatrix* matrix) +template +void rocalutionSolverBackend:: +initialize(BlockedMatrix* matrix) { this->Nb = matrix->Nb; this->N = Nb * block_size; @@ -97,15 +96,16 @@ void rocalutionSolverBackend::initialize(BlockedMatrix* matr initialized = true; } // end initialize() -template -void rocalutionSolverBackend::convert_matrix(BlockedMatrix* matrix) +template +void rocalutionSolverBackend:: +convert_matrix(BlockedMatrix* matrix) { Timer t; - for(int i = 0; i < Nb+1; ++i){ + for (int i = 0; i < Nb+1; ++i) { tmp_rowpointers[i] = matrix->rowPointers[i]; } - for(int i = 0; i < nnzb; ++i){ + for (int i = 0; i < nnzb; ++i) { tmp_colindices[i] = matrix->colIndices[i]; } @@ -115,7 +115,7 @@ void rocalutionSolverBackend::convert_matrix(BlockedMatrix* // BCSR_IND_BASE == 0: rocalution expects column-major // BCSR_IND_BASE == 1: rocalution expects row-major if (BCSR_IND_BASE == 0) { - for(int i = 0; i < nnzb; ++i){ + for (int i = 0; i < nnzb; ++i) { tmp_nnzvalues[i * block_size * block_size + 0] = matrix->nnzValues[i * block_size * block_size + 0]; tmp_nnzvalues[i * block_size * block_size + 1] = matrix->nnzValues[i * block_size * block_size + 3]; tmp_nnzvalues[i * block_size * block_size + 2] = matrix->nnzValues[i * block_size * block_size + 6]; @@ -136,8 +136,10 @@ void rocalutionSolverBackend::convert_matrix(BlockedMatrix* // copy result to host memory // caller must be sure that x is a valid array -template -void rocalutionSolverBackend::get_result(double *x) { +template +void rocalutionSolverBackend:: +get_result(Scalar* x) +{ Timer t; std::copy(h_x.begin(), h_x.end(), x); @@ -149,12 +151,12 @@ void rocalutionSolverBackend::get_result(double *x) { } } // end get_result() -template -SolverStatus rocalutionSolverBackend:: -solve_system(std::shared_ptr> matrix, - double *b, - [[maybe_unused]] std::shared_ptr> jacMatrix, - [[maybe_unused]] WellContributions& wellContribs, +template +SolverStatus rocalutionSolverBackend:: +solve_system(std::shared_ptr> matrix, + Scalar* b, + [[maybe_unused]] std::shared_ptr> jacMatrix, + [[maybe_unused]] WellContributions& wellContribs, BdaResult& res) { if (initialized == false) { @@ -163,21 +165,20 @@ solve_system(std::shared_ptr> matrix, tmp_rowpointers = new int[Nb+1]; tmp_colindices = new int[nnzb]; - tmp_nnzvalues = new double[nnzb*block_size*block_size]; + tmp_nnzvalues = new Scalar[nnzb*block_size*block_size]; convert_matrix(matrix.get()); - rocalution::LocalVector roc_x; - rocalution::LocalVector roc_rhs; - rocalution::LocalMatrix roc_mat; + Vec roc_x; + Vec roc_rhs; + Mat roc_mat; // this also transfers ownership to the allocated memory to rocalution // and sets the tmp_* pointers to nullptr - roc_mat.SetDataPtrBCSR( - &tmp_rowpointers, - &tmp_colindices, - &tmp_nnzvalues, - "matrix A", nnzb, Nb, Nb, block_size); + roc_mat.SetDataPtrBCSR(&tmp_rowpointers, + &tmp_colindices, + &tmp_nnzvalues, + "matrix A", nnzb, Nb, Nb, block_size); roc_mat.MoveToAccelerator(); roc_x.MoveToAccelerator(); @@ -198,7 +199,7 @@ solve_system(std::shared_ptr> matrix, // so it just calls ILU::Build() everytime roc_solver->ReBuildNumeric(); - double norm_0 = roc_rhs.Norm(); // since the initial guess is a vector with 0s, initial error is norm(b) + Scalar norm_0 = roc_rhs.Norm(); // since the initial guess is a vector with 0s, initial error is norm(b) // actually solve Dune::Timer t_solve; @@ -217,7 +218,6 @@ solve_system(std::shared_ptr> matrix, res.conv_rate = static_cast(pow(res.reduction, 1.0 / res.iterations)); res.converged = (roc_solver->GetSolverStatus() == 2); - // copy solution vector to host vector // if roc_x could be reused, this should be removed here // and roc_x should be directly copied into x in get_result() @@ -226,26 +226,25 @@ solve_system(std::shared_ptr> matrix, if (verbosity >= 1) { std::ostringstream out; - out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \ - ", time per iteration: " << res.elapsed / res.iterations << ", iterations: " << res.iterations; + out << "=== converged: " << res.converged + << ", conv_rate: " << res.conv_rate + << ", time: " << res.elapsed << + ", time per iteration: " << res.elapsed / res.iterations + << ", iterations: " << res.iterations; OpmLog::info(out.str()); } return SolverStatus::BDA_SOLVER_SUCCESS; } +#define INSTANTIATE_TYPE(T) \ + template class rocalutionSolverBackend; \ + template class rocalutionSolverBackend; \ + template class rocalutionSolverBackend; \ + template class rocalutionSolverBackend; \ + template class rocalutionSolverBackend; \ + template class rocalutionSolverBackend; -#define INSTANTIATE_BDA_FUNCTIONS(n) \ -template rocalutionSolverBackend::rocalutionSolverBackend(int, int, double); +INSTANTIATE_TYPE(double) -INSTANTIATE_BDA_FUNCTIONS(1); -INSTANTIATE_BDA_FUNCTIONS(2); -INSTANTIATE_BDA_FUNCTIONS(3); -INSTANTIATE_BDA_FUNCTIONS(4); -INSTANTIATE_BDA_FUNCTIONS(5); -INSTANTIATE_BDA_FUNCTIONS(6); - -#undef INSTANTIATE_BDA_FUNCTIONS - -} // namespace Accelerator -} // namespace Opm +} // namespace Opm::Accelerator diff --git a/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp b/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp index 6bb3827d3..7f397bc73 100644 --- a/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp +++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp @@ -31,17 +31,14 @@ template class LocalMatrix; template class LocalVector; } -namespace Opm -{ -namespace Accelerator -{ +namespace Opm::Accelerator { /// This class implements a rocalution based linear solver solver on GPU /// It uses ilu0-bicgstab -template -class rocalutionSolverBackend : public BdaSolver +template +class rocalutionSolverBackend : public BdaSolver { - using Base = BdaSolver; + using Base = BdaSolver; using Base::N; using Base::Nb; @@ -55,31 +52,34 @@ class rocalutionSolverBackend : public BdaSolver using Base::initialized; private: - std::vector h_x; // store solution vector on host + std::vector h_x; // store solution vector on host int *tmp_rowpointers; // store matrix on host, this pointer is given to and freed by rocalution int *tmp_colindices; // store matrix on host, this pointer is given to and freed by rocalution - double *tmp_nnzvalues; // store matrix on host, this pointer is given to and freed by rocalution + Scalar* tmp_nnzvalues; // store matrix on host, this pointer is given to and freed by rocalution - std::unique_ptr, rocalution::LocalVector, double> > roc_prec; - std::unique_ptr, rocalution::LocalVector, double> > roc_solver; + using Mat = rocalution::LocalMatrix; + using Vec = rocalution::LocalVector; + + std::unique_ptr> roc_prec; + std::unique_ptr> roc_solver; /// Initialize sizes and allocate memory /// \param[in] matrix matrix A - void initialize(BlockedMatrix* matrix); + void initialize(BlockedMatrix* matrix); /// Convert matrix to rocalution format /// copy matrix to raw pointers, which are given to and freed by rocalution /// \param[in] matrix matrix A - void convert_matrix(BlockedMatrix* matrix); + void convert_matrix(BlockedMatrix* matrix); public: - /// Construct a rocalutionSolver /// also initialize rocalution library and rocalution variables /// \param[in] linear_solver_verbosity verbosity of rocalutionSolver /// \param[in] maxit maximum number of iterations for rocalutionSolver /// \param[in] tolerance required relative tolerance for rocalutionSolver - rocalutionSolverBackend(int linear_solver_verbosity, int maxit, double tolerance); + rocalutionSolverBackend(int linear_solver_verbosity, + int maxit, Scalar tolerance); /// Destroy a rocalutionSolver, and free memory ~rocalutionSolverBackend(); @@ -91,20 +91,19 @@ public: /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr> matrix, - double *b, - std::shared_ptr> jacMatrix, - WellContributions& wellContribs, + SolverStatus solve_system(std::shared_ptr> matrix, + Scalar* b, + std::shared_ptr> jacMatrix, + WellContributions& wellContribs, BdaResult& res) override; /// Get result after linear solve, and peform postprocessing if necessary /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array - void get_result(double *x) override; + void get_result(Scalar* x) override; }; // end class rocalutionSolverBackend -} // namespace Accelerator -} // namespace Opm +} // namespace Opm::Accelerator #endif