From ad1d8624264b4538ce48383afcca28f30682e77c Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 16 Apr 2024 12:28:49 +0200 Subject: [PATCH] BdaBridge: template Scalar type --- opm/simulators/linalg/bda/BdaBridge.cpp | 142 +++++++++++++----------- opm/simulators/linalg/bda/BdaBridge.hpp | 32 ++++-- 2 files changed, 98 insertions(+), 76 deletions(-) diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index ae24fc807..b20071433 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -52,28 +52,29 @@ typedef Dune::InverseOperatorResult InverseOperatorResult; -namespace Opm -{ +namespace Opm { - using Opm::Accelerator::BdaResult; - using Opm::Accelerator::BdaSolver; - using Opm::Accelerator::SolverStatus; +using Accelerator::BdaResult; +using Accelerator::BdaSolver; +using Accelerator::SolverStatus; -template -BdaBridge::BdaBridge(std::string accelerator_mode_, - int linear_solver_verbosity, - [[maybe_unused]] int maxit, - [[maybe_unused]] double tolerance, - [[maybe_unused]] unsigned int platformID, - [[maybe_unused]] unsigned int deviceID, - [[maybe_unused]] bool opencl_ilu_parallel, - [[maybe_unused]] std::string linsolver) -: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_) +template +BdaBridge:: +BdaBridge(std::string accelerator_mode_, + int linear_solver_verbosity, + [[maybe_unused]] int maxit, + [[maybe_unused]] Scalar tolerance, + [[maybe_unused]] unsigned int platformID, + [[maybe_unused]] unsigned int deviceID, + [[maybe_unused]] bool opencl_ilu_parallel, + [[maybe_unused]] std::string linsolver) + : verbosity(linear_solver_verbosity) + , accelerator_mode(accelerator_mode_) { if (accelerator_mode.compare("cusparse") == 0) { #if HAVE_CUDA use_gpu = true; - using CU = Accelerator::cusparseSolverBackend; + using CU = Accelerator::cusparseSolverBackend; backend = std::make_unique(linear_solver_verbosity, maxit, tolerance, deviceID); #else OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake"); @@ -81,7 +82,7 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("opencl") == 0) { #if HAVE_OPENCL use_gpu = true; - using OCL = Accelerator::openclSolverBackend; + using OCL = Accelerator::openclSolverBackend; backend = std::make_unique(linear_solver_verbosity, maxit, tolerance, @@ -95,7 +96,7 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("amgcl") == 0) { #if HAVE_AMGCL use_gpu = true; // should be replaced by a 'use_bridge' boolean - using AMGCL = Accelerator::amgclSolverBackend; + using AMGCL = Accelerator::amgclSolverBackend; backend = std::make_unique(linear_solver_verbosity, maxit, tolerance, platformID, deviceID); #else @@ -104,7 +105,7 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("rocalution") == 0) { #if HAVE_ROCALUTION use_gpu = true; // should be replaced by a 'use_bridge' boolean - using ROCA = Accelerator::rocalutionSolverBackend; + using ROCA = Accelerator::rocalutionSolverBackend; backend = std::make_unique(linear_solver_verbosity, maxit, tolerance); #else OPM_THROW(std::logic_error, "Error rocalutionSolver was chosen, but rocalution was not found by CMake"); @@ -112,7 +113,7 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("rocsparse") == 0) { #if HAVE_ROCSPARSE use_gpu = true; // should be replaced by a 'use_bridge' boolean - using ROCS = Accelerator::rocsparseSolverBackend; + using ROCS = Accelerator::rocsparseSolverBackend; backend = std::make_unique(linear_solver_verbosity, maxit, tolerance, platformID, deviceID); #else @@ -125,13 +126,14 @@ BdaBridge::BdaBridge(std::string acceler } } - - template -int replaceZeroDiagonal(BridgeMatrix& mat, std::vector& diag_indices) { +int replaceZeroDiagonal(BridgeMatrix& mat, + std::vector& diag_indices) +{ + using Scalar = typename BridgeMatrix::field_type; int numZeros = 0; const int dim = mat[0][0].N(); // might be replaced with BridgeMatrix::block_type::size() - const double zero_replace = 1e-15; + const Scalar zero_replace = 1e-15; if (diag_indices.empty()) { int Nb = mat.N(); diag_indices.reserve(Nb); @@ -147,7 +149,7 @@ int replaceZeroDiagonal(BridgeMatrix& mat, std::vectorgetptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r @@ -164,13 +166,15 @@ int replaceZeroDiagonal(BridgeMatrix& mat, std::vector -void BdaBridge::copySparsityPatternFromISTL(const BridgeMatrix& mat, std::vector &h_rows, std::vector &h_cols) { - +void BdaBridge:: +copySparsityPatternFromISTL(const BridgeMatrix& mat, + std::vector& h_rows, + std::vector& h_cols) +{ h_rows.clear(); h_cols.clear(); @@ -185,17 +189,19 @@ void BdaBridge::copySparsityPatternFromI // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int' if (static_cast(h_rows[mat.N()]) != mat.nonzeroes()) { - OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::copySparsityPatternFromISTL()"); + OPM_THROW(std::logic_error, + "Error size of rows do not sum to number of nonzeroes " + "in BdaBridge::copySparsityPatternFromISTL()"); } } - // check if the nnz values of the matrix are in contiguous memory // this is done by checking if the distance between the last value of the last block of row 0 and // the first value of the first row of row 1 is equal to 1 // if the matrix only has 1 row, it is always contiguous template -void checkMemoryContiguous(const BridgeMatrix& mat) { +void checkMemoryContiguous(const BridgeMatrix& mat) +{ auto block_size = mat[0][0].N(); auto row = mat.begin(); auto last_of_row0 = row->begin(); @@ -212,14 +218,13 @@ void checkMemoryContiguous(const BridgeMatrix& mat) { } } - template void BdaBridge:: solve_system(BridgeMatrix* bridgeMat, BridgeMatrix* jacMat, int numJacobiBlocks, BridgeVector& b, - WellContributions& wellContribs, + WellContributions& wellContribs, InverseOperatorResult& res) { if (use_gpu) { @@ -235,14 +240,14 @@ solve_system(BridgeMatrix* bridgeMat, return; } - using Mat = Accelerator::BlockedMatrix; + using Mat = Accelerator::BlockedMatrix; if (!matrix) { h_rows.reserve(Nb+1); h_cols.reserve(nnzb); copySparsityPatternFromISTL(*bridgeMat, h_rows, h_cols); checkMemoryContiguous(*bridgeMat); matrix = std::make_unique(Nb, nnzb, block_size, - static_cast(&(((*bridgeMat)[0][0][0][0]))), + static_cast(&(((*bridgeMat)[0][0][0][0]))), h_cols.data(), h_rows.data()); } @@ -251,12 +256,14 @@ solve_system(BridgeMatrix* bridgeMat, int numZeros = replaceZeroDiagonal(*bridgeMat, diagIndices); if (verbosity >= 2) { std::ostringstream out; - out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros"; + out << "Checking zeros took: " << t_zeros.stop() << " s, found " + << numZeros << " zeros"; OpmLog::info(out.str()); } if (numJacobiBlocks >= 2) { - const int jacNnzb = (h_jacRows.empty()) ? jacMat->nonzeroes() : h_jacRows.back(); + const int jacNnzb = (h_jacRows.empty()) ? jacMat->nonzeroes() + : h_jacRows.back(); if (!jacMatrix) { h_jacRows.reserve(Nb+1); @@ -264,7 +271,7 @@ solve_system(BridgeMatrix* bridgeMat, copySparsityPatternFromISTL(*jacMat, h_jacRows, h_jacCols); checkMemoryContiguous(*jacMat); jacMatrix = std::make_unique(Nb, jacNnzb, block_size, - static_cast(&(((*jacMat)[0][0][0][0]))), + static_cast(&(((*jacMat)[0][0][0][0]))), h_jacCols.data(), h_jacRows.data()); } @@ -273,7 +280,8 @@ solve_system(BridgeMatrix* bridgeMat, int jacNumZeros = replaceZeroDiagonal(*jacMat, jacDiagIndices); if (verbosity >= 2) { std::ostringstream out; - out << "Checking zeros for jacMat took: " << t_zeros2.stop() << " s, found " << jacNumZeros << " zeros"; + out << "Checking zeros for jacMat took: " << t_zeros2.stop() + << " s, found " << jacNumZeros << " zeros"; OpmLog::info(out.str()); } } @@ -281,17 +289,23 @@ solve_system(BridgeMatrix* bridgeMat, ///////////////////////// // actually solve // assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour - SolverStatus status = backend->solve_system(matrix, static_cast(&(b[0][0])), jacMatrix, wellContribs, result); + SolverStatus status = backend->solve_system(matrix, + static_cast(&(b[0][0])), + jacMatrix, wellContribs, result); - switch(status) { + switch (status) { case SolverStatus::BDA_SOLVER_SUCCESS: //OpmLog::info("BdaSolver converged"); break; case SolverStatus::BDA_SOLVER_ANALYSIS_FAILED: - OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); + OpmLog::warning("BdaSolver could not analyse level information of matrix, " + "perhaps there is still a 0.0 on the diagonal of a " + "block on the diagonal"); break; case SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED: - OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); + OpmLog::warning("BdaSolver could not create preconditioner, " + "perhaps there is still a 0.0 on the diagonal " + "of a block on the diagonal"); break; default: OpmLog::warning("BdaSolver returned unknown status code"); @@ -307,24 +321,25 @@ solve_system(BridgeMatrix* bridgeMat, } } - template -void BdaBridge::get_result([[maybe_unused]] BridgeVector& x) { +void BdaBridge:: +get_result([[maybe_unused]] BridgeVector& x) +{ if (use_gpu) { - backend->get_result(static_cast(&(x[0][0]))); + backend->get_result(static_cast(&(x[0][0]))); } } template void BdaBridge:: -initWellContributions([[maybe_unused]] WellContributions& wellContribs, +initWellContributions([[maybe_unused]] WellContributions& wellContribs, [[maybe_unused]] unsigned N) { if (accelerator_mode.compare("opencl") == 0) { #if HAVE_OPENCL - using OCL = Accelerator::openclSolverBackend; + using OCL = Accelerator::openclSolverBackend; const auto openclBackend = static_cast(backend.get()); - using WCOCL = WellContributionsOCL; + using WCOCL = WellContributionsOCL; static_cast(wellContribs).setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get()); #else @@ -335,23 +350,20 @@ initWellContributions([[maybe_unused]] WellContributions& wellContribs, } // the tests use Dune::FieldMatrix, Flow uses Opm::MatrixBlock -#define INSTANTIATE_BDA_FUNCTIONS(n) \ -template class BdaBridge, std::allocator > >, \ -Dune::BlockVector, std::allocator > >, \ -n>; \ - \ -template class BdaBridge, std::allocator > >, \ -Dune::BlockVector, std::allocator > >, \ -n>; +#define INSTANTIATE_BDA_FUNCTIONS(T,n) \ + template class BdaBridge>, \ + Dune::BlockVector>,n>; \ + template class BdaBridge>, \ + Dune::BlockVector>,n>; +#define INSTANTIATE_TYPE(T) \ + INSTANTIATE_BDA_FUNCTIONS(T,1) \ + INSTANTIATE_BDA_FUNCTIONS(T,2) \ + INSTANTIATE_BDA_FUNCTIONS(T,3) \ + INSTANTIATE_BDA_FUNCTIONS(T,4) \ + INSTANTIATE_BDA_FUNCTIONS(T,5) \ + INSTANTIATE_BDA_FUNCTIONS(T,6) -INSTANTIATE_BDA_FUNCTIONS(1); -INSTANTIATE_BDA_FUNCTIONS(2); -INSTANTIATE_BDA_FUNCTIONS(3); -INSTANTIATE_BDA_FUNCTIONS(4); -INSTANTIATE_BDA_FUNCTIONS(5); -INSTANTIATE_BDA_FUNCTIONS(6); - -#undef INSTANTIATE_BDA_FUNCTIONS +INSTANTIATE_TYPE(double) } // namespace Opm diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp index baeb1897a..3ebdf0dcf 100644 --- a/opm/simulators/linalg/bda/BdaBridge.hpp +++ b/opm/simulators/linalg/bda/BdaBridge.hpp @@ -36,12 +36,13 @@ template class BdaBridge { private: + using Scalar = typename BridgeVector::field_type; int verbosity = 0; bool use_gpu = false; std::string accelerator_mode; - std::unique_ptr> backend; - std::shared_ptr> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes - std::shared_ptr> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes + std::unique_ptr> backend; + std::shared_ptr> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes + std::shared_ptr> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes std::vector h_rows, h_cols; // store the sparsity pattern of the matrix std::vector h_jacRows, h_jacCols; // store the sparsity pattern of the jacMatrix std::vector diagIndices; // contains offsets of the diagonal blocks wrt start of the row, used for replaceZeroDiagonal() @@ -57,8 +58,14 @@ public: /// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors /// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling /// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument - BdaBridge(std::string accelerator_mode, int linear_solver_verbosity, int maxit, double tolerance, - unsigned int platformID, unsigned int deviceID, bool opencl_ilu_parallel, std::string linsolver); + BdaBridge(std::string accelerator_mode, + int linear_solver_verbosity, + int maxit, + Scalar tolerance, + unsigned int platformID, + unsigned int deviceID, + bool opencl_ilu_parallel, + std::string linsolver); /// Solve linear system, A*x = b @@ -73,7 +80,7 @@ public: BridgeMatrix* jacMat, int numJacobiBlocks, BridgeVector& b, - WellContributions& wellContribs, + WellContributions& wellContribs, InverseOperatorResult &result); /// Get the resulting x vector @@ -82,7 +89,8 @@ public: /// Return whether the BdaBridge will use the GPU or not /// return whether the BdaBridge will use the GPU or not - bool getUseGpu(){ + bool getUseGpu() + { return use_gpu; } @@ -90,19 +98,21 @@ public: /// \param[in] mat input matrix, probably BCRSMatrix /// \param[out] h_rows rowpointers /// \param[out] h_cols columnindices - static void copySparsityPatternFromISTL(const BridgeMatrix& mat, std::vector& h_rows, std::vector& h_cols); + static void copySparsityPatternFromISTL(const BridgeMatrix& mat, + std::vector& h_rows, + std::vector& h_cols); /// Initialize the WellContributions object with opencl context and queue /// those must be set before calling BlackOilWellModel::getWellContributions() in ISTL /// \param[in] wellContribs container to hold all WellContributions /// \param[in] N number of rows in scalar vector that wellContribs will be applied on - void initWellContributions(WellContributions& wellContribs, unsigned N); + void initWellContributions(WellContributions& wellContribs, unsigned N); /// Return the selected accelerator mode, this is input via the command-line - std::string getAccleratorName(){ + std::string getAccleratorName() + { return accelerator_mode; } - }; // end class BdaBridge }