diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index d45fb7b85..dd097bcbc 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -192,14 +192,13 @@ void BdaBridge::copySparsityPatternFromI template -void BdaBridge::solve_system([[maybe_unused]] BridgeMatrix* bridgeMat, - [[maybe_unused]] BridgeMatrix* jacMat, - [[maybe_unused]] int numJacobiBlocks, - [[maybe_unused]] BridgeVector& b, - [[maybe_unused]] WellContributions& wellContribs, - [[maybe_unused]] InverseOperatorResult& res) +void BdaBridge::solve_system(BridgeMatrix* bridgeMat, + BridgeMatrix* jacMat, + int numJacobiBlocks, + BridgeVector& b, + WellContributions& wellContribs, + InverseOperatorResult& res) { - if (use_gpu || use_fpga) { BdaResult result; result.converged = false; @@ -229,14 +228,7 @@ void BdaBridge::solve_system([[maybe_unu OpmLog::info(out.str()); } - - ///////////////////////// - // actually solve - SolverStatus status; - if (numJacobiBlocks < 2) { - // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour - status = backend->solve_system(matrix, static_cast(&(b[0][0])), wellContribs, result); - } else { + if (numJacobiBlocks >= 2) { const int jacNnzb = (h_jacRows.empty()) ? jacMat->nonzeroes() : h_jacRows.back(); if (!jacMatrix) { @@ -253,9 +245,13 @@ void BdaBridge::solve_system([[maybe_unu out << "Checking zeros for jacMat took: " << t_zeros2.stop() << " s, found " << jacNumZeros << " zeros"; OpmLog::info(out.str()); } - status = backend->solve_system2(matrix, static_cast(&(b[0][0])), jacMatrix, wellContribs, result); } + ///////////////////////// + // actually solve + // assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour + SolverStatus status = backend->solve_system(matrix, static_cast(&(b[0][0])), jacMatrix, wellContribs, result); + switch(status) { case SolverStatus::BDA_SOLVER_SUCCESS: //OpmLog::info("BdaSolver converged"); diff --git a/opm/simulators/linalg/bda/BdaSolver.hpp b/opm/simulators/linalg/bda/BdaSolver.hpp index c30a78dff..dd5b97094 100644 --- a/opm/simulators/linalg/bda/BdaSolver.hpp +++ b/opm/simulators/linalg/bda/BdaSolver.hpp @@ -86,12 +86,8 @@ namespace Accelerator { virtual ~BdaSolver() {}; /// Define as pure virtual functions, so derivedclass must implement them - virtual SolverStatus solve_system(std::shared_ptr matrix, - double *b, WellContributions& wellContribs, BdaResult &res) = 0; - - virtual SolverStatus solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) = 0; + virtual SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) = 0; virtual void get_result(double *x) = 0; diff --git a/opm/simulators/linalg/bda/FPGASolverBackend.cpp b/opm/simulators/linalg/bda/FPGASolverBackend.cpp index 46f43bc65..875b96676 100644 --- a/opm/simulators/linalg/bda/FPGASolverBackend.cpp +++ b/opm/simulators/linalg/bda/FPGASolverBackend.cpp @@ -225,7 +225,11 @@ void FpgaSolverBackend::get_result(double *x_) template -SolverStatus FpgaSolverBackend::solve_system(std::shared_ptr matrix, double *b, WellContributions&, BdaResult &res) +SolverStatus FpgaSolverBackend::solve_system(std::shared_ptr matrix, + double *b, + [[maybe_unused]] std::shared_ptr jacMatrix, + WellContributions&, + BdaResult &res) { if (initialized == false) { initialize(matrix->Nb, matrix->nnzbzs, matrix->nnzValues, matrix->rowPointers, matrix->colIndices); @@ -250,15 +254,6 @@ SolverStatus FpgaSolverBackend::solve_system(std::shared_ptr -SolverStatus FpgaSolverBackend::solve_system2(std::shared_ptr matrix, - double *b, - [[maybe_unused]] std::shared_ptr jacMatrix, - WellContributions& wellContribs, - BdaResult &res) -{ - return solve_system(matrix, b, wellContribs, res); -} template void FpgaSolverBackend::initialize(int Nb_, int nnzbs, double *vals, int *rows, int *cols) diff --git a/opm/simulators/linalg/bda/FPGASolverBackend.hpp b/opm/simulators/linalg/bda/FPGASolverBackend.hpp index 3c1e2e769..05e7c6d33 100644 --- a/opm/simulators/linalg/bda/FPGASolverBackend.hpp +++ b/opm/simulators/linalg/bda/FPGASolverBackend.hpp @@ -244,14 +244,12 @@ public: /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format /// \param[in] matrix matrix A /// \param[in] b input vector, contains N values + /// \param[in] jacMatrix matrix for preconditioner /// \param[in] wellContribs WellContributions, not used in FPGA solver because it requires them already added to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) override; - - SolverStatus solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) override; + SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) override; /// Get result after linear solve, and peform postprocessing if necessary /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array diff --git a/opm/simulators/linalg/bda/amgclSolverBackend.cpp b/opm/simulators/linalg/bda/amgclSolverBackend.cpp index d258c7177..58046abb3 100644 --- a/opm/simulators/linalg/bda/amgclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/amgclSolverBackend.cpp @@ -360,17 +360,12 @@ void amgclSolverBackend::get_result(double *x_) { template -SolverStatus amgclSolverBackend::solve_system2(std::shared_ptr matrix, +SolverStatus amgclSolverBackend::solve_system(std::shared_ptr matrix, double *b, [[maybe_unused]] std::shared_ptr jacMatrix, - WellContributions& wellContribs, + [[maybe_unused]] WellContributions& wellContribs, BdaResult &res) { - return solve_system(matrix, b, wellContribs, res); -} - -template -SolverStatus amgclSolverBackend::solve_system(std::shared_ptr matrix, double *b, WellContributions&, BdaResult &res) { if (initialized == false) { initialize(matrix->Nb, matrix->nnzbs); convert_sparsity_pattern(matrix->rowPointers, matrix->colIndices); diff --git a/opm/simulators/linalg/bda/amgclSolverBackend.hpp b/opm/simulators/linalg/bda/amgclSolverBackend.hpp index ab978224c..8896ea65e 100644 --- a/opm/simulators/linalg/bda/amgclSolverBackend.hpp +++ b/opm/simulators/linalg/bda/amgclSolverBackend.hpp @@ -130,14 +130,12 @@ public: /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format /// \param[in] matrix matrix A /// \param[in] b input vector, contains N values + /// \param[in] jacMatrix matrix for preconditioner /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) override; - - SolverStatus solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) override; + SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) override; /// Get result after linear solve, and peform postprocessing if necessary /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array diff --git a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu index e2e103506..430a0a4ad 100644 --- a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu @@ -478,7 +478,12 @@ void cusparseSolverBackend::get_result(double *x) { template -SolverStatus cusparseSolverBackend::solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) { +SolverStatus cusparseSolverBackend::solve_system(std::shared_ptr matrix, + double *b, + [[maybe_unused]] std::shared_ptr jacMatrix, + WellContributions& wellContribs, + BdaResult &res) +{ if (initialized == false) { initialize(matrix->Nb, matrix->nnzbs); copy_system_to_gpu(matrix->nnzValues, matrix->rowPointers, matrix->colIndices, b); @@ -499,15 +504,6 @@ SolverStatus cusparseSolverBackend::solve_system(std::shared_ptr -SolverStatus cusparseSolverBackend::solve_system2(std::shared_ptr matrix, - double *b, - [[maybe_unused]] std::shared_ptr jacMatrix, - WellContributions& wellContribs, - BdaResult &res) -{ - return solve_system(matrix, b, wellContribs, res); -} #define INSTANTIATE_BDA_FUNCTIONS(n) \ template cusparseSolverBackend::cusparseSolverBackend(int, int, double, unsigned int); \ diff --git a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp index f3d95c121..3f470b02c 100644 --- a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp @@ -127,14 +127,12 @@ public: /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format /// \param[in] matrix matrix A /// \param[in] b input vector, contains N values + /// \param[in] jacMatrix matrix for preconditioner /// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) override; - - SolverStatus solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) override; + SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) override; /// Get resulting vector x after linear solve, also includes post processing if necessary /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array diff --git a/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp b/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp index cbc00aeb5..cb537e3c6 100644 --- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp @@ -383,78 +383,26 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr template -void openclSolverBackend::initialize(std::shared_ptr matrix) { +void openclSolverBackend::initialize(std::shared_ptr matrix, std::shared_ptr jacMatrix) { this->Nb = matrix->Nb; this->N = Nb * block_size; this->nnzb = matrix->nnzbs; this->nnz = nnzb * block_size * block_size; - std::ostringstream out; - out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n"; - out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n"; - out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n"; - OpmLog::info(out.str()); - out.str(""); - out.clear(); - - try { - prec->setOpencl(context, queue); - -#if COPY_ROW_BY_ROW - vals_contiguous.resize(nnz); -#endif - // mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows)); - mat = matrix; - - d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_rb = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_r = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_rw = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_p = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_pw = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_s = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_t = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_v = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - d_tmp = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N); - - d_Avals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnz); - d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb); - d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1)); - - bool reorder = (opencl_ilu_reorder != ILUReorder::NONE); - if (reorder) { - rb = new double[N]; - d_toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb); - } - - } catch (const cl::Error& error) { - std::ostringstream oss; - oss << "OpenCL Error: " << error.what() << "(" << error.err() << ")\n"; - oss << getErrorString(error.err()); - // rethrow exception - OPM_THROW(std::logic_error, oss.str()); - } catch (const std::logic_error& error) { - // rethrow exception by OPM_THROW in the try{}, without this, a segfault occurs - throw error; + if (jacMatrix) { + useJacMatrix = true; } - initialized = true; -} // end initialize() - -template -void openclSolverBackend::initialize2(std::shared_ptr matrix, std::shared_ptr jacMatrix) { - this->Nb = matrix->Nb; - this->N = Nb * block_size; - this->nnzb = matrix->nnzbs; - this->nnz = nnzb * block_size * block_size; - - this->jac_nnz = jacMatrix->nnzbs; - this->jac_nnzb = jac_nnz * block_size * block_size; + if (useJacMatrix) { + this->jac_nnz = jacMatrix->nnzbs; + this->jac_nnzb = jac_nnz * block_size * block_size; + } std::ostringstream out; out << "Initializing GPU, matrix size: " << N << " blocks, nnzb: " << nnzb << "\n"; - out << "Blocks in ILU matrix: " << jac_nnzb << "\n"; + if (useJacMatrix) { + out << "Blocks in ILU matrix: " << jac_nnzb << "\n"; + } out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n"; out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n"; OpmLog::info(out.str()); @@ -594,7 +542,7 @@ bool openclSolverBackend::analyze_matrix() { Timer t; bool success; - if (blockJacVersion) + if (useJacMatrix) success = prec->analyze_matrix(mat.get(), jacMat.get()); else success = prec->analyze_matrix(mat.get()); @@ -649,7 +597,7 @@ bool openclSolverBackend::create_preconditioner() { Timer t; bool result; - if (blockJacVersion) + if (useJacMatrix) result = prec->create_preconditioner(mat.get(), jacMat.get()); else result = prec->create_preconditioner(mat.get()); @@ -712,37 +660,14 @@ void openclSolverBackend::get_result(double *x) { template -SolverStatus openclSolverBackend::solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) { - if (initialized == false) { - initialize(matrix); - if (analysis_done == false) { - if (!analyze_matrix()) { - return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; - } - } - update_system(matrix->nnzValues, b, wellContribs); - if (!create_preconditioner()) { - return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED; - } - copy_system_to_gpu(); - } else { - update_system(matrix->nnzValues, b, wellContribs); - if (!create_preconditioner()) { - return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED; - } - update_system_on_gpu(); - } - solve_system(wellContribs, res); - return SolverStatus::BDA_SOLVER_SUCCESS; -} -template -SolverStatus openclSolverBackend::solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) +SolverStatus openclSolverBackend::solve_system(std::shared_ptr matrix, + double *b, + std::shared_ptr jacMatrix, + WellContributions& wellContribs, + BdaResult &res) { if (initialized == false) { - blockJacVersion = true; - initialize2(matrix, jacMatrix); + initialize(matrix, jacMatrix); if (analysis_done == false) { if (!analyze_matrix()) { return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; @@ -764,6 +689,7 @@ SolverStatus openclSolverBackend::solve_system2(std::shared_ptr::openclSolverBackend( \ int, int, double, unsigned int, unsigned int, ILUReorder, std::string); \ diff --git a/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp b/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp index 95d0643f8..ddfb1fc24 100644 --- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp +++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp @@ -65,7 +65,7 @@ private: int jac_nnz; int jac_nnzb; - bool blockJacVersion = false; + bool useJacMatrix = false; std::unique_ptr > prec; // can perform blocked ILU0 and AMG on pressure component @@ -137,8 +137,7 @@ private: /// Initialize GPU and allocate memory /// \param[in] matrix matrix A /// \param[in] jacMatrix matrix for preconditioner - void initialize(std::shared_ptr matrix); - void initialize2(std::shared_ptr matrix, std::shared_ptr jacMatrix); + void initialize(std::shared_ptr matrix, std::shared_ptr jacMatrix); /// Clean memory void finalize(); @@ -193,14 +192,12 @@ public: /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format /// \param[in] matrix matrix A /// \param[in] b input vector, contains N values + /// \param[in] jacMatrix matrix for preconditioner /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A /// \param[inout] res summary of solver result /// \return status code - SolverStatus solve_system(std::shared_ptr matrix, double *b, WellContributions& wellContribs, BdaResult &res) override; - - SolverStatus solve_system2(std::shared_ptr matrix, double *b, - std::shared_ptr jacMatrix, - WellContributions& wellContribs, BdaResult &res) override; + SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) override; /// Solve scalar linear system, for example a coarse system of an AMG preconditioner /// Data is already on the GPU