diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index e3240975e..713ce432c 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -35,7 +35,7 @@ typedef Dune::InverseOperatorResult InverseOperatorResult; namespace Opm { -BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) { +BdaBridge::BdaBridge(bool use_gpu_ OPM_UNUSED, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) { #if HAVE_CUDA if (use_gpu) { backend.reset(new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance)); diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp index a1ceb5701..7d7317cd8 100644 --- a/opm/simulators/linalg/bda/BdaBridge.hpp +++ b/opm/simulators/linalg/bda/BdaBridge.hpp @@ -35,7 +35,8 @@ namespace Opm typedef Dune::InverseOperatorResult InverseOperatorResult; - +/// BdaBridge acts as interface between opm-simulators with the cusparseSolver +/// if CUDA was not found during CMake, function bodies of this class are empty class BdaBridge { private: @@ -45,11 +46,23 @@ private: bool use_gpu; public: + /// Construct a BdaBridge + /// \param[in] use_gpu true iff the cusparseSolver is used, is passed via command-line: '--use-gpu=[true|false]' + /// \param[in] linear_solver_verbosity verbosity of cusparseSolver + /// \param[in] maxit maximum number of iterations for cusparseSolver + /// \param[in] tolerance required relative tolerance for cusparseSolver BdaBridge(bool use_gpu, int linear_solver_verbosity, int maxit, double tolerance); + + /// Solve linear system, A*x = b + /// \param[in] mat matrix A, should be of type Dune::BCRSMatrix + /// \param[in] b vector b, should be of type Dune::BlockVector + /// \param[in] result summary of solver result template void solve_system(BridgeMatrix *mat, BridgeVector &b, InverseOperatorResult &result); + /// Get the resulting x vector + /// \param[inout] x vector x, should be of type Dune::BlockVector template void get_result(BridgeVector &x); diff --git a/opm/simulators/linalg/bda/BdaResult.hpp b/opm/simulators/linalg/bda/BdaResult.hpp index 92e02b8b7..90880828f 100644 --- a/opm/simulators/linalg/bda/BdaResult.hpp +++ b/opm/simulators/linalg/bda/BdaResult.hpp @@ -23,7 +23,8 @@ namespace Opm { -// based on InverseOperatorResult struct from dune/istl/solver.hh +/// This class is based on InverseOperatorResult struct from dune/istl/solver.hh +/// It is needed to prevent a compile error in basearray.hh, the nvcc compiler might not support all features in there class BdaResult { diff --git a/opm/simulators/linalg/bda/cuda_header.hpp b/opm/simulators/linalg/bda/cuda_header.hpp index 4025e7cb2..7d1f722d4 100644 --- a/opm/simulators/linalg/bda/cuda_header.hpp +++ b/opm/simulators/linalg/bda/cuda_header.hpp @@ -22,6 +22,11 @@ #include +/// Runtime error checking of CUDA functions +/// Usage: +/// cudaMalloc(...); +/// cudaCheckLastError("Error could not allocate memory"); +/// #define cudaCheckLastError(msg) __cudaCheckError( __FILE__, __LINE__, #msg ) inline void __cudaCheckError(const char *file, const int line, const char *msg){ diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index 97ccd25d5..60a47744e 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -29,6 +29,7 @@ namespace Opm { +/// This class implements a cusparse-based ilu0-bicgstab solver on GPU class cusparseSolverBackend{ private: @@ -68,24 +69,42 @@ private: int verbosity = 0; - + /// Solve linear system using ilu0-bicgstab + /// \param[inout] res summary of solver result void gpu_pbicgstab(BdaResult& res); + /// Initialize GPU and allocate memory + /// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks + /// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks + /// \param[in] dim size of block void initialize(int N, int nnz, int dim); + /// Clean memory void finalize(); + /// Copy linear system to GPU + /// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values + /// \param[in] rows array of rowPointers, contains N/dim+1 values + /// \param[in] cols array of columnIndices, contains nnz values + /// \param[in] b input vector, contains N values void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b); - // don't copy rowpointers and colindices, they stay the same + // Update linear system on GPU, don't copy rowpointers and colindices, they stay the same + /// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values + /// \param[in] b input vector, contains N values void update_system_on_gpu(double *vals, double *b); + /// Reset preconditioner on GPU, ilu0-decomposition is done inplace by cusparse void reset_prec_on_gpu(); + /// Analyse sparsity pattern to extract parallelism bool analyse_matrix(); + /// Perform ilu0-decomposition bool create_preconditioner(); + /// Solve linear system + /// \param[inout] res summary of solver result void solve_system(BdaResult &res); public: @@ -97,12 +116,28 @@ public: CUSPARSE_SOLVER_UNKNOWN_ERROR }; + /// Construct a cusparseSolver + /// \param[in] linear_solver_verbosity verbosity of cusparseSolver + /// \param[in] maxit maximum number of iterations for cusparseSolver + /// \param[in] tolerance required relative tolerance for cusparseSolver cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance); + /// Destroy a cusparseSolver, and free memory ~cusparseSolverBackend(); + /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format + /// \param[in] N number of rows, divide by dim to get number of blockrows + /// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks + /// \param[in] dim size of block + /// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values + /// \param[in] rows array of rowPointers, contains N/dim+1 values + /// \param[in] cols array of columnIndices, contains nnz values + /// \param[in] b input vector, contains N values + /// \param[inout] res summary of solver result cusparseSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res); + /// Post processing after linear solve, now only copies resulting x vector back + /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array void post_process(double *x); }; // end class cusparseSolverBackend