Added doxygen-style comments

This commit is contained in:
T.D. (Tongdong) Qiu 2019-12-18 17:05:33 +01:00
parent 6506ab5f55
commit 57344fd5e0
5 changed files with 59 additions and 5 deletions

View File

@ -35,7 +35,7 @@ typedef Dune::InverseOperatorResult InverseOperatorResult;
namespace Opm namespace Opm
{ {
BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) { BdaBridge::BdaBridge(bool use_gpu_ OPM_UNUSED, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) {
#if HAVE_CUDA #if HAVE_CUDA
if (use_gpu) { if (use_gpu) {
backend.reset(new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance)); backend.reset(new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance));

View File

@ -35,7 +35,8 @@ namespace Opm
typedef Dune::InverseOperatorResult InverseOperatorResult; typedef Dune::InverseOperatorResult InverseOperatorResult;
/// BdaBridge acts as interface between opm-simulators with the cusparseSolver
/// if CUDA was not found during CMake, function bodies of this class are empty
class BdaBridge class BdaBridge
{ {
private: private:
@ -45,11 +46,23 @@ private:
bool use_gpu; bool use_gpu;
public: public:
/// Construct a BdaBridge
/// \param[in] use_gpu true iff the cusparseSolver is used, is passed via command-line: '--use-gpu=[true|false]'
/// \param[in] linear_solver_verbosity verbosity of cusparseSolver
/// \param[in] maxit maximum number of iterations for cusparseSolver
/// \param[in] tolerance required relative tolerance for cusparseSolver
BdaBridge(bool use_gpu, int linear_solver_verbosity, int maxit, double tolerance); BdaBridge(bool use_gpu, int linear_solver_verbosity, int maxit, double tolerance);
/// Solve linear system, A*x = b
/// \param[in] mat matrix A, should be of type Dune::BCRSMatrix
/// \param[in] b vector b, should be of type Dune::BlockVector
/// \param[in] result summary of solver result
template <class BridgeMatrix, class BridgeVector> template <class BridgeMatrix, class BridgeVector>
void solve_system(BridgeMatrix *mat, BridgeVector &b, InverseOperatorResult &result); void solve_system(BridgeMatrix *mat, BridgeVector &b, InverseOperatorResult &result);
/// Get the resulting x vector
/// \param[inout] x vector x, should be of type Dune::BlockVector
template <class BridgeVector> template <class BridgeVector>
void get_result(BridgeVector &x); void get_result(BridgeVector &x);

View File

@ -23,7 +23,8 @@
namespace Opm namespace Opm
{ {
// based on InverseOperatorResult struct from dune/istl/solver.hh /// This class is based on InverseOperatorResult struct from dune/istl/solver.hh
/// It is needed to prevent a compile error in basearray.hh, the nvcc compiler might not support all features in there
class BdaResult class BdaResult
{ {

View File

@ -22,6 +22,11 @@
#include <iostream> #include <iostream>
/// Runtime error checking of CUDA functions
/// Usage:
/// cudaMalloc(...);
/// cudaCheckLastError("Error could not allocate memory");
///
#define cudaCheckLastError(msg) __cudaCheckError( __FILE__, __LINE__, #msg ) #define cudaCheckLastError(msg) __cudaCheckError( __FILE__, __LINE__, #msg )
inline void __cudaCheckError(const char *file, const int line, const char *msg){ inline void __cudaCheckError(const char *file, const int line, const char *msg){

View File

@ -29,6 +29,7 @@
namespace Opm namespace Opm
{ {
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
class cusparseSolverBackend{ class cusparseSolverBackend{
private: private:
@ -68,24 +69,42 @@ private:
int verbosity = 0; int verbosity = 0;
/// Solve linear system using ilu0-bicgstab
/// \param[inout] res summary of solver result
void gpu_pbicgstab(BdaResult& res); void gpu_pbicgstab(BdaResult& res);
/// Initialize GPU and allocate memory
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
void initialize(int N, int nnz, int dim); void initialize(int N, int nnz, int dim);
/// Clean memory
void finalize(); void finalize();
/// Copy linear system to GPU
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
/// \param[in] b input vector, contains N values
void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b); void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b);
// don't copy rowpointers and colindices, they stay the same // Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] b input vector, contains N values
void update_system_on_gpu(double *vals, double *b); void update_system_on_gpu(double *vals, double *b);
/// Reset preconditioner on GPU, ilu0-decomposition is done inplace by cusparse
void reset_prec_on_gpu(); void reset_prec_on_gpu();
/// Analyse sparsity pattern to extract parallelism
bool analyse_matrix(); bool analyse_matrix();
/// Perform ilu0-decomposition
bool create_preconditioner(); bool create_preconditioner();
/// Solve linear system
/// \param[inout] res summary of solver result
void solve_system(BdaResult &res); void solve_system(BdaResult &res);
public: public:
@ -97,12 +116,28 @@ public:
CUSPARSE_SOLVER_UNKNOWN_ERROR CUSPARSE_SOLVER_UNKNOWN_ERROR
}; };
/// Construct a cusparseSolver
/// \param[in] linear_solver_verbosity verbosity of cusparseSolver
/// \param[in] maxit maximum number of iterations for cusparseSolver
/// \param[in] tolerance required relative tolerance for cusparseSolver
cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance); cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance);
/// Destroy a cusparseSolver, and free memory
~cusparseSolverBackend(); ~cusparseSolverBackend();
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
/// \param[in] N number of rows, divide by dim to get number of blockrows
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
/// \param[in] b input vector, contains N values
/// \param[inout] res summary of solver result
cusparseSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res); cusparseSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res);
/// Post processing after linear solve, now only copies resulting x vector back
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
void post_process(double *x); void post_process(double *x);
}; // end class cusparseSolverBackend }; // end class cusparseSolverBackend