diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 2ddeb5c28..696211655 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include #include @@ -91,7 +93,7 @@ int checkZeroDiagonal(BridgeMatrix& mat) { // if only_vals, do not convert rowPointers and colIndices // sparsity pattern should stay the same due to matrix-add-well-contributions template -void convertMatrixBsr(BridgeMatrix& mat, std::vector &h_vals, std::vector &h_rows, std::vector &h_cols, int dim, bool only_vals) { +void convertMatrixBsr(BridgeMatrix& mat, std::vector &h_vals, std::vector &h_rows, std::vector &h_cols, int dim) { int sum_nnzs = 0; int nnz = mat.nonzeroes()*dim*dim; @@ -99,7 +101,7 @@ void convertMatrixBsr(BridgeMatrix& mat, std::vector &h_vals, std::vecto memcpy(h_vals.data(), &(mat[0][0][0][0]), sizeof(double)*nnz); // convert colIndices and rowPointers - if(only_vals == false){ + if(h_rows.size() == 0){ h_rows.emplace_back(0); for(typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r){ int size_row = 0; @@ -160,13 +162,11 @@ void BdaBridge::solve_system(BridgeMatrix *mat, BridgeVector &b, InverseOperator checkZeroDiagonal(*mat); #endif - bool initialized = backend->isInitialized(); - #if PRINT_TIMERS_BRIDGE Dune::Timer t; #endif - convertMatrixBsr(*mat, h_vals, h_rows, h_cols, dim, initialized); + convertMatrixBsr(*mat, h_vals, h_rows, h_cols, dim); convertBlockVectorToArray(b, h_b); #if PRINT_TIMERS_BRIDGE @@ -176,17 +176,23 @@ void BdaBridge::solve_system(BridgeMatrix *mat, BridgeVector &b, InverseOperator ///////////////////////// // actually solve - if(initialized == false){ - backend->initialize(N, nnz, dim); - backend->copy_system_to_gpu(h_vals.data(), h_rows.data(), h_cols.data(), h_b.data()); - backend->analyse_matrix(); - }else{ - backend->update_system_on_gpu(h_vals.data(), h_b.data()); - } - backend->reset_prec_on_gpu(); - if(backend->create_preconditioner()){ - backend->solve_system(result); + typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus; + + cusparseSolverStatus status = backend->solve_system(N, nnz, dim, h_vals.data(), h_rows.data(), h_cols.data(), h_b.data(), result); + switch(status){ + case cusparseSolverStatus::CUSPARSE_SOLVER_SUCCESS: + //OpmLog::info("cusparseSolver converged"); + break; + case cusparseSolverStatus::CUSPARSE_SOLVER_ANALYSIS_FAILED: + OpmLog::warning("cusparseSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); + break; + case cusparseSolverStatus::CUSPARSE_SOLVER_CREATE_PRECONDITIONER_FAILED: + OpmLog::warning("cusparseSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); + break; + default: + OpmLog::warning("cusparseSolver returned unknown status code"); } + res.iterations = result.iterations; res.reduction = result.reduction; res.converged = result.converged; diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu index 9b8475935..43e5629ab 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu @@ -55,8 +55,7 @@ namespace Opm finalize(); } - // return true iff converged - bool cusparseSolverBackend::gpu_pbicgstab(BdaResult& res){ + void cusparseSolverBackend::gpu_pbicgstab(BdaResult& res){ double t_total1, t_total2; int n = N; double rho = 1.0, rhop; @@ -163,7 +162,6 @@ namespace Opm if(verbosity > 0){ printf("=== converged: %d, conv_rate: %.2f, time: %f, time per iteration: %f, iterations: %.1f\n", res.converged, res.conv_rate, res.elapsed, res.elapsed/it, it); } - return res.converged; } @@ -308,7 +306,7 @@ namespace Opm } - void cusparseSolverBackend::analyse_matrix(){ + bool cusparseSolverBackend::analyse_matrix(){ int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize; double t1, t2; @@ -367,9 +365,7 @@ namespace Opm int structural_zero; cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero); if(CUSPARSE_STATUS_ZERO_PIVOT == status){ - fprintf(stderr, "ERROR block U(%d,%d) is not invertible\n", structural_zero, structural_zero); - fprintf(stderr, "cusparse fails when a block has a 0.0 on its diagonal, these should be replaced in BdaBridge::checkZeroDiagonal()\n"); - exit(1); + return false; } // analysis of ilu apply @@ -388,6 +384,7 @@ namespace Opm printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1); } + return true; } // end analyse_matrix() bool cusparseSolverBackend::create_preconditioner(){ @@ -407,8 +404,6 @@ namespace Opm // cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize() cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero); if(CUSPARSE_STATUS_ZERO_PIVOT == status){ - fprintf(stderr, "WARNING block U(%d,%d) is not invertible\n", structural_zero, structural_zero); - fprintf(stderr, "cusparse fails when a block has a 0.0 on its diagonal, these should be replaced in BdaBridge::checkZeroDiagonal()\n"); return false; } @@ -421,13 +416,11 @@ namespace Opm } // end create_preconditioner() - // return true iff converged - bool cusparseSolverBackend::solve_system(BdaResult &res){ + void cusparseSolverBackend::solve_system(BdaResult &res){ // actually solve - bool converged = gpu_pbicgstab(res); + gpu_pbicgstab(res); cudaStreamSynchronize(stream); cudaCheckLastError("Something went wrong during the GPU solve"); - return converged; } // end solve_system() @@ -454,9 +447,29 @@ namespace Opm } // end post_process() - bool cusparseSolverBackend::isInitialized(){ - return initialized; - } + typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus; + + cusparseSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res){ + if(initialized == false){ + initialize(N, nnz, dim); + copy_system_to_gpu(vals, rows, cols, b); + }else{ + update_system_on_gpu(vals, b); + } + if(analysis_done == false){ + if(!analyse_matrix()){ + return cusparseSolverStatus::CUSPARSE_SOLVER_ANALYSIS_FAILED; + } + } + reset_prec_on_gpu(); + if(create_preconditioner()){ + solve_system(res); + }else{ + return cusparseSolverStatus::CUSPARSE_SOLVER_CREATE_PRECONDITIONER_FAILED; + } + return cusparseSolverStatus::CUSPARSE_SOLVER_SUCCESS; + } + } diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index a64853e37..491f44883 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -58,6 +58,7 @@ private: int BLOCK_SIZE; bool initialized = false; + bool analysis_done = false; // verbosity // 0: print nothing during solves, only when initializing @@ -67,14 +68,8 @@ private: int verbosity = 0; -public: - cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance); - - ~cusparseSolverBackend(); - - // return true iff converged - bool gpu_pbicgstab(BdaResult& res); + void gpu_pbicgstab(BdaResult& res); void initialize(int N, int nnz, int dim); @@ -87,17 +82,29 @@ public: void reset_prec_on_gpu(); - void analyse_matrix(); + bool analyse_matrix(); bool create_preconditioner(); - // return true iff converged - bool solve_system(BdaResult &res); + void solve_system(BdaResult &res); + +public: + + enum class cusparseSolverStatus { + CUSPARSE_SOLVER_SUCCESS, + CUSPARSE_SOLVER_ANALYSIS_FAILED, + CUSPARSE_SOLVER_CREATE_PRECONDITIONER_FAILED, + CUSPARSE_SOLVER_UNKNOWN_ERROR + }; + + cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance); + + ~cusparseSolverBackend(); + + cusparseSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res); void post_process(double *x); - bool isInitialized(); - }; // end class cusparseSolverBackend }