From b6e13bffd22e201a50602f0f1110bc7db74da861 Mon Sep 17 00:00:00 2001 From: "T.D. (Tongdong) Qiu" Date: Thu, 5 Dec 2019 17:08:32 +0100 Subject: [PATCH] Added headers to PUBLIC_HEADER_FILES. Added warning print when cusparseSolver did not converge. Added more synchronization points in cusparseSolver. Pinning b and x vector as well. --- CMakeLists_files.cmake | 4 ++++ opm/simulators/linalg/ISTLSolverEbos.hpp | 1 + opm/simulators/linalg/bda/cusparseSolverBackend.cu | 9 +++++++++ 3 files changed, 14 insertions(+) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 2b711fb9d..b3da16b6d 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -134,6 +134,10 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/aquifers/AquiferFetkovich.hpp opm/simulators/aquifers/BlackoilAquiferModel.hpp opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp + opm/simulators/linalg/BdaBridge.hpp + opm/simulators/linalg/BdaResult.hpp + opm/simulators/linalg/cuda_header.h + opm/simulators/linalg/cusparseSolverBackend.hpp opm/simulators/linalg/BlackoilAmg.hpp opm/simulators/linalg/BlackoilAmgCpr.hpp opm/simulators/linalg/amgcpr.hh diff --git a/opm/simulators/linalg/ISTLSolverEbos.hpp b/opm/simulators/linalg/ISTLSolverEbos.hpp index 49385d299..b9fa0811b 100644 --- a/opm/simulators/linalg/ISTLSolverEbos.hpp +++ b/opm/simulators/linalg/ISTLSolverEbos.hpp @@ -475,6 +475,7 @@ protected: bdaBridge->get_result(x); }else{ // CPU fallback, or default case for Dune + OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system..."); auto precond = constructPrecond(linearOperator, parallelInformation_arg); solve(linearOperator, x, istlb, *sp, *precond, result); } // end Dune call diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu index dc8d09364..9b8475935 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu @@ -264,6 +264,7 @@ namespace Opm cudaHostRegister(vals, nnz * sizeof(double), cudaHostRegisterDefault); cudaHostRegister(cols, nnz * sizeof(int), cudaHostRegisterDefault); cudaHostRegister(rows, (Nb+1) * sizeof(int), cudaHostRegisterDefault); + cudaHostRegister(b, N * sizeof(double), cudaHostRegisterDefault); cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(d_bCols, cols, nnz * sizeof(int), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(d_bRows, rows, (Nb+1) * sizeof(int), cudaMemcpyHostToDevice, stream); @@ -275,6 +276,7 @@ namespace Opm this->rows = rows; if(verbosity > 2){ + cudaStreamSynchronize(stream); t2 = second(); printf("cusparseSolver::copy_system_to_gpu(): %f s\n", t2-t1); } @@ -294,6 +296,7 @@ namespace Opm cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream); if(verbosity > 2){ + cudaStreamSynchronize(stream); t2 = second(); printf("cusparseSolver::update_system_on_gpu(): %f s\n", t2-t1); } @@ -380,6 +383,7 @@ namespace Opm cudaCheckLastError("Could not analyse level information"); if(verbosity > 2){ + cudaStreamSynchronize(stream); t2 = second(); printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1); } @@ -400,6 +404,7 @@ namespace Opm BLOCK_SIZE, info_M, policy, d_buffer); int structural_zero; + // cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize() cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero); if(CUSPARSE_STATUS_ZERO_PIVOT == status){ fprintf(stderr, "WARNING block U(%d,%d) is not invertible\n", structural_zero, structural_zero); @@ -430,6 +435,10 @@ namespace Opm // caller must be sure that x is a valid array void cusparseSolverBackend::post_process(double *x){ + if(!initialized){ + cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault); + } + double t1, t2; if(verbosity > 2){ t1 = second();