Added headers to PUBLIC_HEADER_FILES. Added a warning message when cusparseSolver does not converge. Added more synchronization points in cusparseSolver. Also pinned the b and x vectors.

2025-02-25 18:55:30 -06:00 · 2019-12-05 17:08:32 +01:00 · 2019-12-05 17:08:32 +01:00 · b6e13bffd2
commit b6e13bffd2
parent f19a3b09b1
3 changed files with 14 additions and 0 deletions
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -134,6 +134,10 @@ list (APPEND PUBLIC_HEADER_FILES
  opm/simulators/aquifers/AquiferFetkovich.hpp
  opm/simulators/aquifers/BlackoilAquiferModel.hpp
  opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp
  opm/simulators/linalg/BdaBridge.hpp
  opm/simulators/linalg/BdaResult.hpp
  opm/simulators/linalg/cuda_header.h
  opm/simulators/linalg/cusparseSolverBackend.hpp
  opm/simulators/linalg/BlackoilAmg.hpp
  opm/simulators/linalg/BlackoilAmgCpr.hpp
  opm/simulators/linalg/amgcpr.hh
--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@@ -475,6 +475,7 @@ protected:
                    bdaBridge->get_result(x);
                }else{
                    // CPU fallback, or default case for Dune
                    OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system...");
                    auto precond = constructPrecond(linearOperator, parallelInformation_arg);
                    solve(linearOperator, x, istlb, *sp, *precond, result);
                } // end Dune call
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@@ -264,6 +264,7 @@ namespace Opm
 		cudaHostRegister(vals, nnz * sizeof(double), cudaHostRegisterDefault);
 		cudaHostRegister(cols, nnz * sizeof(int), cudaHostRegisterDefault);
 		cudaHostRegister(rows, (Nb+1) * sizeof(int), cudaHostRegisterDefault);
 		cudaHostRegister(b, N * sizeof(double), cudaHostRegisterDefault);
 		cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
 		cudaMemcpyAsync(d_bCols, cols, nnz * sizeof(int), cudaMemcpyHostToDevice, stream);
 		cudaMemcpyAsync(d_bRows, rows, (Nb+1) * sizeof(int), cudaMemcpyHostToDevice, stream);
@@ -275,6 +276,7 @@ namespace Opm
 		this->rows = rows;
 		if(verbosity > 2){
 			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::copy_system_to_gpu(): %f s\n", t2-t1);
 		}
@@ -294,6 +296,7 @@ namespace Opm
 		cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
 		if(verbosity > 2){
 			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::update_system_on_gpu(): %f s\n", t2-t1);
 		}
@@ -380,6 +383,7 @@ namespace Opm
 		cudaCheckLastError("Could not analyse level information");
 		if(verbosity > 2){
 			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1);
 		}
@@ -400,6 +404,7 @@ namespace Opm
 			BLOCK_SIZE, info_M, policy, d_buffer);
 		int structural_zero;
 		// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
 		cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
 		if(CUSPARSE_STATUS_ZERO_PIVOT == status){
 			fprintf(stderr, "WARNING block U(%d,%d) is not invertible\n", structural_zero, structural_zero);
@@ -430,6 +435,10 @@ namespace Opm
 	// caller must be sure that x is a valid array
 	void cusparseSolverBackend::post_process(double *x){
 		if(!initialized){
 			cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault);
 		}
 		double t1, t2;
 		if(verbosity > 2){
 			t1 = second();