Added headers to PUBLIC_HEADER_FILES. Added warning print when cusparseSolver did not converge. Added more synchronization points in cusparseSolver. Pinning b and x vector as well.

2025-02-25 18:55:30 -06:00 · 2019-12-05 17:08:32 +01:00
parent f19a3b09b1
commit b6e13bffd2
3 changed files with 14 additions and 0 deletions
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -134,6 +134,10 @@ list (APPEND PUBLIC_HEADER_FILES
  opm/simulators/aquifers/AquiferFetkovich.hpp
  opm/simulators/aquifers/BlackoilAquiferModel.hpp
  opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp
+  opm/simulators/linalg/BdaBridge.hpp
+  opm/simulators/linalg/BdaResult.hpp
+  opm/simulators/linalg/cuda_header.h
+  opm/simulators/linalg/cusparseSolverBackend.hpp
  opm/simulators/linalg/BlackoilAmg.hpp
  opm/simulators/linalg/BlackoilAmgCpr.hpp
  opm/simulators/linalg/amgcpr.hh
--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@@ -475,6 +475,7 @@ protected:
                    bdaBridge->get_result(x);
                }else{
                    // CPU fallback, or default case for Dune
+                    OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system...");
                    auto precond = constructPrecond(linearOperator, parallelInformation_arg);
                    solve(linearOperator, x, istlb, *sp, *precond, result);
                } // end Dune call
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@@ -264,6 +264,7 @@ namespace Opm
 		cudaHostRegister(vals, nnz * sizeof(double), cudaHostRegisterDefault);
 		cudaHostRegister(cols, nnz * sizeof(int), cudaHostRegisterDefault);
 		cudaHostRegister(rows, (Nb+1) * sizeof(int), cudaHostRegisterDefault);
+		cudaHostRegister(b, N * sizeof(double), cudaHostRegisterDefault);
 		cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
 		cudaMemcpyAsync(d_bCols, cols, nnz * sizeof(int), cudaMemcpyHostToDevice, stream);
 		cudaMemcpyAsync(d_bRows, rows, (Nb+1) * sizeof(int), cudaMemcpyHostToDevice, stream);
@@ -275,6 +276,7 @@ namespace Opm
 		this->rows = rows;

 		if(verbosity > 2){
+			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::copy_system_to_gpu(): %f s\n", t2-t1);
 		}
@@ -294,6 +296,7 @@ namespace Opm
 		cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);

 		if(verbosity > 2){
+			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::update_system_on_gpu(): %f s\n", t2-t1);
 		}
@@ -380,6 +383,7 @@ namespace Opm
 		cudaCheckLastError("Could not analyse level information");

 		if(verbosity > 2){
+			cudaStreamSynchronize(stream);
 			t2 = second();
 			printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1);
 		}
@@ -400,6 +404,7 @@ namespace Opm
 			BLOCK_SIZE, info_M, policy, d_buffer);

 		int structural_zero;
+		// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
 		cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
 		if(CUSPARSE_STATUS_ZERO_PIVOT == status){
 			fprintf(stderr, "WARNING block U(%d,%d) is not invertible\n", structural_zero, structural_zero);
@@ -430,6 +435,10 @@ namespace Opm
 	// caller must be sure that x is a valid array
 	void cusparseSolverBackend::post_process(double *x){

+		if(!initialized){
+			cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault);
+		}
+
 		double t1, t2;
 		if(verbosity > 2){
 			t1 = second();