Added headers to PUBLIC_HEADER_FILES. Added warning print when cusparseSolver did not converge. Added more synchronization points in cusparseSolver. Pinned the b and x vectors as well.

This commit is contained in:
T.D. (Tongdong) Qiu 2019-12-05 17:08:32 +01:00
parent f19a3b09b1
commit b6e13bffd2
3 changed files with 14 additions and 0 deletions

View File

@ -134,6 +134,10 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/aquifers/AquiferFetkovich.hpp
opm/simulators/aquifers/BlackoilAquiferModel.hpp
opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp
opm/simulators/linalg/BdaBridge.hpp
opm/simulators/linalg/BdaResult.hpp
opm/simulators/linalg/cuda_header.h
opm/simulators/linalg/cusparseSolverBackend.hpp
opm/simulators/linalg/BlackoilAmg.hpp
opm/simulators/linalg/BlackoilAmgCpr.hpp
opm/simulators/linalg/amgcpr.hh

View File

@ -475,6 +475,7 @@ protected:
bdaBridge->get_result(x);
}else{
// CPU fallback, or default case for Dune
OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system...");
auto precond = constructPrecond(linearOperator, parallelInformation_arg);
solve(linearOperator, x, istlb, *sp, *precond, result);
} // end Dune call

View File

@ -264,6 +264,7 @@ namespace Opm
cudaHostRegister(vals, nnz * sizeof(double), cudaHostRegisterDefault);
cudaHostRegister(cols, nnz * sizeof(int), cudaHostRegisterDefault);
cudaHostRegister(rows, (Nb+1) * sizeof(int), cudaHostRegisterDefault);
cudaHostRegister(b, N * sizeof(double), cudaHostRegisterDefault);
cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_bCols, cols, nnz * sizeof(int), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_bRows, rows, (Nb+1) * sizeof(int), cudaMemcpyHostToDevice, stream);
@ -275,6 +276,7 @@ namespace Opm
this->rows = rows;
if(verbosity > 2){
cudaStreamSynchronize(stream);
t2 = second();
printf("cusparseSolver::copy_system_to_gpu(): %f s\n", t2-t1);
}
@ -294,6 +296,7 @@ namespace Opm
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
if(verbosity > 2){
cudaStreamSynchronize(stream);
t2 = second();
printf("cusparseSolver::update_system_on_gpu(): %f s\n", t2-t1);
}
@ -380,6 +383,7 @@ namespace Opm
cudaCheckLastError("Could not analyse level information");
if(verbosity > 2){
cudaStreamSynchronize(stream);
t2 = second();
printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1);
}
@ -400,6 +404,7 @@ namespace Opm
BLOCK_SIZE, info_M, policy, d_buffer);
int structural_zero;
// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
if(CUSPARSE_STATUS_ZERO_PIVOT == status){
fprintf(stderr, "WARNING block U(%d,%d) is not invertible\n", structural_zero, structural_zero);
@ -430,6 +435,10 @@ namespace Opm
// caller must be sure that x is a valid array
void cusparseSolverBackend::post_process(double *x){
if(!initialized){
cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault);
}
double t1, t2;
if(verbosity > 2){
t1 = second();