mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Allow verbosity of cusparseSolver to be set via command line
This commit is contained in:
parent
69033ca7f2
commit
5cb6ec510c
@ -251,11 +251,12 @@ protected:
|
|||||||
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
|
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
|
||||||
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
|
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
|
||||||
const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
||||||
|
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
|
||||||
if(use_gpu && !matrix_add_well_contributions){
|
if(use_gpu && !matrix_add_well_contributions){
|
||||||
std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl;
|
std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
bdaBridge = new BdaBridge(use_gpu, maxit, tolerance);
|
bdaBridge = new BdaBridge(use_gpu, linear_solver_verbosity, maxit, tolerance);
|
||||||
#else
|
#else
|
||||||
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
|
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
|
||||||
if(use_gpu){
|
if(use_gpu){
|
||||||
|
@ -30,10 +30,10 @@ typedef Dune::InverseOperatorResult InverseOperatorResult;
|
|||||||
namespace Opm
|
namespace Opm
|
||||||
{
|
{
|
||||||
|
|
||||||
BdaBridge::BdaBridge(bool use_gpu_, int maxit, double tolerance) : use_gpu(use_gpu_){
|
BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity, int maxit, double tolerance) : use_gpu(use_gpu_) {
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
if(use_gpu){
|
if(use_gpu){
|
||||||
backend = new cusparseSolverBackend(maxit, tolerance);
|
backend = new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ private:
|
|||||||
bool use_gpu;
|
bool use_gpu;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BdaBridge(bool use_gpu, int maxit, double tolerance);
|
BdaBridge(bool use_gpu, int linear_solver_verbosity, int maxit, double tolerance);
|
||||||
|
|
||||||
~BdaBridge();
|
~BdaBridge();
|
||||||
|
|
||||||
|
@ -35,12 +35,6 @@
|
|||||||
#include "cusparse_v2.h"
|
#include "cusparse_v2.h"
|
||||||
// For more information about cusparse, check https://docs.nvidia.com/cuda/cusparse/index.html
|
// For more information about cusparse, check https://docs.nvidia.com/cuda/cusparse/index.html
|
||||||
|
|
||||||
// print initial, intermediate and final norms, and used iterations
|
|
||||||
#define VERBOSE_BACKEND 0
|
|
||||||
|
|
||||||
// print more detailed timers of various solve elements and backend functions
|
|
||||||
#define PRINT_TIMERS_BACKEND 0
|
|
||||||
|
|
||||||
namespace Opm
|
namespace Opm
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -54,7 +48,7 @@ namespace Opm
|
|||||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
cusparseSolverBackend::cusparseSolverBackend(int maxit_, double tolerance_) : maxit(maxit_), tolerance(tolerance_), minit(0){
|
cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : verbosity(verbosity_), maxit(maxit_), tolerance(tolerance_), minit(0){
|
||||||
}
|
}
|
||||||
|
|
||||||
cusparseSolverBackend::~cusparseSolverBackend(){
|
cusparseSolverBackend::~cusparseSolverBackend(){
|
||||||
@ -84,10 +78,9 @@ namespace Opm
|
|||||||
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
|
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
|
||||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
||||||
|
|
||||||
#if VERBOSE_BACKEND
|
if(verbosity > 1){
|
||||||
printf("Initial norm: %.5e\n", norm_0);
|
printf("Initial norm: %.5e\n", norm_0);
|
||||||
printf("Tolerance: %.0e, nnzb: %d\n", tolerance, nnzb);
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
for(it = 0.5; it < maxit; it+=0.5){
|
for(it = 0.5; it < maxit; it+=0.5){
|
||||||
rhop = rho;
|
rhop = rho;
|
||||||
@ -153,27 +146,23 @@ namespace Opm
|
|||||||
if(norm < tolerance * norm_0 && it > minit){
|
if(norm < tolerance * norm_0 && it > minit){
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#if VERBOSE_BACKEND
|
|
||||||
if((int)it % 10 == 0){
|
if(verbosity > 1){
|
||||||
printf("it: %.1f, norm: %.5e\n", it, norm);
|
printf("it: %.1f, norm: %.5e\n", it, norm);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
t_total2 = second();
|
t_total2 = second();
|
||||||
#if PRINT_TIMERS_BACKEND
|
|
||||||
printf("Total solve time: %.6f s\n", t_total2-t_total1);
|
|
||||||
#endif
|
|
||||||
res.iterations = std::min(it, (float)maxit);
|
res.iterations = std::min(it, (float)maxit);
|
||||||
res.reduction = norm/norm_0;
|
res.reduction = norm/norm_0;
|
||||||
res.conv_rate = static_cast<double>(pow(res.reduction,1.0/it));
|
res.conv_rate = static_cast<double>(pow(res.reduction,1.0/it));
|
||||||
res.elapsed = t_total2-t_total1;
|
res.elapsed = t_total2 - t_total1;
|
||||||
res.converged = (it != (maxit + 0.5));
|
res.converged = (it != (maxit + 0.5));
|
||||||
#if VERBOSE_BACKEND
|
|
||||||
printf("Iterations: %.1f\n", it);
|
if(verbosity > 0){
|
||||||
printf("Final norm: %.5e\n", norm);
|
printf("=== converged: %d, conv_rate: %.2f, time: %f, time per iteration: %f, iterations: %.1f\n", res.converged, res.conv_rate, res.elapsed, res.elapsed/it, it);
|
||||||
printf("GPU converged: %d\n", res.converged);
|
}
|
||||||
#endif
|
|
||||||
return res.converged;
|
return res.converged;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,10 +254,10 @@ namespace Opm
|
|||||||
|
|
||||||
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b){
|
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b){
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
t1 = second();
|
if(verbosity > 2){
|
||||||
#endif
|
t1 = second();
|
||||||
|
}
|
||||||
|
|
||||||
// information cudaHostRegister: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge8d5c17670f16ac4fc8fcb4181cb490c
|
// information cudaHostRegister: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge8d5c17670f16ac4fc8fcb4181cb490c
|
||||||
// possible flags for cudaHostRegister: cudaHostRegisterDefault, cudaHostRegisterPortable, cudaHostRegisterMapped, cudaHostRegisterIoMemory
|
// possible flags for cudaHostRegister: cudaHostRegisterDefault, cudaHostRegisterPortable, cudaHostRegisterMapped, cudaHostRegisterIoMemory
|
||||||
@ -285,29 +274,29 @@ namespace Opm
|
|||||||
this->cols = cols;
|
this->cols = cols;
|
||||||
this->rows = rows;
|
this->rows = rows;
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
if(verbosity > 2){
|
||||||
t2 = second();
|
t2 = second();
|
||||||
printf("copy_system_to_gpu(): %f s\n", t2-t1);
|
printf("cusparseSolver::copy_system_to_gpu(): %f s\n", t2-t1);
|
||||||
#endif
|
}
|
||||||
} // end copy_system_to_gpu()
|
} // end copy_system_to_gpu()
|
||||||
|
|
||||||
|
|
||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b){
|
void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b){
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
t1 = second();
|
if(verbosity > 2){
|
||||||
#endif
|
t1 = second();
|
||||||
|
}
|
||||||
|
|
||||||
cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
|
cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
|
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
|
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
if(verbosity > 2){
|
||||||
t2 = second();
|
t2 = second();
|
||||||
printf("update_system_on_gpu(): %f s\n", t2-t1);
|
printf("cusparseSolver::update_system_on_gpu(): %f s\n", t2-t1);
|
||||||
#endif
|
}
|
||||||
} // end update_system_on_gpu()
|
} // end update_system_on_gpu()
|
||||||
|
|
||||||
|
|
||||||
@ -319,6 +308,11 @@ namespace Opm
|
|||||||
void cusparseSolverBackend::analyse_matrix(){
|
void cusparseSolverBackend::analyse_matrix(){
|
||||||
|
|
||||||
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||||
|
double t1, t2;
|
||||||
|
|
||||||
|
if(verbosity > 2){
|
||||||
|
t1 = second();
|
||||||
|
}
|
||||||
|
|
||||||
cusparseCreateMatDescr(&descr_B);
|
cusparseCreateMatDescr(&descr_B);
|
||||||
cusparseCreateMatDescr(&descr_M);
|
cusparseCreateMatDescr(&descr_M);
|
||||||
@ -385,14 +379,20 @@ namespace Opm
|
|||||||
BLOCK_SIZE, info_U, policy, d_buffer);
|
BLOCK_SIZE, info_U, policy, d_buffer);
|
||||||
cudaCheckLastError("Could not analyse level information");
|
cudaCheckLastError("Could not analyse level information");
|
||||||
|
|
||||||
|
if(verbosity > 2){
|
||||||
|
t2 = second();
|
||||||
|
printf("cusparseSolver::analyse_matrix(): %f s\n", t2-t1);
|
||||||
|
}
|
||||||
|
|
||||||
} // end analyse_matrix()
|
} // end analyse_matrix()
|
||||||
|
|
||||||
bool cusparseSolverBackend::create_preconditioner(){
|
bool cusparseSolverBackend::create_preconditioner(){
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
t1 = second();
|
if(verbosity > 2){
|
||||||
#endif
|
t1 = second();
|
||||||
|
}
|
||||||
|
|
||||||
d_mCols = d_bCols;
|
d_mCols = d_bCols;
|
||||||
d_mRows = d_bRows;
|
d_mRows = d_bRows;
|
||||||
cusparseDbsrilu02(cusparseHandle, order, \
|
cusparseDbsrilu02(cusparseHandle, order, \
|
||||||
@ -407,11 +407,11 @@ namespace Opm
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
if(verbosity > 2){
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
t2 = second();
|
t2 = second();
|
||||||
printf("Decomp time: %.6f s\n", t2-t1);
|
printf("cusparseSolver::create_preconditioner(): %f s\n", t2-t1);
|
||||||
#endif
|
}
|
||||||
return true;
|
return true;
|
||||||
} // end create_preconditioner()
|
} // end create_preconditioner()
|
||||||
|
|
||||||
@ -429,18 +429,18 @@ namespace Opm
|
|||||||
// copy result to host memory
|
// copy result to host memory
|
||||||
double* cusparseSolverBackend::post_process(){
|
double* cusparseSolverBackend::post_process(){
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
t1 = second();
|
if(verbosity > 2){
|
||||||
#endif
|
t1 = second();
|
||||||
|
}
|
||||||
|
|
||||||
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
|
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
|
|
||||||
#if PRINT_TIMERS_BACKEND
|
if(verbosity > 2){
|
||||||
t2 = second();
|
t2 = second();
|
||||||
printf("Copy result back to CPU: %.6f s\n", t2-t1);
|
printf("cusparseSolver::post_process(): %f s\n", t2-t1);
|
||||||
#endif
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
} // end post_process()
|
} // end post_process()
|
||||||
|
@ -59,9 +59,17 @@ private:
|
|||||||
|
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
|
|
||||||
|
// verbosity
|
||||||
|
// 0: print nothing during solves, only when initializing
|
||||||
|
// 1: print a one-line summary after each solve (converged flag, convergence rate, elapsed time, time per iteration, number of iterations)
|
||||||
|
// 2: also print norm each iteration
|
||||||
|
// 3: also print timings of different backend functions
|
||||||
|
|
||||||
|
int verbosity = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
cusparseSolverBackend(int maxit, double tolerance);
|
cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance);
|
||||||
|
|
||||||
~cusparseSolverBackend();
|
~cusparseSolverBackend();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user