mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Added some whitespace around brackets in accordance with OPM suggested coding standard
This commit is contained in:
parent
950d1c92c1
commit
a491e64883
@ -251,13 +251,13 @@ protected:
|
||||
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
|
||||
const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
||||
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
|
||||
if(use_gpu && !matrix_add_well_contributions){
|
||||
if (use_gpu && !matrix_add_well_contributions) {
|
||||
OPM_THROW(std::logic_error,"Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab");
|
||||
}
|
||||
bdaBridge.reset(new BdaBridge(use_gpu, linear_solver_verbosity, maxit, tolerance));
|
||||
#else
|
||||
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
|
||||
if(use_gpu){
|
||||
if (use_gpu) {
|
||||
OPM_THROW(std::logic_error,"Error cannot use GPU solver since CUDA was not found during compilation");
|
||||
}
|
||||
#endif
|
||||
@ -462,13 +462,13 @@ protected:
|
||||
#if HAVE_CUDA
|
||||
bdaBridge->solve_system(matrix_.get(), istlb, result);
|
||||
|
||||
if(result.converged){
|
||||
if (result.converged) {
|
||||
// get result vector x from non-Dune backend, iff solve was successful
|
||||
bdaBridge->get_result(x);
|
||||
}else{
|
||||
} else {
|
||||
// CPU fallback, or default case for Dune
|
||||
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
|
||||
if(use_gpu){
|
||||
if (use_gpu) {
|
||||
OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system...");
|
||||
}
|
||||
auto precond = constructPrecond(linearOperator, parallelInformation_arg);
|
||||
|
@ -37,7 +37,7 @@ namespace Opm
|
||||
|
||||
BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) {
|
||||
#if HAVE_CUDA
|
||||
if(use_gpu){
|
||||
if (use_gpu) {
|
||||
backend.reset(new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance));
|
||||
}
|
||||
#endif
|
||||
@ -52,16 +52,16 @@ int checkZeroDiagonal(BridgeMatrix& mat) {
|
||||
int numZeros = 0;
|
||||
const int dim = 3;
|
||||
const double zero_replace = 1e-15;
|
||||
if(diag_indices.size() == 0){
|
||||
if (diag_indices.size() == 0) {
|
||||
int N = mat.N();
|
||||
diag_indices.reserve(N);
|
||||
int roff = 0;
|
||||
for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){
|
||||
for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) {
|
||||
auto diag = r->find(r.index()); // diag is an iterator
|
||||
assert(diag.index() == r.index());
|
||||
for(int rr = 0; rr < dim; ++rr){
|
||||
for (int rr = 0; rr < dim; ++rr) {
|
||||
auto& val = (*diag)[rr][rr]; // reference to easily change the value
|
||||
if (val == 0.0){ // could be replaced by '< 1e-30' or similar
|
||||
if (val == 0.0) { // could be replaced by '< 1e-30' or similar
|
||||
val = zero_replace;
|
||||
++numZeros;
|
||||
}
|
||||
@ -69,12 +69,12 @@ int checkZeroDiagonal(BridgeMatrix& mat) {
|
||||
diag_indices.emplace_back(diag.offset());
|
||||
}
|
||||
}else{
|
||||
for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){
|
||||
for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) {
|
||||
typename BridgeMatrix::size_type offset = diag_indices[r.index()];
|
||||
auto& diag_block = r->getptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r
|
||||
for(int rr = 0; rr < dim; ++rr){
|
||||
for (int rr = 0; rr < dim; ++rr) {
|
||||
auto& val = diag_block[rr][rr];
|
||||
if(val == 0.0){ // could be replaced by '< 1e-30' or similar
|
||||
if (val == 0.0) { // could be replaced by '< 1e-30' or similar
|
||||
val = zero_replace;
|
||||
++numZeros;
|
||||
}
|
||||
@ -94,18 +94,18 @@ void getSparsityPattern(BridgeMatrix& mat, std::vector<int> &h_rows, std::vector
|
||||
int sum_nnzs = 0;
|
||||
|
||||
// convert colIndices and rowPointers
|
||||
if(h_rows.size() == 0){
|
||||
if (h_rows.size() == 0) {
|
||||
h_rows.emplace_back(0);
|
||||
for(typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r){
|
||||
for (typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r) {
|
||||
int size_row = 0;
|
||||
for(auto c = r->begin(); c != r->end(); ++c){
|
||||
for (auto c = r->begin(); c != r->end(); ++c) {
|
||||
h_cols.emplace_back(c.index());
|
||||
size_row++;
|
||||
}
|
||||
sum_nnzs += size_row;
|
||||
h_rows.emplace_back(sum_nnzs);
|
||||
}
|
||||
if(h_rows[mat.N()] != mat.nonzeroes()){
|
||||
if (h_rows[mat.N()] != mat.nonzeroes()) {
|
||||
OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::getSparsityPattern()");
|
||||
}
|
||||
}
|
||||
@ -118,7 +118,7 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
|
||||
{
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(use_gpu){
|
||||
if (use_gpu) {
|
||||
BdaResult result;
|
||||
result.converged = false;
|
||||
static std::vector<int> h_rows;
|
||||
@ -127,12 +127,12 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
|
||||
int N = mat->N()*dim;
|
||||
int nnz = (h_rows.empty()) ? mat->nonzeroes()*dim*dim : h_rows.back()*dim*dim;
|
||||
|
||||
if(dim != 3){
|
||||
if (dim != 3) {
|
||||
OpmLog::warning("cusparseSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
|
||||
use_gpu = false;
|
||||
}
|
||||
|
||||
if(h_rows.capacity() == 0){
|
||||
if (h_rows.capacity() == 0) {
|
||||
h_rows.reserve(N+1);
|
||||
h_cols.reserve(nnz);
|
||||
#if PRINT_TIMERS_BRIDGE
|
||||
@ -163,7 +163,7 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
|
||||
typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus;
|
||||
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
|
||||
cusparseSolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), result);
|
||||
switch(status){
|
||||
switch(status) {
|
||||
case cusparseSolverStatus::CUSPARSE_SOLVER_SUCCESS:
|
||||
//OpmLog::info("cusparseSolver converged");
|
||||
break;
|
||||
@ -190,9 +190,9 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
|
||||
|
||||
|
||||
template <class BridgeVector>
|
||||
void BdaBridge::get_result(BridgeVector &x OPM_UNUSED){
|
||||
void BdaBridge::get_result(BridgeVector &x OPM_UNUSED) {
|
||||
#if HAVE_CUDA
|
||||
if(use_gpu){
|
||||
if (use_gpu) {
|
||||
backend->post_process(static_cast<double*>(&(x[0][0])));
|
||||
}
|
||||
#endif
|
||||
|
@ -45,20 +45,20 @@ namespace Opm
|
||||
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
|
||||
|
||||
double second(void){
|
||||
double second(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, nullptr);
|
||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : verbosity(verbosity_), maxit(maxit_), tolerance(tolerance_), minit(0){
|
||||
cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : verbosity(verbosity_), maxit(maxit_), tolerance(tolerance_), minit(0) {
|
||||
}
|
||||
|
||||
cusparseSolverBackend::~cusparseSolverBackend(){
|
||||
cusparseSolverBackend::~cusparseSolverBackend() {
|
||||
finalize();
|
||||
}
|
||||
|
||||
void cusparseSolverBackend::gpu_pbicgstab(BdaResult& res){
|
||||
void cusparseSolverBackend::gpu_pbicgstab(BdaResult& res) {
|
||||
double t_total1, t_total2;
|
||||
int n = N;
|
||||
double rho = 1.0, rhop;
|
||||
@ -80,17 +80,17 @@ namespace Opm
|
||||
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
|
||||
|
||||
if(verbosity > 1){
|
||||
if (verbosity > 1) {
|
||||
std::ostringstream out;
|
||||
out << std::scientific << "cusparseSolver initial norm: " << norm_0;
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
for(it = 0.5; it < maxit; it+=0.5){
|
||||
for (it = 0.5; it < maxit; it+=0.5) {
|
||||
rhop = rho;
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
|
||||
|
||||
if(it > 1){
|
||||
if (it > 1) {
|
||||
beta = (rho/rhop) * (alpha/omega);
|
||||
nomega = -omega;
|
||||
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
|
||||
@ -118,7 +118,7 @@ namespace Opm
|
||||
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
|
||||
if(norm < tolerance * norm_0 && it > minit){
|
||||
if (norm < tolerance * norm_0 && it > minit) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -147,11 +147,11 @@ namespace Opm
|
||||
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
|
||||
|
||||
|
||||
if(norm < tolerance * norm_0 && it > minit){
|
||||
if (norm < tolerance * norm_0 && it > minit) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(verbosity > 1){
|
||||
if (verbosity > 1) {
|
||||
std::ostringstream out;
|
||||
out << "it: " << it << std::scientific << ", norm: " << norm;
|
||||
OpmLog::info(out.str());
|
||||
@ -166,7 +166,7 @@ namespace Opm
|
||||
res.elapsed = t_total2 - t_total1;
|
||||
res.converged = (it != (maxit + 0.5));
|
||||
|
||||
if(verbosity > 0){
|
||||
if (verbosity > 0) {
|
||||
std::ostringstream out;
|
||||
out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
|
||||
", time per iteration: " << res.elapsed/it << ", iterations: " << it;
|
||||
@ -175,7 +175,7 @@ namespace Opm
|
||||
}
|
||||
|
||||
|
||||
void cusparseSolverBackend::initialize(int N, int nnz, int dim){
|
||||
void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
|
||||
this->N = N;
|
||||
this->nnz = nnz;
|
||||
this->BLOCK_SIZE = dim;
|
||||
@ -235,7 +235,7 @@ namespace Opm
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
void cusparseSolverBackend::finalize(){
|
||||
void cusparseSolverBackend::finalize() {
|
||||
cudaFree(d_x);
|
||||
cudaFree(d_b);
|
||||
cudaFree(d_r);
|
||||
@ -267,10 +267,10 @@ namespace Opm
|
||||
} // end finalize()
|
||||
|
||||
|
||||
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b){
|
||||
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
||||
|
||||
double t1, t2;
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
@ -290,7 +290,7 @@ namespace Opm
|
||||
this->cols = cols;
|
||||
this->rows = rows;
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
cudaStreamSynchronize(stream);
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -301,10 +301,10 @@ namespace Opm
|
||||
|
||||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b){
|
||||
void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b) {
|
||||
|
||||
double t1, t2;
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
@ -312,7 +312,7 @@ namespace Opm
|
||||
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
cudaStreamSynchronize(stream);
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -322,17 +322,17 @@ namespace Opm
|
||||
} // end update_system_on_gpu()
|
||||
|
||||
|
||||
void cusparseSolverBackend::reset_prec_on_gpu(){
|
||||
void cusparseSolverBackend::reset_prec_on_gpu() {
|
||||
cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
|
||||
|
||||
bool cusparseSolverBackend::analyse_matrix(){
|
||||
bool cusparseSolverBackend::analyse_matrix() {
|
||||
|
||||
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
double t1, t2;
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
@ -381,7 +381,7 @@ namespace Opm
|
||||
|
||||
int structural_zero;
|
||||
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
|
||||
if(CUSPARSE_STATUS_ZERO_PIVOT == status){
|
||||
if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -395,7 +395,7 @@ namespace Opm
|
||||
BLOCK_SIZE, info_U, policy, d_buffer);
|
||||
cudaCheckLastError("Could not analyse level information");
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
cudaStreamSynchronize(stream);
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -406,10 +406,10 @@ namespace Opm
|
||||
return true;
|
||||
} // end analyse_matrix()
|
||||
|
||||
bool cusparseSolverBackend::create_preconditioner(){
|
||||
bool cusparseSolverBackend::create_preconditioner() {
|
||||
|
||||
double t1, t2;
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
@ -422,11 +422,11 @@ namespace Opm
|
||||
int structural_zero;
|
||||
// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
|
||||
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
|
||||
if(CUSPARSE_STATUS_ZERO_PIVOT == status){
|
||||
if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
cudaStreamSynchronize(stream);
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -437,7 +437,7 @@ namespace Opm
|
||||
} // end create_preconditioner()
|
||||
|
||||
|
||||
void cusparseSolverBackend::solve_system(BdaResult &res){
|
||||
void cusparseSolverBackend::solve_system(BdaResult &res) {
|
||||
// actually solve
|
||||
gpu_pbicgstab(res);
|
||||
cudaStreamSynchronize(stream);
|
||||
@ -447,21 +447,21 @@ namespace Opm
|
||||
|
||||
// copy result to host memory
|
||||
// caller must be sure that x is a valid array
|
||||
void cusparseSolverBackend::post_process(double *x){
|
||||
void cusparseSolverBackend::post_process(double *x) {
|
||||
|
||||
if(!initialized){
|
||||
if (!initialized) {
|
||||
cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault);
|
||||
}
|
||||
|
||||
double t1, t2;
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
|
||||
if(verbosity > 2){
|
||||
if (verbosity > 2) {
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
out << "cusparseSolver::post_process(): " << t2-t1 << " s";
|
||||
@ -472,20 +472,20 @@ namespace Opm
|
||||
|
||||
typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus;
|
||||
|
||||
cusparseSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res){
|
||||
if(initialized == false){
|
||||
cusparseSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res) {
|
||||
if (initialized == false) {
|
||||
initialize(N, nnz, dim);
|
||||
copy_system_to_gpu(vals, rows, cols, b);
|
||||
}else{
|
||||
update_system_on_gpu(vals, b);
|
||||
}
|
||||
if(analysis_done == false){
|
||||
if(!analyse_matrix()){
|
||||
if (analysis_done == false) {
|
||||
if (!analyse_matrix()) {
|
||||
return cusparseSolverStatus::CUSPARSE_SOLVER_ANALYSIS_FAILED;
|
||||
}
|
||||
}
|
||||
reset_prec_on_gpu();
|
||||
if(create_preconditioner()){
|
||||
if (create_preconditioner()) {
|
||||
solve_system(res);
|
||||
}else{
|
||||
return cusparseSolverStatus::CUSPARSE_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
|
Loading…
Reference in New Issue
Block a user