Added some whitespace around brackets, in accordance with the OPM suggested coding standard

T.D. (Tongdong) Qiu 2019-12-18 15:54:14 +01:00
parent 950d1c92c1
commit a491e64883
3 changed files with 61 additions and 61 deletions
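For reference, the convention applied throughout this commit is mainly a space between `if`/`for` and the opening parenthesis, a space before the opening brace, and spaces around `else`. A minimal sketch of the before/after style (illustrative only, not code from the changed files; the `use_gpu` flag below is just a stand-in local variable to make the snippet compile):

// Old style:  if(use_gpu){ ... }else{ ... }
// New style:  if (use_gpu) { ... } else { ... }
#include <iostream>

int main() {
    const bool use_gpu = false;  // hypothetical flag, not the OPM parameter
    if (use_gpu) {
        std::cout << "GPU solver path\n";
    } else {
        std::cout << "CPU fallback path\n";
    }
    return 0;
}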

View File

@@ -251,13 +251,13 @@ protected:
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
-if(use_gpu && !matrix_add_well_contributions){
+if (use_gpu && !matrix_add_well_contributions) {
OPM_THROW(std::logic_error,"Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab");
}
bdaBridge.reset(new BdaBridge(use_gpu, linear_solver_verbosity, maxit, tolerance));
#else
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
-if(use_gpu){
+if (use_gpu) {
OPM_THROW(std::logic_error,"Error cannot use GPU solver since CUDA was not found during compilation");
}
#endif
@@ -462,13 +462,13 @@ protected:
#if HAVE_CUDA
bdaBridge->solve_system(matrix_.get(), istlb, result);
-if(result.converged){
+if (result.converged) {
// get result vector x from non-Dune backend, iff solve was successful
bdaBridge->get_result(x);
-}else{
+} else {
// CPU fallback, or default case for Dune
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
-if(use_gpu){
+if (use_gpu) {
OpmLog::warning("cusparseSolver did not converge, now trying Dune to solve current linear system...");
}
auto precond = constructPrecond(linearOperator, parallelInformation_arg);

View File

@@ -37,7 +37,7 @@ namespace Opm
BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity OPM_UNUSED, int maxit OPM_UNUSED, double tolerance OPM_UNUSED) : use_gpu(use_gpu_) {
#if HAVE_CUDA
-if(use_gpu){
+if (use_gpu) {
backend.reset(new cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance));
}
#endif
@@ -52,16 +52,16 @@ int checkZeroDiagonal(BridgeMatrix& mat) {
int numZeros = 0;
const int dim = 3;
const double zero_replace = 1e-15;
-if(diag_indices.size() == 0){
+if (diag_indices.size() == 0) {
int N = mat.N();
diag_indices.reserve(N);
int roff = 0;
-for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){
+for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) {
auto diag = r->find(r.index()); // diag is an iterator
assert(diag.index() == r.index());
-for(int rr = 0; rr < dim; ++rr){
+for (int rr = 0; rr < dim; ++rr) {
auto& val = (*diag)[rr][rr]; // reference to easily change the value
-if (val == 0.0){ // could be replaced by '< 1e-30' or similar
+if (val == 0.0) { // could be replaced by '< 1e-30' or similar
val = zero_replace;
++numZeros;
}
@@ -69,12 +69,12 @@ int checkZeroDiagonal(BridgeMatrix& mat) {
diag_indices.emplace_back(diag.offset());
}
}else{
-for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){
+for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) {
typename BridgeMatrix::size_type offset = diag_indices[r.index()];
auto& diag_block = r->getptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r
-for(int rr = 0; rr < dim; ++rr){
+for (int rr = 0; rr < dim; ++rr) {
auto& val = diag_block[rr][rr];
-if(val == 0.0){ // could be replaced by '< 1e-30' or similar
+if (val == 0.0) { // could be replaced by '< 1e-30' or similar
val = zero_replace;
++numZeros;
}
@@ -94,18 +94,18 @@ void getSparsityPattern(BridgeMatrix& mat, std::vector<int> &h_rows, std::vector
int sum_nnzs = 0;
// convert colIndices and rowPointers
-if(h_rows.size() == 0){
+if (h_rows.size() == 0) {
h_rows.emplace_back(0);
-for(typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r){
+for (typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r) {
int size_row = 0;
-for(auto c = r->begin(); c != r->end(); ++c){
+for (auto c = r->begin(); c != r->end(); ++c) {
h_cols.emplace_back(c.index());
size_row++;
}
sum_nnzs += size_row;
h_rows.emplace_back(sum_nnzs);
}
-if(h_rows[mat.N()] != mat.nonzeroes()){
+if (h_rows[mat.N()] != mat.nonzeroes()) {
OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::getSparsityPattern()");
}
}
@@ -118,7 +118,7 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
{
#if HAVE_CUDA
-if(use_gpu){
+if (use_gpu) {
BdaResult result;
result.converged = false;
static std::vector<int> h_rows;
@@ -127,12 +127,12 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
int N = mat->N()*dim;
int nnz = (h_rows.empty()) ? mat->nonzeroes()*dim*dim : h_rows.back()*dim*dim;
-if(dim != 3){
+if (dim != 3) {
OpmLog::warning("cusparseSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = false;
}
-if(h_rows.capacity() == 0){
+if (h_rows.capacity() == 0) {
h_rows.reserve(N+1);
h_cols.reserve(nnz);
#if PRINT_TIMERS_BRIDGE
@@ -163,7 +163,7 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus;
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
cusparseSolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), result);
-switch(status){
+switch(status) {
case cusparseSolverStatus::CUSPARSE_SOLVER_SUCCESS:
//OpmLog::info("cusparseSolver converged");
break;
@@ -190,9 +190,9 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U
template <class BridgeVector>
-void BdaBridge::get_result(BridgeVector &x OPM_UNUSED){
+void BdaBridge::get_result(BridgeVector &x OPM_UNUSED) {
#if HAVE_CUDA
-if(use_gpu){
+if (use_gpu) {
backend->post_process(static_cast<double*>(&(x[0][0])));
}
#endif

View File

@@ -45,20 +45,20 @@ namespace Opm
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
-double second(void){
+double second(void) {
struct timeval tv;
gettimeofday(&tv, nullptr);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
}
-cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : verbosity(verbosity_), maxit(maxit_), tolerance(tolerance_), minit(0){
+cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : verbosity(verbosity_), maxit(maxit_), tolerance(tolerance_), minit(0) {
}
-cusparseSolverBackend::~cusparseSolverBackend(){
+cusparseSolverBackend::~cusparseSolverBackend() {
finalize();
}
-void cusparseSolverBackend::gpu_pbicgstab(BdaResult& res){
+void cusparseSolverBackend::gpu_pbicgstab(BdaResult& res) {
double t_total1, t_total2;
int n = N;
double rho = 1.0, rhop;
@@ -80,17 +80,17 @@ namespace Opm
cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);
-if(verbosity > 1){
+if (verbosity > 1) {
std::ostringstream out;
out << std::scientific << "cusparseSolver initial norm: " << norm_0;
OpmLog::info(out.str());
}
-for(it = 0.5; it < maxit; it+=0.5){
+for (it = 0.5; it < maxit; it+=0.5) {
rhop = rho;
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
-if(it > 1){
+if (it > 1) {
beta = (rho/rhop) * (alpha/omega);
nomega = -omega;
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
@@ -118,7 +118,7 @@ namespace Opm
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
-if(norm < tolerance * norm_0 && it > minit){
+if (norm < tolerance * norm_0 && it > minit) {
break;
}
@@ -147,11 +147,11 @@ namespace Opm
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
-if(norm < tolerance * norm_0 && it > minit){
+if (norm < tolerance * norm_0 && it > minit) {
break;
}
-if(verbosity > 1){
+if (verbosity > 1) {
std::ostringstream out;
out << "it: " << it << std::scientific << ", norm: " << norm;
OpmLog::info(out.str());
@@ -166,7 +166,7 @@ namespace Opm
res.elapsed = t_total2 - t_total1;
res.converged = (it != (maxit + 0.5));
-if(verbosity > 0){
+if (verbosity > 0) {
std::ostringstream out;
out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
", time per iteration: " << res.elapsed/it << ", iterations: " << it;
@@ -175,7 +175,7 @@ namespace Opm
}
-void cusparseSolverBackend::initialize(int N, int nnz, int dim){
+void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
this->N = N;
this->nnz = nnz;
this->BLOCK_SIZE = dim;
@@ -235,7 +235,7 @@ namespace Opm
initialized = true;
} // end initialize()
-void cusparseSolverBackend::finalize(){
+void cusparseSolverBackend::finalize() {
cudaFree(d_x);
cudaFree(d_b);
cudaFree(d_r);
@@ -267,10 +267,10 @@ namespace Opm
} // end finalize()
-void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b){
+void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
double t1, t2;
-if(verbosity > 2){
+if (verbosity > 2) {
t1 = second();
}
@@ -290,7 +290,7 @@ namespace Opm
this->cols = cols;
this->rows = rows;
-if(verbosity > 2){
+if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
@@ -301,10 +301,10 @@ namespace Opm
// don't copy rowpointers and colindices, they stay the same
-void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b){
+void cusparseSolverBackend::update_system_on_gpu(double *vals, double *b) {
double t1, t2;
-if(verbosity > 2){
+if (verbosity > 2) {
t1 = second();
}
@@ -312,7 +312,7 @@ namespace Opm
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);
-if(verbosity > 2){
+if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
@@ -322,17 +322,17 @@ namespace Opm
} // end update_system_on_gpu()
-void cusparseSolverBackend::reset_prec_on_gpu(){
+void cusparseSolverBackend::reset_prec_on_gpu() {
cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
}
-bool cusparseSolverBackend::analyse_matrix(){
+bool cusparseSolverBackend::analyse_matrix() {
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
double t1, t2;
-if(verbosity > 2){
+if (verbosity > 2) {
t1 = second();
}
@@ -381,7 +381,7 @@ namespace Opm
int structural_zero;
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
-if(CUSPARSE_STATUS_ZERO_PIVOT == status){
+if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
return false;
}
@@ -395,7 +395,7 @@ namespace Opm
BLOCK_SIZE, info_U, policy, d_buffer);
cudaCheckLastError("Could not analyse level information");
-if(verbosity > 2){
+if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
@@ -406,10 +406,10 @@ namespace Opm
return true;
} // end analyse_matrix()
-bool cusparseSolverBackend::create_preconditioner(){
+bool cusparseSolverBackend::create_preconditioner() {
double t1, t2;
-if(verbosity > 2){
+if (verbosity > 2) {
t1 = second();
}
@@ -422,11 +422,11 @@ namespace Opm
int structural_zero;
// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
-if(CUSPARSE_STATUS_ZERO_PIVOT == status){
+if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
return false;
}
-if(verbosity > 2){
+if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
@@ -437,7 +437,7 @@ namespace Opm
} // end create_preconditioner()
-void cusparseSolverBackend::solve_system(BdaResult &res){
+void cusparseSolverBackend::solve_system(BdaResult &res) {
// actually solve
gpu_pbicgstab(res);
cudaStreamSynchronize(stream);
@@ -447,21 +447,21 @@ namespace Opm
// copy result to host memory
// caller must be sure that x is a valid array
-void cusparseSolverBackend::post_process(double *x){
+void cusparseSolverBackend::post_process(double *x) {
-if(!initialized){
+if (!initialized) {
cudaHostRegister(x, N * sizeof(double), cudaHostRegisterDefault);
}
double t1, t2;
-if(verbosity > 2){
+if (verbosity > 2) {
t1 = second();
}
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
-if(verbosity > 2){
+if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "cusparseSolver::post_process(): " << t2-t1 << " s";
@@ -472,20 +472,20 @@ namespace Opm
typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus;
-cusparseSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res){
-if(initialized == false){
+cusparseSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, BdaResult &res) {
+if (initialized == false) {
initialize(N, nnz, dim);
copy_system_to_gpu(vals, rows, cols, b);
}else{
update_system_on_gpu(vals, b);
}
-if(analysis_done == false){
-if(!analyse_matrix()){
+if (analysis_done == false) {
+if (!analyse_matrix()) {
return cusparseSolverStatus::CUSPARSE_SOLVER_ANALYSIS_FAILED;
}
}
reset_prec_on_gpu();
-if(create_preconditioner()){
+if (create_preconditioner()) {
solve_system(res);
}else{
return cusparseSolverStatus::CUSPARSE_SOLVER_CREATE_PRECONDITIONER_FAILED;