diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 9554fe778..245375d45 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -43,49 +44,51 @@ BdaBridge::BdaBridge(bool use_gpu_, int linear_solver_verbosity OPM_UNUSED, int } + #if HAVE_CUDA template int checkZeroDiagonal(BridgeMatrix& mat) { - static std::vector diag_indices; // contains offsets of the diagonal nnzs + static std::vector diag_indices; // contains offsets of the diagonal nnzs int numZeros = 0; const int dim = 3; const double zero_replace = 1e-15; - double *nnzs = &(mat[0][0][0][0]); if(diag_indices.size() == 0){ - int N = mat.N()*dim; + int N = mat.N(); diag_indices.reserve(N); - for(typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r){ - for(auto c = r->begin(); c != r->end(); ++c){ - if(r.index() == c.index()){ - for(int rr = 0; rr < dim; ++rr){ - // pointer arithmetic - int offset = (int)((long unsigned)&(mat[r.index()][c.index()][rr][rr]) - (long unsigned)nnzs); // in bytes - offset /= sizeof(double); // convert offset to doubles - diag_indices.emplace_back(offset); - double val = nnzs[offset]; - if(val == 0.0){ // could be replaced by '< 1e-30' or similar - nnzs[offset] = zero_replace; - ++numZeros; - } - } - break; + int roff = 0; + for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){ + auto diag = r->find(r.index()); // diag is an iterator + assert(diag.index() == r.index()); + for(int rr = 0; rr < dim; ++rr){ + auto& val = (*diag)[rr][rr]; // reference to easily change the value + if (val == 0.0){ // could be replaced by '< 1e-30' or similar + val = zero_replace; + ++numZeros; + } + } + diag_indices.emplace_back(diag.offset()); + } + }else{ + for(typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r){ + typename BridgeMatrix::size_type offset = diag_indices[r.index()]; + auto& diag_block = r->getptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r + for(int rr = 0; rr < dim; ++rr){ + auto& val = diag_block[rr][rr]; + if(val == 0.0){ // could be replaced by '< 1e-30' or similar + val = zero_replace; + ++numZeros; } } } - }else{ - for(int offset : diag_indices){ - if(nnzs[offset] == 0.0){ // could be replaced by '< 1e-30' or similar - nnzs[offset] = zero_replace; - ++numZeros; - } - } } + return numZeros; } // iterate sparsity pattern from Matrix and put colIndices and rowPointers in arrays // sparsity pattern should stay the same due to matrix-add-well-contributions +// this could be removed if Dune::BCRSMatrix features an API call that returns colIndices and rowPointers template void getSparsityPattern(BridgeMatrix& mat, std::vector &h_rows, std::vector &h_cols) { int sum_nnzs = 0; @@ -102,8 +105,9 @@ void getSparsityPattern(BridgeMatrix& mat, std::vector &h_rows, std::vector sum_nnzs += size_row; h_rows.emplace_back(sum_nnzs); } - // set last rowpointer - h_rows[mat.N()] = mat.nonzeroes(); + if(h_rows[mat.N()] != mat.nonzeroes()){ + OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::getSparsityPattern()"); + } } } // end getSparsityPattern() @@ -121,16 +125,16 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U static std::vector h_cols; int dim = (*mat)[0][0].N(); int N = mat->N()*dim; - int nnz = mat->nonzeroes()*dim*dim; + int nnz = (h_rows.empty()) ? mat->nonzeroes()*dim*dim : h_rows.back()*dim*dim; if(dim != 3){ - OpmLog::error("cusparseSolver only accepts blocksize = 3 at this time"); + OpmLog::warning("cusparseSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program"); use_gpu = false; } if(h_rows.capacity() == 0){ h_rows.reserve(N+1); - h_cols.reserve(nnz); + h_cols.reserve(nnz); #if PRINT_TIMERS_BRIDGE Dune::Timer t; #endif @@ -157,7 +161,7 @@ void BdaBridge::solve_system(BridgeMatrix *mat OPM_UNUSED, BridgeVector &b OPM_U // actually solve typedef cusparseSolverBackend::cusparseSolverStatus cusparseSolverStatus; - // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to produce garbage + // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour cusparseSolverStatus status = backend->solve_system(N, nnz, dim, static_cast(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast(&(b[0][0])), result); switch(status){ case cusparseSolverStatus::CUSPARSE_SOLVER_SUCCESS: @@ -189,7 +193,7 @@ template void BdaBridge::get_result(BridgeVector &x OPM_UNUSED){ #if HAVE_CUDA if(use_gpu){ - backend->post_process(&(x[0][0])); + backend->post_process(static_cast(&(x[0][0]))); } #endif } diff --git a/opm/simulators/linalg/bda/cuda_header.h b/opm/simulators/linalg/bda/cuda_header.hpp similarity index 94% rename from opm/simulators/linalg/bda/cuda_header.h rename to opm/simulators/linalg/bda/cuda_header.hpp index 7fc01b60e..f1991a2f8 100644 --- a/opm/simulators/linalg/bda/cuda_header.h +++ b/opm/simulators/linalg/bda/cuda_header.hpp @@ -17,13 +17,11 @@ along with OPM. If not, see . */ -#ifndef CUDA_HEADER_H -#define CUDA_HEADER_H +#ifndef CUDA_HEADER_HEADER_INCLUDED +#define CUDA_HEADER_HEADER_INCLUDED #include -typedef double Block[9]; - #define cudaCheckLastError(msg) __cudaCheckError( __FILE__, __LINE__, #msg ) inline void __cudaCheckError(const char *file, const int line, const char *msg){ diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu index 8f01ab6fc..ff1169255 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu @@ -32,7 +32,7 @@ #include #include -#include +#include #include "cublas_v2.h" #include "cusparse_v2.h" diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index 491f44883..07f22e45c 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -17,8 +17,8 @@ along with OPM. If not, see . */ -#ifndef OPM_cuSPARSESOLVER_BACKEND_HEADER_INCLUDED -#define OPM_cuSPARSESOLVER_BACKEND_HEADER_INCLUDED +#ifndef OPM_CUSPARSESOLVER_BACKEND_HEADER_INCLUDED +#define OPM_CUSPARSESOLVER_BACKEND_HEADER_INCLUDED #include "cublas_v2.h"