reenable normal opencl

Rebased
This commit is contained in:
Tong Dong Qiu 2022-04-21 17:18:54 +02:00
parent dee5e16fb8
commit 61f693dbaf
12 changed files with 129 additions and 104 deletions

View File

@ -192,7 +192,6 @@ protected:
} }
std::unique_ptr<TransmissibilityType> globalTrans_; std::unique_ptr<TransmissibilityType> globalTrans_;
//std::vector<int> cell_part_;
}; };
} // namespace Opm } // namespace Opm

View File

@ -181,6 +181,7 @@ void EclGenericCpGridVanguard<ElementMapper,GridView,Scalar>::doLoadBalance_(Dun
// cells that exist only on other ranks even in the case of distributed wells // cells that exist only on other ranks even in the case of distributed wells
// But we need all connections to figure out the first cell of a well (e.g. for // But we need all connections to figure out the first cell of a well (e.g. for
// pressure). Hence this is now skipped. Rank 0 had everything even before. // pressure). Hence this is now skipped. Rank 0 had everything even before.
if (numJacobiBlocks > 0 && mpiSize == 1) { if (numJacobiBlocks > 0 && mpiSize == 1) {
const auto wells = schedule.getWellsatEnd(); const auto wells = schedule.getWellsatEnd();
cell_part_.resize(grid_->numCells()); cell_part_.resize(grid_->numCells());

View File

@ -161,15 +161,15 @@ namespace Opm
// Set it up manually // Set it up manually
ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout()); ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout());
detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_); detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_);
numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks); numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks);
useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions); useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
if (numJacobiBlocks_ > 1) { if (numJacobiBlocks_ > 1) {
const auto wellsForConn = simulator_.vanguard().schedule().getWellsatEnd(); const auto wellsForConn = simulator_.vanguard().schedule().getWellsatEnd();
detail::setWellConnections(simulator_.vanguard().grid(), wellsForConn, useWellConn_, detail::setWellConnections(simulator_.vanguard().grid(), wellsForConn, useWellConn_,
wellConnectionsGraph_, numJacobiBlocks_); wellConnectionsGraph_, numJacobiBlocks_);
std::cout << "Create block-Jacobi pattern" << std::endl; std::cout << "Create block-Jacobi pattern" << std::endl;
blockJacobiAdjacency(); blockJacobiAdjacency();
} }
useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions); useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
#if HAVE_FPGA #if HAVE_FPGA
// check usage of MatrixAddWellContributions: for FPGA they must be included // check usage of MatrixAddWellContributions: for FPGA they must be included
@ -281,15 +281,15 @@ namespace Opm
} }
#endif #endif
if (numJacobiBlocks_ > 1) { if (numJacobiBlocks_ > 1) {
copyMatToBlockJac(getMatrix(), *blockJacobiForGPUILU0_); copyMatToBlockJac(getMatrix(), *blockJacobiForGPUILU0_);
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition.. // Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), &*blockJacobiForGPUILU0_, bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), &*blockJacobiForGPUILU0_,
numJacobiBlocks_, *rhs_, *wellContribs, result); numJacobiBlocks_, *rhs_, *wellContribs, result);
} }
else else
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), const_cast<Matrix*>(&getMatrix()), bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), const_cast<Matrix*>(&getMatrix()),
numJacobiBlocks_, *rhs_, *wellContribs, result); numJacobiBlocks_, *rhs_, *wellContribs, result);
if (result.converged) { if (result.converged) {
// get result vector x from non-Dune backend, iff solve was successful // get result vector x from non-Dune backend, iff solve was successful
bdaBridge->get_result(x); bdaBridge->get_result(x);
@ -512,78 +512,78 @@ namespace Opm
} }
} }
/// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid. /// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid.
void blockJacobiAdjacency() void blockJacobiAdjacency()
{ {
const auto& grid = simulator_.vanguard().grid(); const auto& grid = simulator_.vanguard().grid();
std::vector<int> cell_part = simulator_.vanguard().cellPartition(); std::vector<int> cell_part = simulator_.vanguard().cellPartition();
typedef typename Matrix::size_type size_type; typedef typename Matrix::size_type size_type;
size_type numCells = grid.size( 0 ); size_type numCells = grid.size( 0 );
blockJacobiForGPUILU0_.reset(new Matrix(numCells, numCells, Matrix::random)); blockJacobiForGPUILU0_.reset(new Matrix(numCells, numCells, Matrix::random));
std::vector<std::set<size_type>> pattern; std::vector<std::set<size_type>> pattern;
pattern.resize(numCells); pattern.resize(numCells);
const auto& lid = grid.localIdSet(); const auto& lid = grid.localIdSet();
const auto& gridView = grid.leafGridView(); const auto& gridView = grid.leafGridView();
auto elemIt = gridView.template begin<0>(); auto elemIt = gridView.template begin<0>();
const auto& elemEndIt = gridView.template end<0>(); const auto& elemEndIt = gridView.template end<0>();
//Loop over cells //Loop over cells
for (; elemIt != elemEndIt; ++elemIt) for (; elemIt != elemEndIt; ++elemIt)
{ {
const auto& elem = *elemIt; const auto& elem = *elemIt;
size_type idx = lid.id(elem); size_type idx = lid.id(elem);
pattern[idx].insert(idx); pattern[idx].insert(idx);
// Add well non-zero connections // Add well non-zero connections
for (auto wc = wellConnectionsGraph_[idx].begin(); wc!=wellConnectionsGraph_[idx].end(); ++wc) for (auto wc = wellConnectionsGraph_[idx].begin(); wc!=wellConnectionsGraph_[idx].end(); ++wc)
pattern[idx].insert(*wc); pattern[idx].insert(*wc);
int locPart = cell_part[idx]; int locPart = cell_part[idx];
//Add neighbor if it is on the same part //Add neighbor if it is on the same part
auto isend = gridView.iend(elem); auto isend = gridView.iend(elem);
for (auto is = gridView.ibegin(elem); is!=isend; ++is) for (auto is = gridView.ibegin(elem); is!=isend; ++is)
{ {
//check if face has neighbor //check if face has neighbor
if (is->neighbor()) if (is->neighbor())
{ {
size_type nid = lid.id(is->outside()); size_type nid = lid.id(is->outside());
int nabPart = cell_part[nid]; int nabPart = cell_part[nid];
if (locPart == nabPart) if (locPart == nabPart)
pattern[idx].insert(nid); pattern[idx].insert(nid);
} }
blockJacobiForGPUILU0_->setrowsize(idx, pattern[idx].size()); blockJacobiForGPUILU0_->setrowsize(idx, pattern[idx].size());
} }
} }
blockJacobiForGPUILU0_->endrowsizes(); blockJacobiForGPUILU0_->endrowsizes();
for (size_type dofId = 0; dofId < numCells; ++dofId) for (size_type dofId = 0; dofId < numCells; ++dofId)
{ {
auto nabIdx = pattern[dofId].begin(); auto nabIdx = pattern[dofId].begin();
auto endNab = pattern[dofId].end(); auto endNab = pattern[dofId].end();
for (; nabIdx != endNab; ++nabIdx) for (; nabIdx != endNab; ++nabIdx)
{ {
blockJacobiForGPUILU0_->addindex(dofId, *nabIdx); blockJacobiForGPUILU0_->addindex(dofId, *nabIdx);
} }
} }
blockJacobiForGPUILU0_->endindices(); blockJacobiForGPUILU0_->endindices();
} }
void copyMatToBlockJac(Matrix& mat, Matrix& blockJac) void copyMatToBlockJac(Matrix& mat, Matrix& blockJac)
{ {
auto rbegin = blockJac.begin(); auto rbegin = blockJac.begin();
auto rend = blockJac.end(); auto rend = blockJac.end();
for (auto row = rbegin; row != rend; ++row) { for (auto row = rbegin; row != rend; ++row) {
for (auto col = (*row).begin(); col != (*row).end(); ++col) { for (auto col = (*row).begin(); col != (*row).end(); ++col) {
blockJac[row.index()][col.index()] = mat[row.index()][col.index()]; blockJac[row.index()][col.index()] = mat[row.index()][col.index()];
} }
} }
} }
Matrix& getMatrix() Matrix& getMatrix()
{ {
@ -619,7 +619,7 @@ namespace Opm
FlowLinearSolverParameters parameters_; FlowLinearSolverParameters parameters_;
PropertyTree prm_; PropertyTree prm_;
bool scale_variables_; bool scale_variables_;
int numJacobiBlocks_; int numJacobiBlocks_;
std::shared_ptr< CommunicationType > comm_; std::shared_ptr< CommunicationType > comm_;
}; // end ISTLSolver }; // end ISTLSolver

View File

@ -211,11 +211,6 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
const int nnzb = (h_rows.empty()) ? mat->nonzeroes() : h_rows.back(); const int nnzb = (h_rows.empty()) ? mat->nonzeroes() : h_rows.back();
const int nnz = nnzb * dim * dim; const int nnz = nnzb * dim * dim;
static std::vector<int> bm_h_rows;
static std::vector<int> bm_h_cols;
const int bm_nnzb = (bm_h_rows.empty()) ? blockMat->nonzeroes() : bm_h_rows.back();
const int bm_nnz = bm_nnzb * dim * dim;
if (dim != 3) { if (dim != 3) {
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program"); OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = use_fpga = false; use_gpu = use_fpga = false;
@ -228,15 +223,9 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
copySparsityPatternFromISTL(*mat, h_rows, h_cols); copySparsityPatternFromISTL(*mat, h_rows, h_cols);
} }
if (bm_h_rows.capacity() == 0) {
bm_h_rows.reserve(Nb+1);
bm_h_cols.reserve(bm_nnzb);
copySparsityPatternFromISTL(*blockMat, bm_h_rows, bm_h_cols);
}
Dune::Timer t_zeros; Dune::Timer t_zeros;
int numZeros = checkZeroDiagonal(*mat); int numZeros = checkZeroDiagonal(*mat);
int bm_numZeros = checkZeroDiagonal(*blockMat);
if (verbosity >= 2) { if (verbosity >= 2) {
std::ostringstream out; std::ostringstream out;
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros"; out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
@ -247,16 +236,34 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
///////////////////////// /////////////////////////
// actually solve // actually solve
SolverStatus status; SolverStatus status;
if (numJacobiBlocks > 1) if (numJacobiBlocks < 2) {
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
}
else {
static std::vector<int> bm_h_rows;
static std::vector<int> bm_h_cols;
const int bm_nnzb = (bm_h_rows.empty()) ? blockMat->nonzeroes() : bm_h_rows.back();
const int bm_nnz = bm_nnzb * dim * dim;
if (bm_h_rows.capacity() == 0) {
bm_h_rows.reserve(Nb+1);
bm_h_cols.reserve(bm_nnzb);
copySparsityPatternFromISTL(*blockMat, bm_h_rows, bm_h_cols);
}
int bm_numZeros = checkZeroDiagonal(*blockMat);
if (verbosity >= 2) {
std::ostringstream out;
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
OpmLog::info(out.str());
}
status = backend->solve_system2(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), status = backend->solve_system2(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])),
bm_nnz, static_cast<double*>(&(((*blockMat)[0][0][0][0]))), bm_h_rows.data(), bm_h_cols.data(), bm_nnz, static_cast<double*>(&(((*blockMat)[0][0][0][0]))), bm_h_rows.data(), bm_h_cols.data(),
wellContribs, result); wellContribs, result);
else }
status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
//SolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
switch(status) { switch(status) {
case SolverStatus::BDA_SOLVER_SUCCESS: case SolverStatus::BDA_SOLVER_SUCCESS:
//OpmLog::info("BdaSolver converged"); //OpmLog::info("BdaSolver converged");

View File

@ -223,6 +223,7 @@ void FpgaSolverBackend<block_size>::get_result(double *x_)
} }
} // end get_result() } // end get_result()
template <unsigned int block_size> template <unsigned int block_size>
SolverStatus FpgaSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res) SolverStatus FpgaSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res)
{ {
@ -255,7 +256,8 @@ SolverStatus FpgaSolverBackend<block_size>::solve_system2(int N_, int nnz_, int
int nnz2, double *vals2, int *rows2, int *cols2, int nnz2, double *vals2, int *rows2, int *cols2,
WellContributions& wellContribs, BdaResult &res) WellContributions& wellContribs, BdaResult &res)
{ {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; (void) nnz2; (void) vals2; (void) rows2; (void) cols2;
return solve_system(N_, nnz_, dim, vals, rows, cols, b, wellContribs, res);
} }
template <unsigned int block_size> template <unsigned int block_size>

View File

@ -365,7 +365,8 @@ SolverStatus amgclSolverBackend<block_size>::solve_system2(int N_, int nnz_, int
int nnz2, double *vals2, int *rows2, int *cols2, int nnz2, double *vals2, int *rows2, int *cols2,
WellContributions& wellContribs, BdaResult &res) WellContributions& wellContribs, BdaResult &res)
{ {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; (void) nnz2; (void) vals2; (void) rows2; (void) cols2;
return solve_system(N_, nnz_, dim, vals, rows, cols, b, wellContribs, res);
} }
template <unsigned int block_size> template <unsigned int block_size>
@ -380,7 +381,6 @@ SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
} }
#define INSTANTIATE_BDA_FUNCTIONS(n) \ #define INSTANTIATE_BDA_FUNCTIONS(n) \
template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int); \ template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int); \

View File

@ -506,7 +506,8 @@ SolverStatus cusparseSolverBackend<block_size>::solve_system2(int N_, int nnz_,
int nnz2, double *vals2, int *rows2, int *cols2, int nnz2, double *vals2, int *rows2, int *cols2,
WellContributions& wellContribs, BdaResult &res) WellContributions& wellContribs, BdaResult &res)
{ {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; (void) nnz2; (void) vals2; (void) rows2; (void) cols2;
return solve_system(N_, nnz_, dim, vals, rows, cols, b, wellContribs, res);
} }
#define INSTANTIATE_BDA_FUNCTIONS(n) \ #define INSTANTIATE_BDA_FUNCTIONS(n) \

View File

@ -365,11 +365,11 @@ template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
{ {
const unsigned int bs = block_size; const unsigned int bs = block_size;
auto *m = mat;
auto *jm = jacMat; auto *jm = jacMat;
if (opencl_ilu_reorder != ILUReorder::NONE) { if (opencl_ilu_reorder != ILUReorder::NONE) {
m = rmat.get();
jm = rJacMat.get(); jm = rJacMat.get();
Timer t_reorder; Timer t_reorder;
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get()); reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());

View File

@ -94,6 +94,7 @@ bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat)
template <unsigned int block_size> template <unsigned int block_size>
bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
{ {
(void) jacMat;
return analyze_matrix(mat); return analyze_matrix(mat);
} }
@ -263,6 +264,7 @@ bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat)
template <unsigned int block_size> template <unsigned int block_size>
bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
{ {
(void) jacMat;
return create_preconditioner(mat); return create_preconditioner(mat);
} }

View File

@ -82,6 +82,7 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
template <unsigned int block_size> template <unsigned int block_size>
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat) { bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
(void) jacMat;
return analyze_matrix(mat_); return analyze_matrix(mat_);
} }
@ -107,6 +108,7 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
template <unsigned int block_size> template <unsigned int block_size>
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *jacMat) { bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
(void) jacMat;
return create_preconditioner(mat_); return create_preconditioner(mat_);
} }
// return the absolute value of the N elements for which the absolute value is highest // return the absolute value of the N elements for which the absolute value is highest

View File

@ -590,7 +590,11 @@ template <unsigned int block_size>
bool openclSolverBackend<block_size>::analyze_matrix() { bool openclSolverBackend<block_size>::analyze_matrix() {
Timer t; Timer t;
bool success = prec->analyze_matrix(mat.get(), jacMat.get()); bool success;
if (blockJacVersion)
success = prec->analyze_matrix(mat.get(), jacMat.get());
else
success = prec->analyze_matrix(mat.get());
if (opencl_ilu_reorder == ILUReorder::NONE) { if (opencl_ilu_reorder == ILUReorder::NONE) {
rmat = mat.get(); rmat = mat.get();
@ -641,7 +645,11 @@ template <unsigned int block_size>
bool openclSolverBackend<block_size>::create_preconditioner() { bool openclSolverBackend<block_size>::create_preconditioner() {
Timer t; Timer t;
bool result = prec->create_preconditioner(mat.get(), jacMat.get()); bool result;
if (blockJacVersion)
result = prec->create_preconditioner(mat.get(), jacMat.get());
else
result = prec->create_preconditioner(mat.get());
if (verbosity > 2) { if (verbosity > 2) {
std::ostringstream out; std::ostringstream out;
@ -730,6 +738,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system2(int N_, int nnz_, in
WellContributions& wellContribs, BdaResult &res) WellContributions& wellContribs, BdaResult &res)
{ {
if (initialized == false) { if (initialized == false) {
blockJacVersion = true;
initialize2(N_, nnz_, dim, vals, rows, cols, nnz2, vals2, rows2, cols2); initialize2(N_, nnz_, dim, vals, rows, cols, nnz2, vals2, rows2, cols2);
if (analysis_done == false) { if (analysis_done == false) {
if (!analyze_matrix()) { if (!analyze_matrix()) {

View File

@ -65,6 +65,8 @@ private:
int jac_nnz; int jac_nnz;
int jac_nnzb; int jac_nnzb;
bool blockJacVersion = false;
std::unique_ptr<Preconditioner<block_size> > prec; std::unique_ptr<Preconditioner<block_size> > prec;
// can perform blocked ILU0 and AMG on pressure component // can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge bool is_root; // allow for nested solvers, the root solver is called by BdaBridge