Adapt rocsparse separate wells PR to changes made to ISTLSolverEbos

This commit is contained in:
Razvan Nane 2023-09-26 06:17:18 +02:00 committed by Razvan Nane
parent 177a46366d
commit e4abc12a05
4 changed files with 3 additions and 173 deletions

View File

@ -684,10 +684,6 @@ if(USE_BDA_BRIDGE)
if(VexCL_FOUND) if(VexCL_FOUND)
target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL ) target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
endif() endif()
if(hip_FOUND)
target_link_libraries( opmsimulators PUBLIC hip::device )
endif()
endif() endif()
if(Damaris_FOUND) if(Damaris_FOUND)

View File

@ -203,172 +203,6 @@ void FlexibleSolverInfo<Matrix,Vector,Comm>::create(const Matrix& matrix,
} }
} }
//Razvan<<<<<<< HEAD
//Razvan=======
#if COMPILE_BDA_BRIDGE
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::
BdaSolverInfo(const std::string& accelerator_mode,
const int linear_solver_verbosity,
const int maxit,
const double tolerance,
const int platformID,
const int deviceID,
const bool opencl_ilu_parallel,
const std::string& linsolver)
: bridge_(std::make_unique<Bridge>(accelerator_mode,
linear_solver_verbosity, maxit,
tolerance, platformID, deviceID,
opencl_ilu_parallel, linsolver))
, accelerator_mode_(accelerator_mode)
{}
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::~BdaSolverInfo() = default;
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
prepare(const Grid& grid,
const Dune::CartesianIndexMapper<Grid>& cartMapper,
const std::vector<Well>& wellsForConn,
const std::vector<int>& cellPartition,
const size_t nonzeroes,
const bool useWellConn)
{
if (numJacobiBlocks_ > 1) {
detail::setWellConnections(grid, cartMapper, wellsForConn,
useWellConn,
wellConnectionsGraph_,
numJacobiBlocks_);
this->blockJacobiAdjacency(grid, cellPartition, nonzeroes);
}
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
apply(Vector& rhs,
const bool useWellConn,
WellContribFunc getContribs,
const int rank,
Matrix& matrix,
Vector& x,
Dune::InverseOperatorResult& result)
{
bool use_gpu = bridge_->getUseGpu();
if (use_gpu) {
auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
// the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
if (!useWellConn) {
getContribs(*wellContribs);
}
#endif
if (numJacobiBlocks_ > 1) {
this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(),
numJacobiBlocks_, rhs, *wellContribs, result);
}
else
bridge_->solve_system(&matrix, &matrix,
numJacobiBlocks_, rhs, *wellContribs, result);
if (result.converged) {
// get result vector x from non-Dune backend, iff solve was successful
bridge_->get_result(x);
return true;
} else {
// warn about CPU fallback
// BdaBridge might have disabled its BdaSolver for this simulation due to some error
// in that case the BdaBridge is disabled and flexibleSolver is always used
// or maybe the BdaSolver did not converge in time, then it will be used next linear solve
if (rank == 0) {
OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
}
}
}
return false;
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
gpuActive()
{
return bridge_->getUseGpu();
}
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
blockJacobiAdjacency(const Grid& grid,
const std::vector<int>& cell_part,
size_t nonzeroes)
{
using size_type = typename Matrix::size_type;
using Iter = typename Matrix::CreateIterator;
size_type numCells = grid.size(0);
blockJacobiForGPUILU0_ = std::make_unique<Matrix>(numCells, numCells,
nonzeroes, Matrix::row_wise);
const auto& lid = grid.localIdSet();
const auto& gridView = grid.leafGridView();
auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows
//Loop over cells
for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row)
{
const auto& elem = *elemIt;
size_type idx = lid.id(elem);
row.insert(idx);
// Add well non-zero connections
for (const auto wc : wellConnectionsGraph_[idx]) {
row.insert(wc);
}
int locPart = cell_part[idx];
//Add neighbor if it is on the same part
auto isend = gridView.iend(elem);
for (auto is = gridView.ibegin(elem); is!=isend; ++is)
{
//check if face has neighbor
if (is->neighbor())
{
size_type nid = lid.id(is->outside());
int nabPart = cell_part[nid];
if (locPart == nabPart) {
row.insert(nid);
}
}
}
}
}
template<class Matrix, class Vector>
void BdaSolverInfo<Matrix,Vector>::
copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
{
auto rbegin = blockJac.begin();
auto rend = blockJac.end();
auto outerRow = mat.begin();
for (auto row = rbegin; row != rend; ++row, ++outerRow) {
auto outerCol = (*outerRow).begin();
for (auto col = (*row).begin(); col != (*row).end(); ++col) {
// outerRow is guaranteed to have all column entries that row has!
while(outerCol.index() < col.index()) ++outerCol;
assert(outerCol.index() == col.index());
*col = *outerCol; // copy nonzero block
}
}
}
#endif // COMPILE_BDA_BRIDGE
//Razvan>>>>>>> 1a32e4cc1 (Make sure rocsparse can get wellcontributions)
template<int Dim> template<int Dim>
using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>; using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>;
template<int Dim> template<int Dim>

View File

@ -100,8 +100,8 @@ apply(Vector& rhs,
auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn); auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
bridge_->initWellContributions(*wellContribs, x.N() * x[0].N()); bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
// the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
#if HAVE_CUDA || HAVE_OPENCL #if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
if (!useWellConn) { if (!useWellConn) {
getContribs(*wellContribs); getContribs(*wellContribs);
} }

View File

@ -143,7 +143,7 @@ void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x,
[[maybe_unused]] double *d_y){ [[maybe_unused]] double *d_y){
#ifdef __HIP__ #ifdef __HIP__
unsigned gridDim = num_std_wells; unsigned gridDim = num_std_wells;
unsigned blockDim = 32; unsigned blockDim = 64;
unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2 unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2
// dim3(N) will create a vector {N, 1, 1} // dim3(N) will create a vector {N, 1, 1}
stdwell_apply<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>( stdwell_apply<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(