Adapt rocsparse separate wells PR to changes made to ISTLSolverEbos

2025-02-25 18:55:30 -06:00 · 2023-09-26 06:17:18 +02:00 · 2023-09-26 06:17:18 +02:00 · e4abc12a05
commit e4abc12a05
parent 177a46366d
4 changed files with 3 additions and 173 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -684,10 +684,6 @@ if(USE_BDA_BRIDGE)
  if(VexCL_FOUND)
    target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
  endif()
  if(hip_FOUND)
    target_link_libraries( opmsimulators PUBLIC hip::device )
  endif()
 endif()
 if(Damaris_FOUND)
--- a/opm/simulators/linalg/ISTLSolverEbos.cpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.cpp
@ -203,172 +203,6 @@ void FlexibleSolverInfo<Matrix,Vector,Comm>::create(const Matrix& matrix,
    }
 }
 //Razvan<<<<<<< HEAD
 //Razvan=======
 #if COMPILE_BDA_BRIDGE
 template<class Matrix, class Vector>
 BdaSolverInfo<Matrix,Vector>::
 BdaSolverInfo(const std::string& accelerator_mode,
              const int linear_solver_verbosity,
              const int maxit,
              const double tolerance,
              const int platformID,
              const int deviceID,
              const bool opencl_ilu_parallel,
              const std::string& linsolver)
    : bridge_(std::make_unique<Bridge>(accelerator_mode,
                                       linear_solver_verbosity, maxit,
                                       tolerance, platformID, deviceID,
                                       opencl_ilu_parallel, linsolver))
    , accelerator_mode_(accelerator_mode)
 {}
 template<class Matrix, class Vector>
 BdaSolverInfo<Matrix,Vector>::~BdaSolverInfo() = default;
 template<class Matrix, class Vector>
 template<class Grid>
 void BdaSolverInfo<Matrix,Vector>::
 prepare(const Grid& grid,
        const Dune::CartesianIndexMapper<Grid>& cartMapper,
        const std::vector<Well>& wellsForConn,
        const std::vector<int>& cellPartition,
        const size_t nonzeroes,
        const bool useWellConn)
 {
    if (numJacobiBlocks_ > 1) {
      detail::setWellConnections(grid, cartMapper, wellsForConn,
                                 useWellConn,
                                 wellConnectionsGraph_,
                                 numJacobiBlocks_);
      this->blockJacobiAdjacency(grid, cellPartition, nonzeroes);
    }
 }
 template<class Matrix, class Vector>
 bool BdaSolverInfo<Matrix,Vector>::
 apply(Vector& rhs,
      const bool useWellConn,
      WellContribFunc getContribs,
      const int rank,
      Matrix& matrix,
      Vector& x,
      Dune::InverseOperatorResult& result)
 {
    bool use_gpu = bridge_->getUseGpu();
    if (use_gpu) {
        auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
        bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
        // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
 #if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
        if (!useWellConn) {
            getContribs(*wellContribs);
        }
 #endif
        if (numJacobiBlocks_ > 1) {
            this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
            // Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
            bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(),
                                  numJacobiBlocks_, rhs, *wellContribs, result);
        }
        else
            bridge_->solve_system(&matrix, &matrix,
                                  numJacobiBlocks_, rhs, *wellContribs, result);
        if (result.converged) {
            // get result vector x from non-Dune backend, iff solve was successful
            bridge_->get_result(x);
            return true;
        } else {
            // warn about CPU fallback
            // BdaBridge might have disabled its BdaSolver for this simulation due to some error
            // in that case the BdaBridge is disabled and flexibleSolver is always used
            // or maybe the BdaSolver did not converge in time, then it will be used next linear solve
            if (rank == 0) {
                OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
            }
        }
    }
    return false;
 }
 template<class Matrix, class Vector>
 bool BdaSolverInfo<Matrix,Vector>::
 gpuActive()
 {
    return bridge_->getUseGpu();
 }
 template<class Matrix, class Vector>
 template<class Grid>
 void BdaSolverInfo<Matrix,Vector>::
 blockJacobiAdjacency(const Grid& grid,
                     const std::vector<int>& cell_part,
                     size_t nonzeroes)
 {
    using size_type = typename Matrix::size_type;
    using Iter = typename Matrix::CreateIterator;
    size_type numCells = grid.size(0);
    blockJacobiForGPUILU0_ = std::make_unique<Matrix>(numCells, numCells,
                                                      nonzeroes, Matrix::row_wise);
    const auto& lid = grid.localIdSet();
    const auto& gridView = grid.leafGridView();
    auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows
    //Loop over cells
    for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row)
    {
        const auto& elem = *elemIt;
        size_type idx = lid.id(elem);
        row.insert(idx);
        // Add well non-zero connections
        for (const auto wc : wellConnectionsGraph_[idx]) {
            row.insert(wc);
        }
        int locPart = cell_part[idx];
        //Add neighbor if it is on the same part
        auto isend = gridView.iend(elem);
        for (auto is = gridView.ibegin(elem); is!=isend; ++is)
        {
            //check if face has neighbor
            if (is->neighbor())
            {
                size_type nid = lid.id(is->outside());
                int nabPart = cell_part[nid];
                if (locPart == nabPart) {
                    row.insert(nid);
                }
            }
        }
    }
 }
 template<class Matrix, class Vector>
 void BdaSolverInfo<Matrix,Vector>::
 copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
 {
    auto rbegin = blockJac.begin();
    auto rend = blockJac.end();
    auto outerRow = mat.begin();
    for (auto row = rbegin; row != rend; ++row, ++outerRow) {
        auto outerCol = (*outerRow).begin();
        for (auto col = (*row).begin(); col != (*row).end(); ++col) {
            // outerRow is guaranteed to have all column entries that row has!
            while(outerCol.index() < col.index()) ++outerCol;
            assert(outerCol.index() == col.index());
            *col = *outerCol; // copy nonzero block
        }
    }
 }
 #endif // COMPILE_BDA_BRIDGE
 //Razvan>>>>>>> 1a32e4cc1 (Make sure rocsparse can get wellcontributions)
 template<int Dim>
 using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>;
 template<int Dim>
--- a/opm/simulators/linalg/ISTLSolverEbosBda.cpp
+++ b/opm/simulators/linalg/ISTLSolverEbosBda.cpp
@ -100,8 +100,8 @@ apply(Vector& rhs,
        auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
        bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
-        // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution
+         // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
-#if HAVE_CUDA || HAVE_OPENCL
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
        if (!useWellConn) {
            getContribs(*wellContribs);
        }
--- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp
+++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp
@ -143,7 +143,7 @@ void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x,
                                                [[maybe_unused]] double *d_y){
 #ifdef __HIP__
    unsigned gridDim = num_std_wells;
-    unsigned blockDim = 32;
+    unsigned blockDim = 64;
    unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2
    // dim3(N) will create a vector {N, 1, 1}
    stdwell_apply<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(