Adapt rocsparse separate wells PR to changes made to ISTLSolverEbos

2025-02-25 18:55:30 -06:00 · 2023-09-26 06:17:18 +02:00 · 2023-09-26 06:17:18 +02:00 · e4abc12a05
commit e4abc12a05
parent 177a46366d
4 changed files with 3 additions and 173 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -684,10 +684,6 @@ if(USE_BDA_BRIDGE)
  if(VexCL_FOUND)
    target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
  endif()
-
-  if(hip_FOUND)
-    target_link_libraries( opmsimulators PUBLIC hip::device )
-  endif()
 endif()

 if(Damaris_FOUND)
--- a/opm/simulators/linalg/ISTLSolverEbos.cpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.cpp
@@ -203,172 +203,6 @@ void FlexibleSolverInfo<Matrix,Vector,Comm>::create(const Matrix& matrix,
    }
 }

-//Razvan<<<<<<< HEAD
-//Razvan=======
-#if COMPILE_BDA_BRIDGE
-template<class Matrix, class Vector>
-BdaSolverInfo<Matrix,Vector>::
-BdaSolverInfo(const std::string& accelerator_mode,
-              const int linear_solver_verbosity,
-              const int maxit,
-              const double tolerance,
-              const int platformID,
-              const int deviceID,
-              const bool opencl_ilu_parallel,
-              const std::string& linsolver)
-    : bridge_(std::make_unique<Bridge>(accelerator_mode,
-                                       linear_solver_verbosity, maxit,
-                                       tolerance, platformID, deviceID,
-                                       opencl_ilu_parallel, linsolver))
-    , accelerator_mode_(accelerator_mode)
-{}
-
-template<class Matrix, class Vector>
-BdaSolverInfo<Matrix,Vector>::~BdaSolverInfo() = default;
-
-template<class Matrix, class Vector>
-template<class Grid>
-void BdaSolverInfo<Matrix,Vector>::
-prepare(const Grid& grid,
-        const Dune::CartesianIndexMapper<Grid>& cartMapper,
-        const std::vector<Well>& wellsForConn,
-        const std::vector<int>& cellPartition,
-        const size_t nonzeroes,
-        const bool useWellConn)
-{
-    if (numJacobiBlocks_ > 1) {
-      detail::setWellConnections(grid, cartMapper, wellsForConn,
-                                 useWellConn,
-                                 wellConnectionsGraph_,
-                                 numJacobiBlocks_);
-      this->blockJacobiAdjacency(grid, cellPartition, nonzeroes);
-    }
-}
-
-template<class Matrix, class Vector>
-bool BdaSolverInfo<Matrix,Vector>::
-apply(Vector& rhs,
-      const bool useWellConn,
-      WellContribFunc getContribs,
-      const int rank,
-      Matrix& matrix,
-      Vector& x,
-      Dune::InverseOperatorResult& result)
-{
-    bool use_gpu = bridge_->getUseGpu();
-    if (use_gpu) {
-        auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
-        bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
-
-        // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
-        if (!useWellConn) {
-            getContribs(*wellContribs);
-        }
-#endif
-
-        if (numJacobiBlocks_ > 1) {
-            this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
-            // Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
-            bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(),
-                                  numJacobiBlocks_, rhs, *wellContribs, result);
-        }
-        else
-            bridge_->solve_system(&matrix, &matrix,
-                                  numJacobiBlocks_, rhs, *wellContribs, result);
-        if (result.converged) {
-            // get result vector x from non-Dune backend, iff solve was successful
-            bridge_->get_result(x);
-            return true;
-        } else {
-            // warn about CPU fallback
-            // BdaBridge might have disabled its BdaSolver for this simulation due to some error
-            // in that case the BdaBridge is disabled and flexibleSolver is always used
-            // or maybe the BdaSolver did not converge in time, then it will be used next linear solve
-            if (rank == 0) {
-                OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
-            }
-        }
-    }
-
-    return false;
-}
-
-template<class Matrix, class Vector>
-bool BdaSolverInfo<Matrix,Vector>::
-gpuActive()
-{
-    return bridge_->getUseGpu();
-}
-
-template<class Matrix, class Vector>
-template<class Grid>
-void BdaSolverInfo<Matrix,Vector>::
-blockJacobiAdjacency(const Grid& grid,
-                     const std::vector<int>& cell_part,
-                     size_t nonzeroes)
-{
-    using size_type = typename Matrix::size_type;
-    using Iter = typename Matrix::CreateIterator;
-    size_type numCells = grid.size(0);
-    blockJacobiForGPUILU0_ = std::make_unique<Matrix>(numCells, numCells,
-                                                      nonzeroes, Matrix::row_wise);
-
-    const auto& lid = grid.localIdSet();
-    const auto& gridView = grid.leafGridView();
-    auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows
-
-    //Loop over cells
-    for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row)
-    {
-        const auto& elem = *elemIt;
-        size_type idx = lid.id(elem);
-        row.insert(idx);
-
-        // Add well non-zero connections
-        for (const auto wc : wellConnectionsGraph_[idx]) {
-            row.insert(wc);
-        }
-
-        int locPart = cell_part[idx];
-
-        //Add neighbor if it is on the same part
-        auto isend = gridView.iend(elem);
-        for (auto is = gridView.ibegin(elem); is!=isend; ++is)
-        {
-            //check if face has neighbor
-            if (is->neighbor())
-            {
-                size_type nid = lid.id(is->outside());
-                int nabPart = cell_part[nid];
-                if (locPart == nabPart) {
-                    row.insert(nid);
-                }
-            }
-        }
-    }
-}
-
-template<class Matrix, class Vector>
-void BdaSolverInfo<Matrix,Vector>::
-copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
-{
-    auto rbegin = blockJac.begin();
-    auto rend = blockJac.end();
-    auto outerRow = mat.begin();
-    for (auto row = rbegin; row != rend; ++row, ++outerRow) {
-        auto outerCol = (*outerRow).begin();
-        for (auto col = (*row).begin(); col != (*row).end(); ++col) {
-            // outerRow is guaranteed to have all column entries that row has!
-            while(outerCol.index() < col.index()) ++outerCol;
-            assert(outerCol.index() == col.index());
-            *col = *outerCol; // copy nonzero block
-        }
-    }
-}
-#endif // COMPILE_BDA_BRIDGE
-
-//Razvan>>>>>>> 1a32e4cc1 (Make sure rocsparse can get wellcontributions)
 template<int Dim>
 using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>;
 template<int Dim>
--- a/opm/simulators/linalg/ISTLSolverEbosBda.cpp
+++ b/opm/simulators/linalg/ISTLSolverEbosBda.cpp
@@ -100,8 +100,8 @@ apply(Vector& rhs,
        auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
        bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());

-        // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution
-#if HAVE_CUDA || HAVE_OPENCL
+         // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
        if (!useWellConn) {
            getContribs(*wellContribs);
        }
--- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp
+++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp
@@ -143,7 +143,7 @@ void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x,
                                                [[maybe_unused]] double *d_y){
 #ifdef __HIP__
    unsigned gridDim = num_std_wells;
-    unsigned blockDim = 32;
+    unsigned blockDim = 64;
    unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2
    // dim3(N) will create a vector {N, 1, 1}
    stdwell_apply<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(