From a49aaf53d25a393868800be50f2ac0ec497dcfa5 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Tue, 28 Mar 2023 16:31:50 +0200 Subject: [PATCH 01/14] Add new files rocsparseWellContributions With HIP stdwell_apply kernel MultisegmentWells are done on CPU via UMFPack --- CMakeLists_files.cmake | 3 + .../linalg/bda/rocsparseWellContributions.cpp | 212 ++++++++++++++++++ .../linalg/bda/rocsparseWellContributions.hpp | 55 +++++ 3 files changed, 270 insertions(+) create mode 100644 opm/simulators/linalg/bda/rocsparseWellContributions.cpp create mode 100644 opm/simulators/linalg/bda/rocsparseWellContributions.hpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 077290e28..3202fe8ab 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -195,6 +195,7 @@ if(CUDA_FOUND) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/set_device.hpp) endif() + if(USE_BDA_BRIDGE) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp opm/simulators/linalg/bda/WellContributions.cpp @@ -219,6 +220,7 @@ if(USE_BDA_BRIDGE) endif() if(rocsparse_FOUND AND rocblas_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseSolverBackend.cpp) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseWellContributions.cpp) endif() if(amgcl_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp) @@ -473,6 +475,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.hpp opm/simulators/linalg/bda/rocalutionSolverBackend.hpp opm/simulators/linalg/bda/rocsparseSolverBackend.hpp + opm/simulators/linalg/bda/rocsparseWellContributions.hpp opm/simulators/linalg/bda/WellContributions.hpp opm/simulators/linalg/amgcpr.hh opm/simulators/linalg/twolevelmethodcpr.hh diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp new file mode 100644 index 000000000..b04131061 --- /dev/null +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -0,0 +1,212 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include // CMake + +#include + +#include +#include + +#include + + +#define HIP_CHECK(stat) \ + { \ + if(stat != hipSuccess) \ + { \ + OPM_THROW(std::logic_error, "HIP error"); \ + } \ + } + +namespace Opm +{ + +/// HIP kernel to apply the standard wellcontributions +__global__ void stdwell_apply( + const double *Cnnzs, + const double *Dnnzs, + const double *Bnnzs, + const unsigned *Ccols, + const unsigned *Bcols, + const double *x, + double *y, + const unsigned dim, + const unsigned dim_wells, + const unsigned *val_pointers) +{ + unsigned wgId = blockIdx.x; + unsigned wiId = threadIdx.x; + unsigned valSize = val_pointers[wgId + 1] - val_pointers[wgId]; + unsigned valsPerBlock = dim*dim_wells; + unsigned numActiveWorkItems = (blockDim.x/valsPerBlock)*valsPerBlock; + unsigned numBlocksPerWarp = blockDim.x/valsPerBlock; + unsigned c = wiId % dim; + unsigned r = (wiId/dim) % dim_wells; + double temp; + + extern __shared__ double localSum[]; + double *z1 = localSum + gridDim.x; + double *z2 = z1 + dim_wells; + + localSum[wiId] = 0; + if(wiId < numActiveWorkItems){ + unsigned b = wiId/valsPerBlock + val_pointers[wgId]; + while(b < valSize + val_pointers[wgId]){ + int colIdx = Bcols[b]; + localSum[wiId] += Bnnzs[b*dim*dim_wells + r*dim + c]*x[colIdx*dim + c]; + b += numBlocksPerWarp; + } + + // merge all blocks in this workgroup into 1 block + // if numBlocksPerWarp >= 3, should use loop + // block 1: block 2: + // 0 1 2 12 13 14 + // 3 4 5 15 16 17 + // 6 7 8 18 19 20 + // 9 10 11 21 22 23 + // workitem i will hold the sum of workitems i and i + valsPerBlock + if(wiId < valsPerBlock){ + for (unsigned i = 1; i < numBlocksPerWarp; ++i) { + localSum[wiId] += localSum[wiId + i*valsPerBlock]; + } + } + + if(c == 0 && wiId < valsPerBlock){ + for(unsigned i = dim - 1; i > 0; --i){ + localSum[wiId] += localSum[wiId + i]; + } + z1[r] = localSum[wiId]; + } + } + + __syncthreads(); + + if(wiId < dim_wells){ + temp = 0.0; + for(unsigned i = 0; i < dim_wells; ++i){ + temp += Dnnzs[wgId*dim_wells*dim_wells + wiId*dim_wells + i]*z1[i]; + } + z2[wiId] = temp; + } + + __syncthreads(); + + if(wiId < dim*valSize){ + temp = 0.0; + unsigned bb = wiId/dim + val_pointers[wgId]; + for (unsigned j = 0; j < dim_wells; ++j){ + temp += Cnnzs[bb*dim*dim_wells + j*dim + c]*z2[j]; + } + + int colIdx = Ccols[bb]; + y[colIdx*dim + c] -= temp; + } +} + + +void WellContributionsRocsparse::apply_stdwells(double *d_x, double *d_y){ + unsigned gridDim = num_std_wells; + unsigned blockDim = 32; + unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2 + // dim3(N) will create a vector {N, 1, 1} + stdwell_apply<<>>( + d_Cnnzs_hip, d_Dnnzs_hip, d_Bnnzs_hip, d_Ccols_hip, d_Bcols_hip, + d_x, d_y, dim, dim_wells, d_val_pointers_hip + ); + HIP_CHECK(hipStreamSynchronize(stream)); +} + +void WellContributionsRocsparse::apply_mswells(double *d_x, double *d_y){ + if (h_x.empty()) { + h_x.resize(N); + h_y.resize(N); + } + + HIP_CHECK(hipMemcpyAsync(h_x.data(), d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipMemcpyAsync(h_y.data(), d_y, sizeof(double) * N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // actually apply MultisegmentWells + for (auto& well : multisegments) { + well->apply(h_x.data(), h_y.data()); + } + + // copy vector y from CPU to GPU + HIP_CHECK(hipMemcpyAsync(d_y, h_y.data(), sizeof(double) * N, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); +} + +void WellContributionsRocsparse::apply(double *d_x, double *d_y){ + if(num_std_wells > 0){ + apply_stdwells(d_x, d_y); + } + + if(num_ms_wells > 0){ + apply_mswells(d_x, d_y); + } +} + +void WellContributionsRocsparse::APIaddMatrix(MatrixType type, + int* colIndices, + double* values, + unsigned int val_size) +{ + if (!allocated) { + OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions"); + } + + switch (type) { + case MatrixType::C: + HIP_CHECK(hipMemcpyAsync(d_Cnnzs_hip + num_blocks_so_far * dim * dim_wells, values, sizeof(d_Cnnzs_hip) * val_size * dim * dim_wells, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Ccols_hip + num_blocks_so_far, colIndices, sizeof(d_Ccols_hip) * val_size, hipMemcpyHostToDevice, stream)); + break; + + case MatrixType::D: + HIP_CHECK(hipMemcpyAsync(d_Dnnzs_hip + num_std_wells_so_far * dim_wells * dim_wells, values, sizeof(d_Dnnzs_hip) * dim_wells * dim_wells, hipMemcpyHostToDevice, stream)); + break; + + case MatrixType::B: + HIP_CHECK(hipMemcpyAsync(d_Bnnzs_hip + num_blocks_so_far * dim * dim_wells, values, sizeof(d_Bnnzs_hip) * val_size * dim * dim_wells, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Bcols_hip + num_blocks_so_far, colIndices, sizeof(d_Bcols_hip) * val_size, hipMemcpyHostToDevice, stream)); + + val_pointers[num_std_wells_so_far] = num_blocks_so_far; + if (num_std_wells_so_far == num_std_wells - 1) { + val_pointers[num_std_wells] = num_blocks; + HIP_CHECK(hipMemcpyAsync(d_val_pointers_hip, val_pointers.data(), sizeof(d_val_pointers_hip) * (num_std_wells + 1), hipMemcpyHostToDevice, stream)); + } + break; + + default: + OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributionsRocsparse::addMatrix()"); + } + HIP_CHECK(hipStreamSynchronize(stream)); +} + +void WellContributionsRocsparse::APIalloc() +{ + HIP_CHECK(hipMalloc((void**)&d_Cnnzs_hip, sizeof(d_Cnnzs_hip) * num_blocks * dim * dim_wells)); + HIP_CHECK(hipMalloc((void**)&d_Dnnzs_hip, sizeof(d_Dnnzs_hip) * num_std_wells * dim_wells * dim_wells)); + HIP_CHECK(hipMalloc((void**)&d_Bnnzs_hip, sizeof(d_Bnnzs_hip) * num_blocks * dim * dim_wells)); + HIP_CHECK(hipMalloc((void**)&d_Ccols_hip, sizeof(d_Ccols_hip) * num_blocks)); + HIP_CHECK(hipMalloc((void**)&d_Bcols_hip, sizeof(d_Bcols_hip) * num_blocks)); + HIP_CHECK(hipMalloc((void**)&d_val_pointers_hip, sizeof(d_val_pointers_hip) * (num_std_wells + 1))); +} + +} //namespace Opm diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.hpp b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp new file mode 100644 index 000000000..e9ca6a686 --- /dev/null +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp @@ -0,0 +1,55 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED +#define WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED + +#include + + +#include + + +namespace Opm +{ + +class WellContributionsRocsparse : public WellContributions +{ +public: + void apply_stdwells(double *d_x, double *d_y); + void apply_mswells(double *d_x, double *d_y); + void apply(double *d_x, double *d_y); + +protected: + /// Allocate memory for the StandardWells + void APIalloc() override; + + void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override; + + double *d_Cnnzs_hip, *d_Dnnzs_hip, *d_Bnnzs_hip; + unsigned *d_Ccols_hip, *d_Bcols_hip; + unsigned *d_val_pointers_hip; + + std::vector h_x; + std::vector h_y; +}; + +} //namespace Opm + +#endif From 9bef10a018de32b962152e7548296bceddbd7012 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 14 Apr 2023 09:35:15 +0200 Subject: [PATCH 02/14] Set hipStream for rocsparseWellcontributions --- opm/simulators/linalg/bda/rocsparseSolverBackend.cpp | 6 ++++++ opm/simulators/linalg/bda/rocsparseWellContributions.cpp | 4 ++++ opm/simulators/linalg/bda/rocsparseWellContributions.hpp | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 14392564d..52db594ad 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -153,6 +153,12 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont Timer t_total, t_prec(false), t_spmv(false), t_rest(false); + // set stream here, the WellContributions object is destroyed every linear solve + // the number of wells can change every linear solve + if(wellContribs.getNumWells() > 0){ + static_cast(wellContribs).setStream(stream); + } + // HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) #if HIP_VERSION >= 50400000 ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp index b04131061..504f66609 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -163,6 +163,10 @@ void WellContributionsRocsparse::apply(double *d_x, double *d_y){ } } +void WellContributionsRocsparse::setStream(hipStream_t stream_){ + stream = stream_; +} + void WellContributionsRocsparse::APIaddMatrix(MatrixType type, int* colIndices, double* values, diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.hpp b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp index e9ca6a686..b6e6dc81d 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.hpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp @@ -22,6 +22,7 @@ #include +#include #include @@ -31,10 +32,14 @@ namespace Opm class WellContributionsRocsparse : public WellContributions { +private: + hipStream_t stream; + public: void apply_stdwells(double *d_x, double *d_y); void apply_mswells(double *d_x, double *d_y); void apply(double *d_x, double *d_y); + void setStream(hipStream_t stream); protected: /// Allocate memory for the StandardWells From 50ccea0880bf2d2b64896533b5a35013e5fbe06f Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 14 Apr 2023 09:35:44 +0200 Subject: [PATCH 03/14] Allow rocsparseSolver to choose deviceID --- opm/simulators/linalg/bda/rocsparseSolverBackend.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 52db594ad..d85fc4063 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -96,11 +96,12 @@ using Dune::Timer; template rocsparseSolverBackend::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver(verbosity_, maxit_, tolerance_, platformID_, deviceID_) { - hipDevice_t device; - if(hipDeviceGet(&device, deviceID) != hipSuccess) - { - OPM_THROW(std::logic_error, "HIP Error: could not get device"); + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (static_cast(deviceID) >= numDevices) { + OPM_THROW(std::runtime_error, "Error chosen too high HIP device ID"); } + HIP_CHECK(hipSetDevice(deviceID)); ROCSPARSE_CHECK(rocsparse_create_handle(&handle)); ROCBLAS_CHECK(rocblas_create_handle(&blas_handle)); From cb20d041c3c1d172e03887a8a7c2b94eac08aecb Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Tue, 28 Mar 2023 16:31:50 +0200 Subject: [PATCH 04/14] Allow WellContributionsRocsparse to be used --- .../linalg/bda/WellContributions.cpp | 11 ++++++++- .../linalg/bda/rocsparseSolverBackend.cpp | 23 ++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/opm/simulators/linalg/bda/WellContributions.cpp b/opm/simulators/linalg/bda/WellContributions.cpp index 4e0675b40..f60b1d17a 100644 --- a/opm/simulators/linalg/bda/WellContributions.cpp +++ b/opm/simulators/linalg/bda/WellContributions.cpp @@ -32,6 +32,10 @@ #include #endif +#ifdef HAVE_ROCSPARSE +#include +#endif + namespace Opm { @@ -54,9 +58,14 @@ WellContributions::create(const std::string& accelerator_mode, bool useWellConn) } else if(accelerator_mode.compare("rocsparse") == 0){ if (!useWellConn) { - OPM_THROW(std::logic_error, "Error rocsparse requires --matrix-add-well-contributions=true"); +#if HAVE_ROCSPARSE + return std::make_unique(); +#else + OPM_THROW(std::runtime_error, "Cannot initialize well contributions: rocsparse is not enabled"); +#endif } return std::make_unique(); + } else if(accelerator_mode.compare("amgcl") == 0){ if (!useWellConn) { diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index d85fc4063..635bde61c 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -37,6 +37,7 @@ #undef HAVE_CUDA #include +#include #include @@ -152,7 +153,7 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont double one = 1.0; double mone = -1.0; - Timer t_total, t_prec(false), t_spmv(false), t_rest(false); + Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false); // set stream here, the WellContributions object is destroyed every linear solve // the number of wells can change every linear solve @@ -232,10 +233,18 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); t_spmv.stop(); - t_rest.start(); + t_well.start(); } // apply wellContributions + if(wellContribs.getNumWells() > 0){ + static_cast(wellContribs).apply(d_pw, d_v); + } + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_well.stop(); + t_rest.start(); + } ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1)); alpha = rho / tmp1; @@ -285,10 +294,18 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont if(verbosity >= 3){ HIP_CHECK(hipStreamSynchronize(stream)); t_spmv.stop(); - t_rest.start(); + t_well.start(); } // apply wellContributions + if(wellContribs.getNumWells() > 0){ + static_cast(wellContribs).apply(d_s, d_t); + } + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_well.stop(); + t_rest.start(); + } ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1)); ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2)); From 684397129bffb4dddfa96372fd2d29b1ff85caf6 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Tue, 28 Mar 2023 16:31:50 +0200 Subject: [PATCH 05/14] Make sure rocsparse can get wellcontributions --- opm/simulators/linalg/ISTLSolverEbos.cpp | 166 +++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/opm/simulators/linalg/ISTLSolverEbos.cpp b/opm/simulators/linalg/ISTLSolverEbos.cpp index 241df3fb0..9f8f64e46 100644 --- a/opm/simulators/linalg/ISTLSolverEbos.cpp +++ b/opm/simulators/linalg/ISTLSolverEbos.cpp @@ -203,6 +203,172 @@ void FlexibleSolverInfo::create(const Matrix& matrix, } } +//Razvan<<<<<<< HEAD +//Razvan======= +#if COMPILE_BDA_BRIDGE +template +BdaSolverInfo:: +BdaSolverInfo(const std::string& accelerator_mode, + const int linear_solver_verbosity, + const int maxit, + const double tolerance, + const int platformID, + const int deviceID, + const bool opencl_ilu_parallel, + const std::string& linsolver) + : bridge_(std::make_unique(accelerator_mode, + linear_solver_verbosity, maxit, + tolerance, platformID, deviceID, + opencl_ilu_parallel, linsolver)) + , accelerator_mode_(accelerator_mode) +{} + +template +BdaSolverInfo::~BdaSolverInfo() = default; + +template +template +void BdaSolverInfo:: +prepare(const Grid& grid, + const Dune::CartesianIndexMapper& cartMapper, + const std::vector& wellsForConn, + const std::vector& cellPartition, + const size_t nonzeroes, + const bool useWellConn) +{ + if (numJacobiBlocks_ > 1) { + detail::setWellConnections(grid, cartMapper, wellsForConn, + useWellConn, + wellConnectionsGraph_, + numJacobiBlocks_); + this->blockJacobiAdjacency(grid, cellPartition, nonzeroes); + } +} + +template +bool BdaSolverInfo:: +apply(Vector& rhs, + const bool useWellConn, + WellContribFunc getContribs, + const int rank, + Matrix& matrix, + Vector& x, + Dune::InverseOperatorResult& result) +{ + bool use_gpu = bridge_->getUseGpu(); + if (use_gpu) { + auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn); + bridge_->initWellContributions(*wellContribs, x.N() * x[0].N()); + + // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution +#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE + if (!useWellConn) { + getContribs(*wellContribs); + } +#endif + + if (numJacobiBlocks_ > 1) { + this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_); + // Const_cast needed since the CUDA stuff overwrites values for better matrix condition.. + bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(), + numJacobiBlocks_, rhs, *wellContribs, result); + } + else + bridge_->solve_system(&matrix, &matrix, + numJacobiBlocks_, rhs, *wellContribs, result); + if (result.converged) { + // get result vector x from non-Dune backend, iff solve was successful + bridge_->get_result(x); + return true; + } else { + // warn about CPU fallback + // BdaBridge might have disabled its BdaSolver for this simulation due to some error + // in that case the BdaBridge is disabled and flexibleSolver is always used + // or maybe the BdaSolver did not converge in time, then it will be used next linear solve + if (rank == 0) { + OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system..."); + } + } + } + + return false; +} + +template +bool BdaSolverInfo:: +gpuActive() +{ + return bridge_->getUseGpu(); +} + +template +template +void BdaSolverInfo:: +blockJacobiAdjacency(const Grid& grid, + const std::vector& cell_part, + size_t nonzeroes) +{ + using size_type = typename Matrix::size_type; + using Iter = typename Matrix::CreateIterator; + size_type numCells = grid.size(0); + blockJacobiForGPUILU0_ = std::make_unique(numCells, numCells, + nonzeroes, Matrix::row_wise); + + const auto& lid = grid.localIdSet(); + const auto& gridView = grid.leafGridView(); + auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows + + //Loop over cells + for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row) + { + const auto& elem = *elemIt; + size_type idx = lid.id(elem); + row.insert(idx); + + // Add well non-zero connections + for (const auto wc : wellConnectionsGraph_[idx]) { + row.insert(wc); + } + + int locPart = cell_part[idx]; + + //Add neighbor if it is on the same part + auto isend = gridView.iend(elem); + for (auto is = gridView.ibegin(elem); is!=isend; ++is) + { + //check if face has neighbor + if (is->neighbor()) + { + size_type nid = lid.id(is->outside()); + int nabPart = cell_part[nid]; + if (locPart == nabPart) { + row.insert(nid); + } + } + } + } +} + +template +void BdaSolverInfo:: +copyMatToBlockJac(const Matrix& mat, Matrix& blockJac) +{ + auto rbegin = blockJac.begin(); + auto rend = blockJac.end(); + auto outerRow = mat.begin(); + for (auto row = rbegin; row != rend; ++row, ++outerRow) { + auto outerCol = (*outerRow).begin(); + for (auto col = (*row).begin(); col != (*row).end(); ++col) { + // outerRow is guaranteed to have all column entries that row has! + while(outerCol.index() < col.index()) ++outerCol; + assert(outerCol.index() == col.index()); + *col = *outerCol; // copy nonzero block + } + } +} +#endif // COMPILE_BDA_BRIDGE + +//Razvan>>>>>>> 1a32e4cc1 (Make sure rocsparse can get wellcontributions) template using BM = Dune::BCRSMatrix>; template From ef6be5859efdddc00d8a823b5ccce51bbf409b51 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 14 Apr 2023 12:59:07 +0200 Subject: [PATCH 06/14] Restore old HAVE_CUDA value --- .../linalg/bda/rocsparseWellContributions.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp index 504f66609..22619dfb5 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -19,11 +19,27 @@ #include // CMake +// MultisegmentWellContribution includes the cuda runtime if found by CMake +// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h +// which both define vector types like uchar2, short3 and double4. +// Restore the value (if defined) afterwards. +#ifdef HAVE_CUDA +#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA +#endif + +#undef HAVE_CUDA + #include +#ifdef HIP_HAVE_CUDA_DEFINED +#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED +#undef HIP_HAVE_CUDA_DEFINED +#endif + #include #include + #include From abce3a897c1f2080122826139f416c3e19da84cb Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 14 Apr 2023 13:00:29 +0200 Subject: [PATCH 07/14] Only compile HIP kernel with hipcc --- opm/simulators/linalg/bda/rocsparseWellContributions.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp index 22619dfb5..4e448ff94 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -54,6 +54,7 @@ namespace Opm { +#ifdef __HIP__ /// HIP kernel to apply the standard wellcontributions __global__ void stdwell_apply( const double *Cnnzs, @@ -135,9 +136,12 @@ __global__ void stdwell_apply( y[colIdx*dim + c] -= temp; } } +#endif -void WellContributionsRocsparse::apply_stdwells(double *d_x, double *d_y){ +void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x, + [[maybe_unused]] double *d_y){ +#ifdef __HIP__ unsigned gridDim = num_std_wells; unsigned blockDim = 32; unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2 @@ -147,6 +151,9 @@ void WellContributionsRocsparse::apply_stdwells(double *d_x, double *d_y){ d_x, d_y, dim, dim_wells, d_val_pointers_hip ); HIP_CHECK(hipStreamSynchronize(stream)); +#else + OPM_THROW(std::logic_error, "Error separate wellcontributions for rocsparse only supported when compiling with hipcc"); +#endif } void WellContributionsRocsparse::apply_mswells(double *d_x, double *d_y){ From b2fea287bc38718b06effa71dce4d8a135345548 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 14 Apr 2023 13:01:44 +0200 Subject: [PATCH 08/14] Print time to apply wells for verbosity>=3 --- opm/simulators/linalg/bda/rocsparseSolverBackend.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 635bde61c..56351725d 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -347,6 +347,7 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont std::ostringstream out; out << "rocsparseSolver::prec_apply: " << t_prec.elapsed() << " s\n"; out << "rocsparseSolver::spmv: " << t_spmv.elapsed() << " s\n"; + out << "rocsparseSolver::well: " << t_well.elapsed() << " s\n"; out << "rocsparseSolver::rest: " << t_rest.elapsed() << " s\n"; out << "rocsparseSolver::total_solve: " << res.elapsed << " s\n"; OpmLog::info(out.str()); From ca210612e1fa685d88dc761b78c2c2bd5cc7b00f Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Fri, 30 Jun 2023 14:40:45 +0200 Subject: [PATCH 09/14] Fix hipErrorNoBinaryForGpu when hip_FOUND --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e16ffc284..bcd523838 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -684,6 +684,10 @@ if(USE_BDA_BRIDGE) if(VexCL_FOUND) target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL ) endif() + + if(hip_FOUND) + target_link_libraries( opmsimulators PUBLIC hip::device ) + endif() endif() if(Damaris_FOUND) From 177a46366d82bc36365e435a71341c15611507dc Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Fri, 30 Jun 2023 14:44:08 +0200 Subject: [PATCH 10/14] Add to accelerator-mode option in doc --- opm/simulators/linalg/FlowLinearSolverParameters.hpp | 2 +- opm/simulators/linalg/bda/BdaBridge.cpp | 2 +- opm/simulators/linalg/bda/BdaBridge.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/opm/simulators/linalg/FlowLinearSolverParameters.hpp b/opm/simulators/linalg/FlowLinearSolverParameters.hpp index 4e896b759..b24ad935c 100644 --- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp +++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp @@ -311,7 +311,7 @@ namespace Opm EWOMS_REGISTER_PARAM(TypeTag, bool, LinearSolverPrintJsonDefinition, "Write the JSON definition of the linear solver setup to the DBG file."); EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse preconditioner setup. Valid options are 0: recreate the preconditioner for every linear solve, 1: recreate once every timestep, 2: recreate if last linear solve took more than 10 iterations, 3: never recreate, 4: recreated every CprReuseInterval"); EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseInterval, "Reuse preconditioner interval. Used when CprReuseSetup is set to 4, then the preconditioner will be fully recreated instead of reused every N linear solve, where N is this parameter."); - EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Choose a linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]'"); + EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Choose a linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'"); EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs"); EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs"); EWOMS_REGISTER_PARAM(TypeTag, bool, OpenclIluParallel, "Parallelize ILU decomposition and application on GPU"); diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 1ddb3216c..5434e22fe 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -108,7 +108,7 @@ BdaBridge::BdaBridge(std::string acceler } else if (accelerator_mode.compare("none") == 0) { use_gpu = false; } else { - OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]"); + OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]"); } } diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp index 8001b9552..03d5904c3 100644 --- a/opm/simulators/linalg/bda/BdaBridge.hpp +++ b/opm/simulators/linalg/bda/BdaBridge.hpp @@ -49,7 +49,7 @@ private: public: /// Construct a BdaBridge - /// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]' + /// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]' /// \param[in] linear_solver_verbosity verbosity of BdaSolver /// \param[in] maxit maximum number of iterations for BdaSolver /// \param[in] tolerance required relative tolerance for BdaSolver From e4abc12a05222e62376f539877385fe4672a458c Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Tue, 26 Sep 2023 06:17:18 +0200 Subject: [PATCH 11/14] Adapt rocsparse separate wells PR to changes made to ISTLSolverEbos --- CMakeLists.txt | 4 - opm/simulators/linalg/ISTLSolverEbos.cpp | 166 ------------------ opm/simulators/linalg/ISTLSolverEbosBda.cpp | 4 +- .../linalg/bda/rocsparseWellContributions.cpp | 2 +- 4 files changed, 3 insertions(+), 173 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcd523838..e16ffc284 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -684,10 +684,6 @@ if(USE_BDA_BRIDGE) if(VexCL_FOUND) target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL ) endif() - - if(hip_FOUND) - target_link_libraries( opmsimulators PUBLIC hip::device ) - endif() endif() if(Damaris_FOUND) diff --git a/opm/simulators/linalg/ISTLSolverEbos.cpp b/opm/simulators/linalg/ISTLSolverEbos.cpp index 9f8f64e46..241df3fb0 100644 --- a/opm/simulators/linalg/ISTLSolverEbos.cpp +++ b/opm/simulators/linalg/ISTLSolverEbos.cpp @@ -203,172 +203,6 @@ void FlexibleSolverInfo::create(const Matrix& matrix, } } -//Razvan<<<<<<< HEAD -//Razvan======= -#if COMPILE_BDA_BRIDGE -template -BdaSolverInfo:: -BdaSolverInfo(const std::string& accelerator_mode, - const int linear_solver_verbosity, - const int maxit, - const double tolerance, - const int platformID, - const int deviceID, - const bool opencl_ilu_parallel, - const std::string& linsolver) - : bridge_(std::make_unique(accelerator_mode, - linear_solver_verbosity, maxit, - tolerance, platformID, deviceID, - opencl_ilu_parallel, linsolver)) - , accelerator_mode_(accelerator_mode) -{} - -template -BdaSolverInfo::~BdaSolverInfo() = default; - -template -template -void BdaSolverInfo:: -prepare(const Grid& grid, - const Dune::CartesianIndexMapper& cartMapper, - const std::vector& wellsForConn, - const std::vector& cellPartition, - const size_t nonzeroes, - const bool useWellConn) -{ - if (numJacobiBlocks_ > 1) { - detail::setWellConnections(grid, cartMapper, wellsForConn, - useWellConn, - wellConnectionsGraph_, - numJacobiBlocks_); - this->blockJacobiAdjacency(grid, cellPartition, nonzeroes); - } -} - -template -bool BdaSolverInfo:: -apply(Vector& rhs, - const bool useWellConn, - WellContribFunc getContribs, - const int rank, - Matrix& matrix, - Vector& x, - Dune::InverseOperatorResult& result) -{ - bool use_gpu = bridge_->getUseGpu(); - if (use_gpu) { - auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn); - bridge_->initWellContributions(*wellContribs, x.N() * x[0].N()); - - // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution -#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE - if (!useWellConn) { - getContribs(*wellContribs); - } -#endif - - if (numJacobiBlocks_ > 1) { - this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_); - // Const_cast needed since the CUDA stuff overwrites values for better matrix condition.. - bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(), - numJacobiBlocks_, rhs, *wellContribs, result); - } - else - bridge_->solve_system(&matrix, &matrix, - numJacobiBlocks_, rhs, *wellContribs, result); - if (result.converged) { - // get result vector x from non-Dune backend, iff solve was successful - bridge_->get_result(x); - return true; - } else { - // warn about CPU fallback - // BdaBridge might have disabled its BdaSolver for this simulation due to some error - // in that case the BdaBridge is disabled and flexibleSolver is always used - // or maybe the BdaSolver did not converge in time, then it will be used next linear solve - if (rank == 0) { - OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system..."); - } - } - } - - return false; -} - -template -bool BdaSolverInfo:: -gpuActive() -{ - return bridge_->getUseGpu(); -} - -template -template -void BdaSolverInfo:: -blockJacobiAdjacency(const Grid& grid, - const std::vector& cell_part, - size_t nonzeroes) -{ - using size_type = typename Matrix::size_type; - using Iter = typename Matrix::CreateIterator; - size_type numCells = grid.size(0); - blockJacobiForGPUILU0_ = std::make_unique(numCells, numCells, - nonzeroes, Matrix::row_wise); - - const auto& lid = grid.localIdSet(); - const auto& gridView = grid.leafGridView(); - auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows - - //Loop over cells - for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row) - { - const auto& elem = *elemIt; - size_type idx = lid.id(elem); - row.insert(idx); - - // Add well non-zero connections - for (const auto wc : wellConnectionsGraph_[idx]) { - row.insert(wc); - } - - int locPart = cell_part[idx]; - - //Add neighbor if it is on the same part - auto isend = gridView.iend(elem); - for (auto is = gridView.ibegin(elem); is!=isend; ++is) - { - //check if face has neighbor - if (is->neighbor()) - { - size_type nid = lid.id(is->outside()); - int nabPart = cell_part[nid]; - if (locPart == nabPart) { - row.insert(nid); - } - } - } - } -} - -template -void BdaSolverInfo:: -copyMatToBlockJac(const Matrix& mat, Matrix& blockJac) -{ - auto rbegin = blockJac.begin(); - auto rend = blockJac.end(); - auto outerRow = mat.begin(); - for (auto row = rbegin; row != rend; ++row, ++outerRow) { - auto outerCol = (*outerRow).begin(); - for (auto col = (*row).begin(); col != (*row).end(); ++col) { - // outerRow is guaranteed to have all column entries that row has! - while(outerCol.index() < col.index()) ++outerCol; - assert(outerCol.index() == col.index()); - *col = *outerCol; // copy nonzero block - } - } -} -#endif // COMPILE_BDA_BRIDGE - -//Razvan>>>>>>> 1a32e4cc1 (Make sure rocsparse can get wellcontributions) template using BM = Dune::BCRSMatrix>; template diff --git a/opm/simulators/linalg/ISTLSolverEbosBda.cpp b/opm/simulators/linalg/ISTLSolverEbosBda.cpp index c6f462a4f..e1261d17a 100644 --- a/opm/simulators/linalg/ISTLSolverEbosBda.cpp +++ b/opm/simulators/linalg/ISTLSolverEbosBda.cpp @@ -100,8 +100,8 @@ apply(Vector& rhs, auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn); bridge_->initWellContributions(*wellContribs, x.N() * x[0].N()); - // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution -#if HAVE_CUDA || HAVE_OPENCL + // the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution +#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE if (!useWellConn) { getContribs(*wellContribs); } diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp index 4e448ff94..f8dc0ae32 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -143,7 +143,7 @@ void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x, [[maybe_unused]] double *d_y){ #ifdef __HIP__ unsigned gridDim = num_std_wells; - unsigned blockDim = 32; + unsigned blockDim = 64; unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2 // dim3(N) will create a vector {N, 1, 1} stdwell_apply<<>>( From dcc3bd70f01b5ff12fec54bb4f172d51e01b79c9 Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Fri, 29 Sep 2023 13:22:07 +0200 Subject: [PATCH 12/14] Reinclude changes lost in adaptation to ISTLSolverEbosBda --- opm/simulators/linalg/bda/rocsparseWellContributions.cpp | 8 ++++---- opm/simulators/linalg/bda/rocsparseWellContributions.hpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp index f8dc0ae32..07b46d7d2 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.cpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.cpp @@ -1,5 +1,5 @@ /* - Copyright 2020 Equinor ASA + Copyright 2023 Equinor ASA This file is part of the Open Porous Media project (OPM). @@ -191,9 +191,9 @@ void WellContributionsRocsparse::setStream(hipStream_t stream_){ } void WellContributionsRocsparse::APIaddMatrix(MatrixType type, - int* colIndices, - double* values, - unsigned int val_size) + int* colIndices, + double* values, + unsigned int val_size) { if (!allocated) { OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions"); diff --git a/opm/simulators/linalg/bda/rocsparseWellContributions.hpp b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp index b6e6dc81d..bfa7e888a 100644 --- a/opm/simulators/linalg/bda/rocsparseWellContributions.hpp +++ b/opm/simulators/linalg/bda/rocsparseWellContributions.hpp @@ -1,5 +1,5 @@ /* - Copyright 2020 Equinor ASA + Copyright 2023 Equinor ASA This file is part of the Open Porous Media project (OPM). From 7673118f143d57f832c051258878bf702dd0c6ce Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Sun, 8 Oct 2023 08:57:37 +0200 Subject: [PATCH 13/14] CUDA and HIP not allowed at the same time --- CMakeLists.txt | 7 +++++++ CMakeLists_files.cmake | 9 ++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e16ffc284..3312c08b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,6 +312,13 @@ macro (files_hook) set(HDF5_FOUND OFF) unset(HAVE_HDF5) endif() + if(HAVE_ROCSPARSE AND HAVE_CUDA) + message(WARNING "WARNING! Using CUDA and HIP at the same time is not allowed. Please choose only one of them. Disabling both...\n") + set(CUDA_FOUND OFF) + set(rocsparse_FOUND OFF) + unset(HAVE_CUDA) + unset(HAVE_ROCSPARSE) + endif() # read the list of components from this file (in the project directory); # it should set various lists with the names of the files to include # include needs to be here to make reset HDF5_FOUND available in diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 3202fe8ab..ffedbce48 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -151,11 +151,6 @@ if (Damaris_FOUND AND MPI_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/utils/initDamarisXmlFile.cpp) endif() if(CUDA_FOUND) - if(USE_BDA_BRIDGE) - list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu) - list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cuWellContributions.cu) - endif() - # CUISTL SOURCE list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp) @@ -222,6 +217,10 @@ if(USE_BDA_BRIDGE) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseSolverBackend.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseWellContributions.cpp) endif() + if(CUDA_FOUND) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cuWellContributions.cu) + endif() if(amgcl_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp) if(CUDA_FOUND) From 7e8528b43a8ba0a1094e20f287ec909a9624474f Mon Sep 17 00:00:00 2001 From: Razvan Nane Date: Wed, 11 Oct 2023 13:29:40 +0200 Subject: [PATCH 14/14] Disable only CUDA when ROCm also selected --- CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3312c08b2..5c0418d3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -313,11 +313,9 @@ macro (files_hook) unset(HAVE_HDF5) endif() if(HAVE_ROCSPARSE AND HAVE_CUDA) - message(WARNING "WARNING! Using CUDA and HIP at the same time is not allowed. Please choose only one of them. Disabling both...\n") + message(WARNING "WARNING! Using CUDA and ROCm at the same time is not allowed. Please choose only one of them by setting CMAKE_DISABLE_FIND_PACKAGE_=. Disabling CUDA...\n") set(CUDA_FOUND OFF) - set(rocsparse_FOUND OFF) unset(HAVE_CUDA) - unset(HAVE_ROCSPARSE) endif() # read the list of components from this file (in the project directory); # it should set various lists with the names of the files to include