From 91a3e238ce8fc49cf2b014f76ff87496e8f6e663 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Wed, 18 Jan 2023 10:11:42 +0100 Subject: [PATCH 01/12] Add rocsparseSolver --- CMakeLists.txt | 13 + CMakeLists_files.cmake | 4 + opm-simulators-prereqs.cmake | 4 + opm/simulators/linalg/bda/BdaBridge.cpp | 11 + .../linalg/bda/WellContributions.cpp | 6 + .../linalg/bda/rocalutionSolverBackend.cpp | 6 + .../linalg/bda/rocsparseSolverBackend.cpp | 556 ++++++++++++++++++ .../linalg/bda/rocsparseSolverBackend.hpp | 153 +++++ 8 files changed, 753 insertions(+) create mode 100644 opm/simulators/linalg/bda/rocsparseSolverBackend.cpp create mode 100644 opm/simulators/linalg/bda/rocsparseSolverBackend.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 477d5c23c..a061f36cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,6 +209,14 @@ else() endif() endif() +find_package(rocblas) +find_package(rocsparse) + +if(rocsparse_FOUND AND rocblas_FOUND) + set(HAVE_ROCSPARSE 1) + set(COMPILE_BDA_BRIDGE 1) +endif() + find_package(amgcl) if(amgcl_FOUND) set(HAVE_AMGCL 1) @@ -531,6 +539,11 @@ if(ROCALUTION_FOUND) target_include_directories(opmsimulators PUBLIC ${rocalution_INCLUDE_DIR}/rocalution) endif() +if(rocsparse_FOUND AND rocblas_FOUND) + target_link_libraries( opmsimulators PUBLIC rocsparse ) + target_link_libraries( opmsimulators PUBLIC rocblas ) +endif() + if(VexCL_FOUND) target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL ) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 60b5c1603..836b09fad 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -161,6 +161,9 @@ endif() if(ROCALUTION_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocalutionSolverBackend.cpp) endif() +if(rocsparse_FOUND AND rocblas_FOUND) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseSolverBackend.cpp) +endif() if(COMPILE_BDA_BRIDGE) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp) endif() @@ -337,6 +340,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/bda/Matrix.hpp opm/simulators/linalg/bda/MultisegmentWellContribution.hpp opm/simulators/linalg/bda/rocalutionSolverBackend.hpp + opm/simulators/linalg/bda/rocsparseSolverBackend.hpp opm/simulators/linalg/bda/WellContributions.hpp opm/simulators/linalg/amgcpr.hh opm/simulators/linalg/twolevelmethodcpr.hh diff --git a/opm-simulators-prereqs.cmake b/opm-simulators-prereqs.cmake index dfb5f1711..6f00c8d0b 100644 --- a/opm-simulators-prereqs.cmake +++ b/opm-simulators-prereqs.cmake @@ -12,6 +12,7 @@ set (opm-simulators_CONFIG_VAR HAVE_AMGCL HAVE_VEXCL HAVE_ROCALUTION + HAVE_ROCSPARSE HAVE_SUITESPARSE_UMFPACK_H HAVE_DUNE_ISTL DUNE_ISTL_WITH_CHECKING @@ -44,6 +45,9 @@ set (opm-simulators_DEPS "SuperLU" # ROCALUTION from ROCM framework "rocalution" + # packages from ROCm framework + "rocblas" + "rocsparse" # OPM dependency "opm-common REQUIRED" "opm-grid REQUIRED" diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 0d1a7e18c..e3752133e 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -45,6 +45,10 @@ #include #endif +#if HAVE_ROCSPARSE +#include +#endif + typedef Dune::InverseOperatorResult InverseOperatorResult; namespace Opm @@ -91,6 +95,13 @@ BdaBridge::BdaBridge(std::string acceler backend.reset(new Opm::Accelerator::rocalutionSolverBackend(linear_solver_verbosity, maxit, tolerance)); #else OPM_THROW(std::logic_error, "Error rocalutionSolver was chosen, but rocalution was not found by CMake"); +#endif + } else if (accelerator_mode.compare("rocsparse") == 0) { +#if HAVE_ROCSPARSE + use_gpu = true; // should be replaced by a 'use_bridge' boolean + backend.reset(new Opm::Accelerator::rocsparseSolverBackend(linear_solver_verbosity, maxit, tolerance, platformID, deviceID)); +#else + OPM_THROW(std::logic_error, "Error openclSolver was chosen, but rocsparse was not found by CMake"); #endif } else if (accelerator_mode.compare("none") == 0) { use_gpu = false; diff --git a/opm/simulators/linalg/bda/WellContributions.cpp b/opm/simulators/linalg/bda/WellContributions.cpp index bd3011c6f..4e0675b40 100644 --- a/opm/simulators/linalg/bda/WellContributions.cpp +++ b/opm/simulators/linalg/bda/WellContributions.cpp @@ -52,6 +52,12 @@ WellContributions::create(const std::string& accelerator_mode, bool useWellConn) OPM_THROW(std::runtime_error, "Cannot initialize well contributions: OpenCL is not enabled"); #endif } + else if(accelerator_mode.compare("rocsparse") == 0){ + if (!useWellConn) { + OPM_THROW(std::logic_error, "Error rocsparse requires --matrix-add-well-contributions=true"); + } + return std::make_unique(); + } else if(accelerator_mode.compare("amgcl") == 0){ if (!useWellConn) { OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true"); diff --git a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp index 95b372e67..c09546357 100644 --- a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp @@ -26,6 +26,12 @@ #include #include +// WellContributions are included via the solver +// MultisegmentWellContribution includes the cuda runtime if found by CMake +// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h +// which both define vector types like uchar2, short3 and double4. +#undef HAVE_CUDA + #include #include diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp new file mode 100644 index 000000000..e231d2390 --- /dev/null +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -0,0 +1,556 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include +#include +#include + +#include +#include +#include + +// WellContributions are included via the solver +// MultisegmentWellContribution includes the cuda runtime if found by CMake +// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h +// which both define vector types like uchar2, short3 and double4. +#undef HAVE_CUDA + +#include + +#include + +#include + +#define HIP_CHECK(stat) \ + { \ + if(stat != hipSuccess) \ + { \ + OPM_THROW(std::logic_error, "HIP error"); \ + } \ + } + +#define ROCSPARSE_CHECK(stat) \ + { \ + if(stat != rocsparse_status_success) \ + { \ + OPM_THROW(std::logic_error, "rocsparse error"); \ + } \ + } + +#define ROCBLAS_CHECK(stat) \ + { \ + if(stat != rocblas_status_success) \ + { \ + OPM_THROW(std::logic_error, "rocblas error"); \ + } \ + } + + +namespace Opm +{ +namespace Accelerator +{ + +using Opm::OpmLog; +using Dune::Timer; + +template +rocsparseSolverBackend::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver(verbosity_, maxit_, tolerance_, platformID_, deviceID_) { + +} + + + +template +rocsparseSolverBackend::~rocsparseSolverBackend() { + try { + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + ROCSPARSE_CHECK(rocsparse_destroy_handle(handle)); + ROCBLAS_CHECK(rocblas_destroy_handle(blas_handle)); + } catch (const std::logic_error& err) { + OpmLog::error(err.what()); + } +} + + +template +void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs, + BdaResult& res) +{ + float it = 0.5; + double rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2; + double norm, norm_0; + double zero = 0.0; + double one = 1.0; + double mone = -1.0; + + Timer t_total, t_prec(false), t_spmv(false), t_rest(false); + + ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + d_x, &zero, d_r)); + + ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1)); + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1)); + ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1)); + ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1)); + ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0)); + + if (verbosity >= 2) { + std::ostringstream out; + out << std::scientific << "rocsparseSolver initial norm: " << norm_0; + OpmLog::info(out.str()); + } + + if (verbosity >= 3) { + t_rest.start(); + } + for (it = 0.5; it < maxit; it += 0.5) { + rhop = rho; + ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho)); + + if (it > 1) { + beta = (rho / rhop) * (alpha / omega); + nomega = -omega; + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1)); + ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1)); + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1)); + } + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_rest.stop(); + t_prec.start(); + } + + // apply ilu0 + ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \ + operation, Nb, nnzbs_prec, &one, \ + descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_p, d_t, rocsparse_solve_policy_auto, d_buffer)); + ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \ + operation, Nb, nnzbs_prec, &one, \ + descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_pw, rocsparse_solve_policy_auto, d_buffer)); + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_prec.stop(); + t_spmv.start(); + } + + // spmv + ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + d_pw, &zero, d_v)); + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_spmv.stop(); + t_rest.start(); + } + + // apply wellContributions + + ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1)); + alpha = rho / tmp1; + nalpha = -alpha; + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1)); + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1)); + ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_rest.stop(); + } + + if (norm < tolerance * norm_0) { + break; + } + + it += 0.5; + + // apply ilu0 + if (verbosity >= 3) { + t_prec.start(); + } + ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \ + operation, Nb, nnzbs_prec, &one, \ + descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_r, d_t, rocsparse_solve_policy_auto, d_buffer)); + ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \ + operation, Nb, nnzbs_prec, &one, \ + descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_s, rocsparse_solve_policy_auto, d_buffer)); + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_prec.stop(); + t_spmv.start(); + } + + // spmv + ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + d_s, &zero, d_t)); + if(verbosity >= 3){ + HIP_CHECK(hipStreamSynchronize(stream)); + t_spmv.stop(); + t_rest.start(); + } + + // apply wellContributions + + ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1)); + ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2)); + omega = tmp1 / tmp2; + nomega = -omega; + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1)); + ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1)); + + ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm)); + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + t_rest.stop(); + } + + if (norm < tolerance * norm_0) { + break; + } + + if (verbosity > 1) { + std::ostringstream out; + out << "it: " << it << std::scientific << ", norm: " << norm; + OpmLog::info(out.str()); + } + } + + res.iterations = std::min(it, (float)maxit); + res.reduction = norm / norm_0; + res.conv_rate = static_cast(pow(res.reduction, 1.0 / it)); + res.elapsed = t_total.stop(); + res.converged = (it != (maxit + 0.5)); + + if (verbosity >= 1) { + std::ostringstream out; + out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \ + ", time per iteration: " << res.elapsed / it << ", iterations: " << it; + OpmLog::info(out.str()); + } + if (verbosity >= 3) { + std::ostringstream out; + out << "rocsparseSolver::prec_apply: " << t_prec.elapsed() << " s\n"; + out << "rocsparseSolver::spmv: " << t_spmv.elapsed() << " s\n"; + out << "rocsparseSolver::rest: " << t_rest.elapsed() << " s\n"; + out << "rocsparseSolver::total_solve: " << res.elapsed << " s\n"; + OpmLog::info(out.str()); + } +} + + +template +void rocsparseSolverBackend::initialize(std::shared_ptr matrix, std::shared_ptr jacMatrix) { + this->Nb = matrix->Nb; + this->N = Nb * block_size; + this->nnzb = matrix->nnzbs; + this->nnz = nnzb * block_size * block_size; + nnzbs_prec = nnzb; + + if (jacMatrix) { + useJacMatrix = true; + nnzbs_prec = jacMatrix->nnzbs; + } + + std::ostringstream out; + out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n"; + if (useJacMatrix) { + out << "Blocks in ILU matrix: " << jacMatrix->nnzbs << "\n"; + } + out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n"; + out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n"; + OpmLog::info(out.str()); + out.str(""); + out.clear(); + + mat = matrix; + jacMat = jacMatrix; + + HIP_CHECK(hipMalloc((void**)&d_r, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_p, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_s, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_v, sizeof(double) * N)); + + ROCSPARSE_CHECK(rocsparse_create_handle(&handle)); + ROCBLAS_CHECK(rocblas_create_handle(&blas_handle)); + + ROCSPARSE_CHECK(rocsparse_get_version(handle, &ver)); + ROCSPARSE_CHECK(rocsparse_get_git_rev(handle, rev)); + + out << "rocSPARSE version: " << ver / 100000 << "." << ver / 100 % 1000 << "." + << ver % 100 << "-" << rev << "\n"; + OpmLog::info(out.str()); + + HIP_CHECK(hipStreamCreate(&stream)); + ROCSPARSE_CHECK(rocsparse_set_stream(handle, stream)); + ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream)); + + HIP_CHECK(hipMalloc((void**)&d_Arows, sizeof(rocsparse_int) * (Nb + 1))); + HIP_CHECK(hipMalloc((void**)&d_Acols, sizeof(rocsparse_int) * nnzb)); + HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(double) * nnz)); + HIP_CHECK(hipMalloc((void**)&d_x, sizeof(double) * N)); + HIP_CHECK(hipMalloc((void**)&d_b, sizeof(double) * N)); + + if (useJacMatrix) { + HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1))); + HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec)); + HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size)); + } else { // preconditioner matrix is same + HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size)); + d_Mcols = d_Acols; + d_Mrows = d_Arows; + } + + initialized = true; +} // end initialize() + +template +void rocsparseSolverBackend::copy_system_to_gpu(double *b) { + Timer t; + + HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, sizeof(rocsparse_int) * nnzb, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream)); + if (useJacMatrix) { + HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream)); + } else { + HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream)); + } + HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream)); + HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream)); + + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + std::ostringstream out; + out << "rocsparseSolver::copy_system_to_gpu(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } +} // end copy_system_to_gpu() + +// don't copy rowpointers and colindices, they stay the same +template +void rocsparseSolverBackend::update_system_on_gpu(double *b) { + Timer t; + + HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream)); + if (useJacMatrix) { + HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream)); + } else { + HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream)); + } + HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream)); + HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream)); + + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + std::ostringstream out; + out << "rocsparseSolver::update_system_on_gpu(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } +} // end update_system_on_gpu() + +template +bool rocsparseSolverBackend::analyze_matrix() { + size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize; + Timer t; + + ROCSPARSE_CHECK(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host)); + + ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info)); + + ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M)); + + ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L)); + ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower)); + ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit)); + + ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U)); + ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper)); + ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit)); + + ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(handle, dir, Nb, nnzbs_prec, + descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M)); + ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec, + descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L)); + ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec, + descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U)); + + d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U)); + + HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize)); + + // analysis of ilu LU decomposition + ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(handle, dir, \ + Nb, nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \ + block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); + + int zero_position = 0; + rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position); + if (rocsparse_status_success != status) { + printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position); + return false; + } + + // analysis of ilu apply + ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \ + Nb, nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \ + block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); + ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \ + Nb, nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \ + block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); + + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + std::ostringstream out; + out << "rocsparseSolver::analyze_matrix(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } + + analysis_done = true; + + return true; +} // end analyze_matrix() + + +template +bool rocsparseSolverBackend::create_preconditioner() { + Timer t; + + bool result = true; + ROCSPARSE_CHECK(rocsparse_dbsrilu0(handle, dir, Nb, nnzbs_prec, descr_M, + d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer)); + + // Check for zero pivot + int zero_position = 0; + rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position); + if(rocsparse_status_success != status) + { + printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position); + return false; + } + + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + std::ostringstream out; + out << "rocsparseSolver::create_preconditioner(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } + return result; +} // end create_preconditioner() + + +template +void rocsparseSolverBackend::solve_system(WellContributions &wellContribs, BdaResult &res) { + Timer t; + + // actually solve + try { + gpu_pbicgstab(wellContribs, res); + } catch (const cl::Error& error) { + std::ostringstream oss; + oss << "rocsparseSolverBackend::solve_system error: " << error.what() << "(" << error.err() << ")\n"; + oss << getErrorString(error.err()); + // rethrow exception + OPM_THROW(std::logic_error, oss.str()); + } catch (const std::logic_error& error) { + // rethrow exception by OPM_THROW in the try{}, without this, a segfault occurs + throw error; + } + + if (verbosity >= 3) { + HIP_CHECK(hipStreamSynchronize(stream)); + std::ostringstream out; + out << "rocsparseSolver::solve_system(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } + +} // end solve_system() + + +// copy result to host memory +// caller must be sure that x is a valid array +template +void rocsparseSolverBackend::get_result(double *x) { + Timer t; + + HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); // always wait, caller might want to use x immediately + + if (verbosity >= 3) { + std::ostringstream out; + out << "rocsparseSolver::get_result(): " << t.stop() << " s"; + OpmLog::info(out.str()); + } +} // end get_result() + + +template +SolverStatus rocsparseSolverBackend::solve_system(std::shared_ptr matrix, + double *b, + std::shared_ptr jacMatrix, + WellContributions& wellContribs, + BdaResult &res) +{ + if (initialized == false) { + initialize(matrix, jacMatrix); + copy_system_to_gpu(b); + if (analysis_done == false) { + if (!analyze_matrix()) { + return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED; + } + } + if (!create_preconditioner()) { + return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED; + } + } else { + update_system_on_gpu(b); + if (!create_preconditioner()) { + return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED; + } + } + solve_system(wellContribs, res); + + return SolverStatus::BDA_SOLVER_SUCCESS; +} + + +#define INSTANTIATE_BDA_FUNCTIONS(n) \ +template rocsparseSolverBackend::rocsparseSolverBackend( \ + int, int, double, unsigned int, unsigned int); + +INSTANTIATE_BDA_FUNCTIONS(1); +INSTANTIATE_BDA_FUNCTIONS(2); +INSTANTIATE_BDA_FUNCTIONS(3); +INSTANTIATE_BDA_FUNCTIONS(4); +INSTANTIATE_BDA_FUNCTIONS(5); +INSTANTIATE_BDA_FUNCTIONS(6); + +#undef INSTANTIATE_BDA_FUNCTIONS + +} // namespace Accelerator +} // namespace Opm diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp new file mode 100644 index 000000000..4563b671c --- /dev/null +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp @@ -0,0 +1,153 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_ROCSPARSESOLVER_BACKEND_HEADER_INCLUDED +#define OPM_ROCSPARSESOLVER_BACKEND_HEADER_INCLUDED + +#include + +#include +#include +#include +#include + +#include +#include + +namespace Opm +{ +namespace Accelerator +{ + +/// This class implements a rocsparse-based ilu0-bicgstab solver on GPU +template +class rocsparseSolverBackend : public BdaSolver +{ + typedef BdaSolver Base; + + using Base::N; + using Base::Nb; + using Base::nnz; + using Base::nnzb; + using Base::verbosity; + using Base::platformID; + using Base::deviceID; + using Base::maxit; + using Base::tolerance; + using Base::initialized; + +private: + + bool useJacMatrix = false; + + bool analysis_done = false; + std::shared_ptr mat = nullptr; // original matrix + std::shared_ptr jacMat = nullptr; // matrix for preconditioner + int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M + + rocsparse_direction dir = rocsparse_direction_row; + rocsparse_operation operation = rocsparse_operation_none; + rocsparse_handle handle; + rocblas_handle blas_handle; + rocsparse_mat_descr descr_M, descr_L, descr_U; + rocsparse_mat_info ilu_info; + hipStream_t stream; + + rocsparse_int *d_Arows, *d_Mrows; + rocsparse_int *d_Acols, *d_Mcols; + double *d_Avals, *d_Mvals; + double *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve + double *d_pw, *d_s, *d_t, *d_v; + void *d_buffer; // buffer space, used by rocsparse ilu0 analysis + int ver; + char rev[64]; + + + /// Solve linear system using ilu0-bicgstab + /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A + /// \param[inout] res summary of solver result + void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res); + + /// Initialize GPU and allocate memory + /// \param[in] matrix matrix A + /// \param[in] jacMatrix matrix for preconditioner + void initialize(std::shared_ptr matrix, std::shared_ptr jacMatrix); + + /// Copy linear system to GPU + /// \param[in] b input vector, contains N values + void copy_system_to_gpu(double *b); + + /// Update linear system to GPU + /// \param[in] b input vector, contains N values + void update_system_on_gpu(double *b); + + /// Analyze sparsity pattern to extract parallelism + /// \return true iff analysis was successful + bool analyze_matrix(); + + /// Perform ilu0-decomposition + /// \return true iff decomposition was successful + bool create_preconditioner(); + + /// Solve linear system + /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A + /// \param[inout] res summary of solver result + void solve_system(WellContributions &wellContribs, BdaResult &res); + +public: + /// Construct a openclSolver + /// \param[in] linear_solver_verbosity verbosity of openclSolver + /// \param[in] maxit maximum number of iterations for openclSolver + /// \param[in] tolerance required relative tolerance for openclSolver + /// \param[in] platformID the OpenCL platform to be used + /// \param[in] deviceID the device to be used + rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID); + + /// For the CPR coarse solver + // rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder); + + /// Destroy a openclSolver, and free memory + ~rocsparseSolverBackend(); + + /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format + /// \param[in] matrix matrix A + /// \param[in] b input vector, contains N values + /// \param[in] jacMatrix matrix for preconditioner + /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A + /// \param[inout] res summary of solver result + /// \return status code + SolverStatus solve_system(std::shared_ptr matrix, double *b, + std::shared_ptr jacMatrix, WellContributions& wellContribs, BdaResult &res) override; + + /// Solve scalar linear system, for example a coarse system of an AMG preconditioner + /// Data is already on the GPU + // SolverStatus solve_system(BdaResult &res); + + /// Get result after linear solve, and peform postprocessing if necessary + /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array + void get_result(double *x) override; + +}; // end class rocsparseSolverBackend + +} // namespace Accelerator +} // namespace Opm + +#endif + + From 11ea024fbec1e270480b9507baa81ab520514092 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Wed, 15 Feb 2023 16:24:52 +0100 Subject: [PATCH 02/12] Add test_rocsparseSolver --- CMakeLists_files.cmake | 3 + tests/test_rocsparseSolver.cpp | 217 +++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 tests/test_rocsparseSolver.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 836b09fad..cae09804a 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -229,6 +229,9 @@ endif() if(ROCALUTION_FOUND) list(APPEND TEST_SOURCE_FILES tests/test_rocalutionSolver.cpp) endif() +if(rocsparse_FOUND AND rocblas_FOUND) + list(APPEND TEST_SOURCE_FILES tests/test_rocsparseSolver.cpp) +endif() if(HDF5_FOUND) list(APPEND TEST_SOURCE_FILES tests/test_HDF5File.cpp) list(APPEND TEST_SOURCE_FILES tests/test_HDF5Serializer.cpp) diff --git a/tests/test_rocsparseSolver.cpp b/tests/test_rocsparseSolver.cpp new file mode 100644 index 000000000..9502902f5 --- /dev/null +++ b/tests/test_rocsparseSolver.cpp @@ -0,0 +1,217 @@ +/* + Copyright 2019 SINTEF Digital, Mathematics and Cybernetics. + Copyright 2022 Equinor + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include + +#include +#include +#include + +#define BOOST_TEST_MODULE OPM_test_rocsparseSolver +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +class HIPInitException : public std::logic_error +{ +public: + HIPInitException(std::string msg) : logic_error(msg){}; +}; + +template +using Matrix = Dune::BCRSMatrix>; +template +using Vector = Dune::BlockVector>; + +template +void readLinearSystem(const std::string& matrix_filename, const std::string& rhs_filename, Matrix& matrix, Vector& rhs) +{ + { + std::ifstream mfile(matrix_filename); + if (!mfile) { + throw std::runtime_error("Could not read matrix file"); + } + readMatrixMarket(matrix, mfile); + } + { + std::ifstream rhsfile(rhs_filename); + if (!rhsfile) { + throw std::runtime_error("Could not read rhs file"); + } + readMatrixMarket(rhs, rhsfile); + } +} + +template +Dune::BlockVector> +getDuneSolution(Matrix& matrix, Vector& rhs) +{ + Dune::InverseOperatorResult result; + + Vector x(rhs.size()); + + typedef Dune::MatrixAdapter,Vector,Vector > Operator; + Operator fop(matrix); + double relaxation = 0.9; + Dune::SeqILU,Vector,Vector > prec(matrix, relaxation); + double reduction = 1e-2; + int maxit = 10; + int verbosity = 0; + Dune::BiCGSTABSolver > solver(fop, prec, reduction, maxit, verbosity); + solver.apply(x, rhs, result); + return x; +} + +template +void +createBridge(const boost::property_tree::ptree& prm, std::unique_ptr, Vector, bz> >& bridge) +{ + const int linear_solver_verbosity = prm.get("verbosity"); + const int maxit = prm.get("maxiter"); + const double tolerance = prm.get("tol"); + const bool opencl_ilu_parallel(true); + const int platformID = 0; + const int deviceID = 0; + const std::string accelerator_mode("rocsparse"); + const std::string linsolver("ilu0"); + + try { + bridge = std::make_unique, Vector, bz> >(accelerator_mode, + linear_solver_verbosity, + maxit, + tolerance, + platformID, + deviceID, + opencl_ilu_parallel, + linsolver); + } catch (const std::logic_error& error) { + BOOST_WARN_MESSAGE(true, error.what()); + if (strstr(error.what(), "HIP error: hipInit() failed") != nullptr) + throw HIPInitException(error.what()); + else + throw error; + } +} + +template +Dune::BlockVector> +testRocsparseSolver(std::unique_ptr, Vector, bz> >& bridge, Matrix& matrix, Vector& rhs) +{ + Dune::InverseOperatorResult result; + Vector x(rhs.size()); + auto wellContribs = Opm::WellContributions::create("rocsparse", true); + auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory + // matrix created by readMatrixMarket() did not have contiguous memory + bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result); + bridge->get_result(x); + + return x; +} + +template +Dune::BlockVector> +testRocsparseSolverJacobi(std::unique_ptr, Vector, bz> >& bridge, Matrix& matrix, Vector& rhs) +{ + Dune::InverseOperatorResult result; + Vector x(rhs.size()); + auto wellContribs = Opm::WellContributions::create("rocsparse", true); + auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory + // matrix created by readMatrixMarket() did not have contiguous memory + auto mat3 = matrix; // another deep copy, to make sure Jacobi matrix memory is different + // the sparsity pattern and values are actually the same + bridge->solve_system(&mat2, &mat3, /*numJacobiBlocks=*/2, rhs, *wellContribs, result); + bridge->get_result(x); + + return x; +} + +namespace pt = boost::property_tree; + +void test3(const pt::ptree& prm) +{ + const int bz = 3; + Matrix matrix; + Vector rhs; + readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs); + Vector rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector + auto duneSolution = getDuneSolution(matrix, rhs); + + // create bridge twice, because rocsparseSolver allocates memory for + // the jacobi matrix if passed, during the first solve_system() call + // if not present, no memory is allocated, and subsequent calls + // with a jacobi matrix will cause nans + { + std::unique_ptr, Vector, bz> > bridge; + createBridge(prm, bridge); // create bridge with rocsparseSolver + + // test rocsparseSolver without Jacobi matrix + auto sol = testRocsparseSolver(bridge, matrix, rhs2); + BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size()); + for (size_t i = 0; i < sol.size(); ++i) { + for (int row = 0; row < bz; ++row) { + BOOST_CHECK_CLOSE(sol[i][row], duneSolution[i][row], 1e-3); + } + } + } + + { + std::unique_ptr, Vector, bz> > bridge; + createBridge(prm, bridge); // create bridge with rocsparseSolver + + // test rocsparseSolver with Jacobi matrix + auto solJacobi = testRocsparseSolverJacobi(bridge, matrix, rhs2); + BOOST_REQUIRE_EQUAL(solJacobi.size(), duneSolution.size()); + for (size_t i = 0; i < solJacobi.size(); ++i) { + for (int row = 0; row < bz; ++row) { + BOOST_CHECK_CLOSE(solJacobi[i][row], duneSolution[i][row], 1e-3); + } + } + } +} + + +BOOST_AUTO_TEST_CASE(TestRocsparseSolver) +{ + pt::ptree prm; + + // Read parameters. + { + std::ifstream file("options_flexiblesolver.json"); + pt::read_json(file, prm); + } + + try { + // Test with 3x3 block solvers. + test3(prm); + } catch(const HIPInitException& ) { + BOOST_WARN_MESSAGE(true, "Problem with initializing HIP, skipping test"); + } +} From eaef4b8c8557a8a921b640becc18ce114e51ba0f Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Wed, 25 Jan 2023 14:15:47 +0100 Subject: [PATCH 03/12] Update CMakeLists for rocm --- CMakeLists.txt | 51 +++++++++++++++---- .../linalg/bda/rocsparseSolverBackend.cpp | 14 ++--- .../linalg/bda/rocsparseSolverBackend.hpp | 2 +- tests/test_rocsparseSolver.cpp | 2 +- 4 files changed, 47 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a061f36cb..c6a49a628 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,12 +209,47 @@ else() endif() endif() -find_package(rocblas) -find_package(rocsparse) +message(INFO " HAVE_ROCSPARSE: ${HAVE_ROCSPARSE}") +find_package(hip) -if(rocsparse_FOUND AND rocblas_FOUND) - set(HAVE_ROCSPARSE 1) - set(COMPILE_BDA_BRIDGE 1) +message(INFO " hip version: ${hip_VERSION}") +message(INFO " rocblas version: ${rocblas_VERSION}") +message(INFO " rocsparse version: ${rocsparse_VERSION}") +message(INFO " CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") +message(INFO " HAVE_ROCSPARSE: ${HAVE_ROCSPARSE}") + +if(hip_FOUND) + #message(INFO " CXX_COMPILER: ${CXX_COMPILER}") + get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) + #message(INFO " CXX_COMPILER: ${CXX_COMPILER}") + message(INFO " CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") + if(hip_VERSION VERSION_LESS "5.3") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + message(WARNING " Cannot use gcc for rocm < 5.3") + unset(HAVE_ROCSPARSE) + endif() + find_package(rocalution) + if(ROCALUTION_FOUND) + set(HAVE_ROCALUTION 1) + set(COMPILE_BDA_BRIDGE 1) + endif() + else() + find_package(rocblas) + find_package(rocsparse) + if(rocsparse_FOUND AND rocblas_FOUND) + set(HAVE_ROCSPARSE 1) + set(COMPILE_BDA_BRIDGE 1) + endif() + if(${CXX_COMPILER} STREQUAL "hipcc") + find_package(rocalution) + if(ROCALUTION_FOUND) + set(HAVE_ROCALUTION 1) + set(COMPILE_BDA_BRIDGE 1) + endif() + else() + message(WARNING " Cannot use hipcc for rocm >= 5.4") + endif() + endif() endif() find_package(amgcl) @@ -248,12 +283,6 @@ if(OpenCL_FOUND) endif() endif() -find_package(rocalution) -if(ROCALUTION_FOUND) - set(HAVE_ROCALUTION 1) - set(COMPILE_BDA_BRIDGE 1) -endif() - macro (config_hook) opm_need_version_of ("dune-common") diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index e231d2390..e82c1612f 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -1,5 +1,5 @@ /* - Copyright 2020 Equinor ASA + Copyright 2023 Equinor ASA This file is part of the Open Porous Media project (OPM). @@ -79,14 +79,10 @@ rocsparseSolverBackend::rocsparseSolverBackend(int verbosity_, int m template rocsparseSolverBackend::~rocsparseSolverBackend() { - try { - HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipStreamDestroy(stream)); - ROCSPARSE_CHECK(rocsparse_destroy_handle(handle)); - ROCBLAS_CHECK(rocblas_destroy_handle(blas_handle)); - } catch (const std::logic_error& err) { - OpmLog::error(err.what()); - } + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + ROCSPARSE_CHECK(rocsparse_destroy_handle(handle)); + ROCBLAS_CHECK(rocblas_destroy_handle(blas_handle)); } diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp index 4563b671c..824e90116 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp @@ -1,5 +1,5 @@ /* - Copyright 2020 Equinor ASA + Copyright 2023 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/tests/test_rocsparseSolver.cpp b/tests/test_rocsparseSolver.cpp index 9502902f5..7bc5fad96 100644 --- a/tests/test_rocsparseSolver.cpp +++ b/tests/test_rocsparseSolver.cpp @@ -1,6 +1,6 @@ /* Copyright 2019 SINTEF Digital, Mathematics and Cybernetics. - Copyright 2022 Equinor + Copyright 2023 Equinor This file is part of the Open Porous Media project (OPM). From 2c50951ddd820396ae03cd3521447e0edb35acf6 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Tue, 28 Feb 2023 16:21:21 +0100 Subject: [PATCH 04/12] Implement PR comments --- CMakeLists.txt | 6 ++++-- opm/simulators/linalg/bda/BdaBridge.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c6a49a628..6ba525ec9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,6 +239,8 @@ if(hip_FOUND) if(rocsparse_FOUND AND rocblas_FOUND) set(HAVE_ROCSPARSE 1) set(COMPILE_BDA_BRIDGE 1) + else() + unset(HAVE_ROCSPARSE) endif() if(${CXX_COMPILER} STREQUAL "hipcc") find_package(rocalution) @@ -569,8 +571,8 @@ if(ROCALUTION_FOUND) endif() if(rocsparse_FOUND AND rocblas_FOUND) - target_link_libraries( opmsimulators PUBLIC rocsparse ) - target_link_libraries( opmsimulators PUBLIC rocblas ) + target_link_libraries( opmsimulators PUBLIC roc::rocsparse ) + target_link_libraries( opmsimulators PUBLIC roc::rocblas ) endif() if(VexCL_FOUND) diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index e3752133e..3722825d4 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -101,7 +101,7 @@ BdaBridge::BdaBridge(std::string acceler use_gpu = true; // should be replaced by a 'use_bridge' boolean backend.reset(new Opm::Accelerator::rocsparseSolverBackend(linear_solver_verbosity, maxit, tolerance, platformID, deviceID)); #else - OPM_THROW(std::logic_error, "Error openclSolver was chosen, but rocsparse was not found by CMake"); + OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake"); #endif } else if (accelerator_mode.compare("none") == 0) { use_gpu = false; From 35ea8d7e3063386580d52d5ad778f8ee58bc9a79 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 2 Mar 2023 11:31:05 +0100 Subject: [PATCH 05/12] Remove debug prints --- CMakeLists.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ba525ec9..e4b2bd8a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,20 +209,10 @@ else() endif() endif() -message(INFO " HAVE_ROCSPARSE: ${HAVE_ROCSPARSE}") find_package(hip) -message(INFO " hip version: ${hip_VERSION}") -message(INFO " rocblas version: ${rocblas_VERSION}") -message(INFO " rocsparse version: ${rocsparse_VERSION}") -message(INFO " CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") -message(INFO " HAVE_ROCSPARSE: ${HAVE_ROCSPARSE}") - if(hip_FOUND) - #message(INFO " CXX_COMPILER: ${CXX_COMPILER}") get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) - #message(INFO " CXX_COMPILER: ${CXX_COMPILER}") - message(INFO " CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") if(hip_VERSION VERSION_LESS "5.3") if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") message(WARNING " Cannot use gcc for rocm < 5.3") From ed8f37ec82ed0bf5f819420af9de63dfd5835ec1 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 2 Mar 2023 13:37:58 +0100 Subject: [PATCH 06/12] Initialize HIP/rocsparse in constructor --- .../linalg/bda/rocsparseSolverBackend.cpp | 34 +++++++++++-------- tests/test_rocsparseSolver.cpp | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index e82c1612f..45284061a 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -72,11 +72,29 @@ using Dune::Timer; template rocsparseSolverBackend::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver(verbosity_, maxit_, tolerance_, platformID_, deviceID_) { + hipDevice_t device; + if(hipDeviceGet(&device, deviceID) != hipSuccess) + { + OPM_THROW(std::logic_error, "HIP Error: could not get device"); + } + ROCSPARSE_CHECK(rocsparse_create_handle(&handle)); + ROCBLAS_CHECK(rocblas_create_handle(&blas_handle)); + + ROCSPARSE_CHECK(rocsparse_get_version(handle, &ver)); + ROCSPARSE_CHECK(rocsparse_get_git_rev(handle, rev)); + + std::ostringstream out; + out << "rocSPARSE version: " << ver / 100000 << "." << ver / 100 % 1000 << "." + << ver % 100 << "-" << rev << "\n"; + OpmLog::info(out.str()); + + HIP_CHECK(hipStreamCreate(&stream)); + ROCSPARSE_CHECK(rocsparse_set_stream(handle, stream)); + ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream)); } - template rocsparseSolverBackend::~rocsparseSolverBackend() { HIP_CHECK(hipStreamSynchronize(stream)); @@ -290,20 +308,6 @@ void rocsparseSolverBackend::initialize(std::shared_ptr Date: Thu, 2 Mar 2023 15:45:21 +0100 Subject: [PATCH 07/12] Detect ROCm version to suppress deprecated warning --- .../linalg/bda/rocsparseSolverBackend.cpp | 34 +++++++++++++++++++ .../linalg/bda/rocsparseSolverBackend.hpp | 7 +++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 45284061a..13273d493 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -36,6 +36,7 @@ #include #include +#include #define HIP_CHECK(stat) \ { \ @@ -117,10 +118,18 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont Timer t_total, t_prec(false), t_spmv(false), t_rest(false); +// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) +#if HIP_VERSION >= 50400000 + ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + spmv_info, d_x, &zero, d_r)); +#else ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, Nb, Nb, nnzb, &one, descr_M, d_Avals, d_Arows, d_Acols, block_size, d_x, &zero, d_r)); +#endif ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1)); ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1)); @@ -168,10 +177,17 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont } // spmv +#if HIP_VERSION >= 50400000 + ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + spmv_info, d_pw, &zero, d_v)); +#else ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, Nb, Nb, nnzb, &one, descr_M, d_Avals, d_Arows, d_Acols, block_size, d_pw, &zero, d_v)); +#endif if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); t_spmv.stop(); @@ -214,10 +230,17 @@ void rocsparseSolverBackend::gpu_pbicgstab([[maybe_unused]] WellCont } // spmv +#if HIP_VERSION >= 50400000 + ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation, + Nb, Nb, nnzb, &one, descr_M, + d_Avals, d_Arows, d_Acols, block_size, + spmv_info, d_s, &zero, d_t)); +#else ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation, Nb, Nb, nnzb, &one, descr_M, d_Avals, d_Arows, d_Acols, block_size, d_s, &zero, d_t)); +#endif if(verbosity >= 3){ HIP_CHECK(hipStreamSynchronize(stream)); t_spmv.stop(); @@ -382,7 +405,11 @@ bool rocsparseSolverBackend::analyze_matrix() { ROCSPARSE_CHECK(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host)); ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info)); +#if HIP_VERSION >= 50400000 + ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info)); +#endif + ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A)); ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M)); ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L)); @@ -424,6 +451,13 @@ bool rocsparseSolverBackend::analyze_matrix() { Nb, nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \ block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer)); +#if HIP_VERSION >= 50400000 + ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation, + Nb, Nb, nnzb, + descr_A, d_Avals, d_Arows, d_Acols, + block_size, spmv_info)); +#endif + if (verbosity >= 3) { HIP_CHECK(hipStreamSynchronize(stream)); std::ostringstream out; diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp index 824e90116..4fa470774 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp @@ -30,6 +30,8 @@ #include #include +#include + namespace Opm { namespace Accelerator @@ -65,8 +67,11 @@ private: rocsparse_operation operation = rocsparse_operation_none; rocsparse_handle handle; rocblas_handle blas_handle; - rocsparse_mat_descr descr_M, descr_L, descr_U; + rocsparse_mat_descr descr_A, descr_M, descr_L, descr_U; rocsparse_mat_info ilu_info; +#if HIP_VERSION >= 50400000 + rocsparse_mat_info spmv_info; +#endif hipStream_t stream; rocsparse_int *d_Arows, *d_Mrows; From 675fab353447fe29c28c127c2848c631ee3c5f40 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 16 Mar 2023 13:39:07 +0100 Subject: [PATCH 08/12] Moved HIP config into config_hook. Disabled hipcc as compiler. --- CMakeLists.txt | 63 ++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e4b2bd8a1..a1c323d96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,41 +209,6 @@ else() endif() endif() -find_package(hip) - -if(hip_FOUND) - get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) - if(hip_VERSION VERSION_LESS "5.3") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - message(WARNING " Cannot use gcc for rocm < 5.3") - unset(HAVE_ROCSPARSE) - endif() - find_package(rocalution) - if(ROCALUTION_FOUND) - set(HAVE_ROCALUTION 1) - set(COMPILE_BDA_BRIDGE 1) - endif() - else() - find_package(rocblas) - find_package(rocsparse) - if(rocsparse_FOUND AND rocblas_FOUND) - set(HAVE_ROCSPARSE 1) - set(COMPILE_BDA_BRIDGE 1) - else() - unset(HAVE_ROCSPARSE) - endif() - if(${CXX_COMPILER} STREQUAL "hipcc") - find_package(rocalution) - if(ROCALUTION_FOUND) - set(HAVE_ROCALUTION 1) - set(COMPILE_BDA_BRIDGE 1) - endif() - else() - message(WARNING " Cannot use hipcc for rocm >= 5.4") - endif() - endif() -endif() - find_package(amgcl) if(amgcl_FOUND) set(HAVE_AMGCL 1) @@ -291,6 +256,34 @@ macro (config_hook) include_directories(${EXTRA_INCLUDES}) include(UseDamaris) + + if(hip_FOUND) + get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) + if(${CXX_COMPILER} STREQUAL "hipcc") + message(FATAL_ERROR " hipcc does not work, use g++ or clang++") + endif() + if(hip_VERSION VERSION_LESS "5.3") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + message(WARNING " Cannot use g++ for rocsparse with rocm < 5.3\n Disabling rocsparseSolver") + unset(HAVE_ROCSPARSE) + endif() + if(ROCALUTION_FOUND) + set(HAVE_ROCALUTION 1) + set(COMPILE_BDA_BRIDGE 1) + endif() + else() + if(rocsparse_FOUND AND rocblas_FOUND) + set(HAVE_ROCSPARSE 1) + set(COMPILE_BDA_BRIDGE 1) + else() + unset(HAVE_ROCSPARSE) + endif() + if(ROCALUTION_FOUND) + set(HAVE_ROCALUTION 1) + set(COMPILE_BDA_BRIDGE 1) + endif() + endif() + endif() endmacro (config_hook) macro (prereqs_hook) From bec51c4471a27a8698b15b8022af1aebea9cf56f Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Fri, 24 Mar 2023 10:44:37 +0100 Subject: [PATCH 09/12] Prevent throwing in destructors --- .../linalg/bda/rocsparseSolverBackend.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 13273d493..95729c269 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -98,10 +98,22 @@ rocsparseSolverBackend::rocsparseSolverBackend(int verbosity_, int m template rocsparseSolverBackend::~rocsparseSolverBackend() { - HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipStreamDestroy(stream)); - ROCSPARSE_CHECK(rocsparse_destroy_handle(handle)); - ROCBLAS_CHECK(rocblas_destroy_handle(blas_handle)); + hipError_t hipstatus = hipStreamSynchronize(stream); + if(hipstatus != hipSuccess){ + OpmLog::error("Could not synchronize with hipStream"); + } + hipstatus = hipStreamDestroy(stream); + if(hipstatus != hipSuccess){ + OpmLog::error("Could not destroy hipStream"); + } + rocsparse_status status1 = rocsparse_destroy_handle(handle); + if(status1 != rocsparse_status_success){ + OpmLog::error("Could not destroy rocsparse handle"); + } + rocblas_status status2 = rocblas_destroy_handle(blas_handle); + if(status2 != rocblas_status_success){ + OpmLog::error("Could not destroy rocblas handle"); + } } From 9708a9650100cd0349a6cd90a3f053f59ecb452c Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 30 Mar 2023 15:28:26 +0200 Subject: [PATCH 10/12] Enable hipcc, works if other modules are compiled by clang++ Disable rocalution for hipcc/clang with rocm < 5.3 --- CMakeLists.txt | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a1c323d96..72503fd73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -259,17 +259,11 @@ macro (config_hook) if(hip_FOUND) get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) - if(${CXX_COMPILER} STREQUAL "hipcc") - message(FATAL_ERROR " hipcc does not work, use g++ or clang++") - endif() if(hip_VERSION VERSION_LESS "5.3") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - message(WARNING " Cannot use g++ for rocsparse with rocm < 5.3\n Disabling rocsparseSolver") - unset(HAVE_ROCSPARSE) - endif() - if(ROCALUTION_FOUND) - set(HAVE_ROCALUTION 1) - set(COMPILE_BDA_BRIDGE 1) + if(ROCALUTION_FOUND AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + message(WARNING " Cannot use hipcc/clang for rocalution with rocm < 5.3\n Disabling rocalutionSolver") + unset(ROCALUTION_FOUND) + unset(HAVE_ROCALUTION) endif() else() if(rocsparse_FOUND AND rocblas_FOUND) @@ -278,10 +272,10 @@ macro (config_hook) else() unset(HAVE_ROCSPARSE) endif() - if(ROCALUTION_FOUND) - set(HAVE_ROCALUTION 1) - set(COMPILE_BDA_BRIDGE 1) - endif() + endif() + if(ROCALUTION_FOUND) + set(HAVE_ROCALUTION 1) + set(COMPILE_BDA_BRIDGE 1) endif() endif() endmacro (config_hook) From a4cf50c1fc88026bb50e9c6ccda7ba2b995ed661 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 6 Apr 2023 10:04:44 +0200 Subject: [PATCH 11/12] Move rocalution check to files_hook, because otherwise rocalutionSolverBackend.cpp is included before it is disabled --- CMakeLists.txt | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 72503fd73..339bda4e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -256,7 +256,22 @@ macro (config_hook) include_directories(${EXTRA_INCLUDES}) include(UseDamaris) +endmacro (config_hook) +macro (prereqs_hook) +endmacro (prereqs_hook) + +macro (sources_hook) + if(OPENCL_FOUND) + include(opencl-source-provider) + list(APPEND opm-simulators_SOURCES ${PROJECT_BINARY_DIR}/clSources.cpp) + endif() +endmacro (sources_hook) + +macro (fortran_hook) +endmacro (fortran_hook) + +macro (files_hook) if(hip_FOUND) get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME) if(hip_VERSION VERSION_LESS "5.3") @@ -278,22 +293,6 @@ macro (config_hook) set(COMPILE_BDA_BRIDGE 1) endif() endif() -endmacro (config_hook) - -macro (prereqs_hook) -endmacro (prereqs_hook) - -macro (sources_hook) - if(OPENCL_FOUND) - include(opencl-source-provider) - list(APPEND opm-simulators_SOURCES ${PROJECT_BINARY_DIR}/clSources.cpp) - endif() -endmacro (sources_hook) - -macro (fortran_hook) -endmacro (fortran_hook) - -macro (files_hook) if(MPI_FOUND AND HDF5_FOUND AND NOT HDF5_IS_PARALLEL) message(WARNING "When building parallel OPM flow we need a " "parallel version of hdf5, but found only a serial one. " From 270dcd53fca1d2c2e05a75cd48046ccc3a2500f0 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Thu, 6 Apr 2023 10:11:00 +0200 Subject: [PATCH 12/12] Restore old HAVE_CUDA value --- opm/simulators/linalg/bda/rocalutionSolverBackend.cpp | 10 ++++++++++ opm/simulators/linalg/bda/rocsparseSolverBackend.cpp | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp index c09546357..077e6e8e0 100644 --- a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp @@ -30,6 +30,11 @@ // MultisegmentWellContribution includes the cuda runtime if found by CMake // this leads to inclusion of both amd_hip_vector_types.h and vector_types.h // which both define vector types like uchar2, short3 and double4. +// Restore the value (if defined) afterwards. +#ifdef HAVE_CUDA +#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA +#endif + #undef HAVE_CUDA #include @@ -37,6 +42,11 @@ #include #include // check if blocks are interpreted as row-major or column-major +#ifdef HIP_HAVE_CUDA_DEFINED +#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED +#undef HIP_HAVE_CUDA_DEFINED +#endif + namespace Opm { namespace Accelerator diff --git a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp index 95729c269..3443b23ae 100644 --- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp +++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp @@ -29,6 +29,11 @@ // MultisegmentWellContribution includes the cuda runtime if found by CMake // this leads to inclusion of both amd_hip_vector_types.h and vector_types.h // which both define vector types like uchar2, short3 and double4. +// Restore the value (if defined) afterwards. +#ifdef HAVE_CUDA +#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA +#endif + #undef HAVE_CUDA #include @@ -38,6 +43,11 @@ #include #include +#ifdef HIP_HAVE_CUDA_DEFINED +#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED +#undef HIP_HAVE_CUDA_DEFINED +#endif + #define HIP_CHECK(stat) \ { \ if(stat != hipSuccess) \