Merge pull request #5706 from BigDataAccelerate/rename-bda-to-gpu

Remove bda naming from project
This commit is contained in:
Arne Morten Kvarving
2024-11-11 16:26:21 +01:00
committed by GitHub
108 changed files with 456 additions and 456 deletions

View File

@@ -32,7 +32,7 @@ option(USE_CHOW_PATEL_ILU_GPU "Run iterative ILU decomposition on GPU? Requires
option(USE_CHOW_PATEL_ILU_GPU_PARALLEL "Try to use more parallelism on the GPU during the iterative ILU decomposition? Requires USE_CHOW_PATEL_ILU_GPU" OFF)
option(BUILD_FLOW_ALU_GRID "Build flow blackoil with alu grid" OFF)
option(USE_DAMARIS_LIB "Use the Damaris library for asynchronous I/O?" OFF)
option(USE_BDA_BRIDGE "Enable the BDA bridge (GPU/AMGCL solvers)" ON)
option(USE_GPU_BRIDGE "Enable the GPU bridge (GPU/AMGCL solvers)" ON)
option(USE_TRACY_PROFILER "Enable tracy profiling" OFF)
option(CONVERT_CUDA_TO_HIP "Convert CUDA code to HIP (to run on AMD cards)" OFF)
set(OPM_COMPILE_COMPONENTS "2;3;4;5;6;7" CACHE STRING "The components to compile support for")
@@ -141,8 +141,8 @@ if(USE_MPI)
set(HDF5_PREFER_PARALLEL TRUE)
endif()
if(USE_BDA_BRIDGE)
set(COMPILE_BDA_BRIDGE 1)
if(USE_GPU_BRIDGE)
set(COMPILE_GPU_BRIDGE 1)
endif()
# not the same location as most of the other projects? this hook overrides
@@ -381,7 +381,7 @@ macro (files_hook)
set(HDF5_FOUND OFF)
unset(HAVE_HDF5)
endif()
if(HAVE_ROCSPARSE AND HAVE_CUDA AND USE_BDA_BRIDGE) # unsure if this is the correct way to change this
if(HAVE_ROCSPARSE AND HAVE_CUDA AND USE_GPU_BRIDGE) # unsure if this is the correct way to change this
message(WARNING "WARNING! Using CUDA and ROCm at the same time is not allowed. Please choose only one of them by setting CMAKE_DISABLE_FIND_PACKAGE_<rocsparse|CUDA>=<ON|OFF>. Disabling CUDA...\n")
set(CUDA_FOUND OFF)
unset(HAVE_CUDA)
@@ -654,7 +654,7 @@ if(CUDA_FOUND)
target_link_libraries(${tgt} CUDA::cudart)
endforeach()
endif()
if(USE_BDA_BRIDGE)
if(USE_GPU_BRIDGE)
set_tests_properties(cusparseSolver PROPERTIES LABELS gpu_cuda)
endif()
@@ -686,7 +686,7 @@ if(CUDA_FOUND)
PROPERTIES LABELS ${gpu_label})
endif()
if(USE_BDA_BRIDGE)
if(USE_GPU_BRIDGE)
if(OpenCL_FOUND)
target_link_libraries( opmsimulators PUBLIC ${OpenCL_LIBRARIES} )
set_tests_properties(openclSolver solvetransposed3x3 csrToCscOffsetMap

View File

@@ -306,47 +306,47 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
endif()
if(USE_BDA_BRIDGE)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp
opm/simulators/linalg/bda/CprCreation.cpp
opm/simulators/linalg/bda/Misc.cpp
opm/simulators/linalg/bda/WellContributions.cpp
opm/simulators/linalg/bda/MultisegmentWellContribution.cpp
opm/simulators/linalg/ISTLSolverBda.cpp)
if(USE_GPU_BRIDGE)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/GpuBridge.cpp
opm/simulators/linalg/gpubridge/CprCreation.cpp
opm/simulators/linalg/gpubridge/Misc.cpp
opm/simulators/linalg/gpubridge/WellContributions.cpp
opm/simulators/linalg/gpubridge/MultisegmentWellContribution.cpp
opm/simulators/linalg/ISTLSolverGpuBridge.cpp)
if(OPENCL_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BlockedMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclBILU0.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/Reorder.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/ChowPatelIlu.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclBISAI.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclCPR.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/opencl.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclKernels.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/OpenclMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclPreconditioner.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclWellContributions.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/BlockedMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclBILU0.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/Reorder.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclBISAI.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclCPR.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/opencl.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclKernels.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/OpenclMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/opencl/openclWellContributions.cpp)
endif()
if(ROCALUTION_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocalutionSolverBackend.cpp)
endif()
if(rocsparse_FOUND AND rocblas_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseCPR.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseBILU0.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseWellContributions.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/hipKernels.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparseCPR.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparseBILU0.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparseSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparseWellContributions.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/hipKernels.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/rocm/rocsparseMatrix.cpp)
endif()
if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cuWellContributions.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/cuda/cusparseSolverBackend.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/cuda/cuWellContributions.cu)
endif()
if(amgcl_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/amgclSolverBackend.cpp)
if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/amgclSolverBackend.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/gpubridge/cuda/amgclSolverBackend.cu)
endif()
endif()
endif()
@@ -430,7 +430,7 @@ if(MPI_FOUND)
endif()
if(CUDA_FOUND)
if(USE_BDA_BRIDGE)
if(USE_GPU_BRIDGE)
list(APPEND TEST_SOURCE_FILES tests/test_cusparseSolver.cpp)
endif()
endif()
@@ -473,7 +473,7 @@ if (HAVE_CUDA)
endif()
endif()
if(USE_BDA_BRIDGE)
if(USE_GPU_BRIDGE)
if(OPENCL_FOUND)
list(APPEND TEST_SOURCE_FILES tests/test_openclSolver.cpp)
list(APPEND TEST_SOURCE_FILES tests/test_solvetransposed3x3.cpp)
@@ -1021,41 +1021,41 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/wells/WellTest.hpp
opm/simulators/wells/WGState.hpp
)
if (USE_BDA_BRIDGE)
if (USE_GPU_BRIDGE)
list (APPEND PUBLIC_HEADER_FILES
opm/simulators/linalg/bda/amgclSolverBackend.hpp
opm/simulators/linalg/bda/BdaBridge.hpp
opm/simulators/linalg/bda/BdaResult.hpp
opm/simulators/linalg/bda/BdaSolver.hpp
opm/simulators/linalg/bda/CprCreation.hpp
opm/simulators/linalg/bda/Preconditioner.hpp
opm/simulators/linalg/bda/Misc.hpp
opm/simulators/linalg/bda/opencl/openclBILU0.hpp
opm/simulators/linalg/bda/BlockedMatrix.hpp
opm/simulators/linalg/bda/opencl/openclCPR.hpp
opm/simulators/linalg/bda/cuda/cuda_header.hpp
opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp
opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp
opm/simulators/linalg/bda/opencl/openclBISAI.hpp
opm/simulators/linalg/bda/Reorder.hpp
opm/simulators/linalg/bda/opencl/opencl.hpp
opm/simulators/linalg/bda/opencl/openclKernels.hpp
opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp
opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp
opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
opm/simulators/linalg/bda/opencl/openclWellContributions.hpp
opm/simulators/linalg/bda/Matrix.hpp
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
opm/simulators/linalg/bda/rocm/hipKernels.hpp
opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.hpp
opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp
opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp
opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp
opm/simulators/linalg/bda/rocm/rocsparseWellContributions.hpp
opm/simulators/linalg/bda/rocm/rocsparseMatrix.hpp
opm/simulators/linalg/bda/WellContributions.hpp
opm/simulators/linalg/ISTLSolverBda.hpp
opm/simulators/linalg/gpubridge/amgclSolverBackend.hpp
opm/simulators/linalg/gpubridge/GpuBridge.hpp
opm/simulators/linalg/gpubridge/GpuResult.hpp
opm/simulators/linalg/gpubridge/GpuSolver.hpp
opm/simulators/linalg/gpubridge/CprCreation.hpp
opm/simulators/linalg/gpubridge/Preconditioner.hpp
opm/simulators/linalg/gpubridge/Misc.hpp
opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp
opm/simulators/linalg/gpubridge/BlockedMatrix.hpp
opm/simulators/linalg/gpubridge/opencl/openclCPR.hpp
opm/simulators/linalg/gpubridge/cuda/cuda_header.hpp
opm/simulators/linalg/gpubridge/cuda/cusparseSolverBackend.hpp
opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp
opm/simulators/linalg/gpubridge/opencl/openclBISAI.hpp
opm/simulators/linalg/gpubridge/Reorder.hpp
opm/simulators/linalg/gpubridge/opencl/opencl.hpp
opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp
opm/simulators/linalg/gpubridge/opencl/OpenclMatrix.hpp
opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp
opm/simulators/linalg/gpubridge/opencl/openclSolverBackend.hpp
opm/simulators/linalg/gpubridge/opencl/openclWellContributions.hpp
opm/simulators/linalg/gpubridge/Matrix.hpp
opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp
opm/simulators/linalg/gpubridge/rocm/hipKernels.hpp
opm/simulators/linalg/gpubridge/rocm/rocalutionSolverBackend.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparseBILU0.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparseCPR.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparseSolverBackend.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparseWellContributions.hpp
opm/simulators/linalg/gpubridge/rocm/rocsparseMatrix.hpp
opm/simulators/linalg/gpubridge/WellContributions.hpp
opm/simulators/linalg/ISTLSolverGpuBridge.hpp
)
endif()

View File

@@ -1,5 +1,5 @@
set(BDA_DIR opm/simulators/linalg/bda)
set(KERNELS_DIR ${BDA_DIR}/opencl/kernels)
set(GPUBRIDGE_DIR opm/simulators/linalg/gpubridge)
set(KERNELS_DIR ${GPUBRIDGE_DIR}/opencl/kernels)
option(DEBUG_OPENCL_KERNELS_INTEL "Run ocloc to check kernel (works only on Intel)" OFF)
if(DEBUG_OPENCL_KERNELS_INTEL)
@@ -14,7 +14,7 @@ endif()
set(CL_SRC_FILE ${PROJECT_BINARY_DIR}/clSources.cpp)
file(WRITE ${CL_SRC_FILE} "// This file is auto-generated. Do not edit!\n\n")
file(APPEND ${CL_SRC_FILE} "#include <config.h>\n\n")
file(APPEND ${CL_SRC_FILE} "#include <${BDA_DIR}/opencl/openclKernels.hpp>\n\n")
file(APPEND ${CL_SRC_FILE} "#include <${GPUBRIDGE_DIR}/opencl/openclKernels.hpp>\n\n")
file(APPEND ${CL_SRC_FILE} "namespace Opm\{\n\n")
file(APPEND ${CL_SRC_FILE} "namespace Accelerator\{\n\n")

View File

@@ -5,7 +5,7 @@ set (opm-simulators_CONFIG_VAR
HAVE_EWOMS
HAVE_MPI
HAVE_PETSC
COMPILE_BDA_BRIDGE
COMPILE_GPU_BRIDGE
HAVE_CUDA
HAVE_OPENCL
HAVE_OPENCL_HPP

View File

@@ -38,8 +38,8 @@
#include <opm/simulators/linalg/extractMatrix.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/ISTLSolverBda.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/ISTLSolverGpuBridge.hpp>
#else
#include <opm/simulators/linalg/ISTLSolver.hpp>
#endif

View File

@@ -67,7 +67,7 @@ void FlowLinearSolverParameters::init(bool cprRequestedInDataFile)
}
accelerator_mode_ = Parameters::Get<Parameters::AcceleratorMode>();
bda_device_id_ = Parameters::Get<Parameters::BdaDeviceId>();
gpu_device_id_ = Parameters::Get<Parameters::GpuDeviceId>();
opencl_platform_id_ = Parameters::Get<Parameters::OpenclPlatformId>();
opencl_ilu_parallel_ = Parameters::Get<Parameters::OpenclIluParallel>();
}
@@ -142,7 +142,7 @@ void FlowLinearSolverParameters::registerParameters()
Parameters::Register<Parameters::AcceleratorMode>
("Choose a linear solver, usage: "
"'--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'");
Parameters::Register<Parameters::BdaDeviceId>
Parameters::Register<Parameters::GpuDeviceId>
("Choose device ID for cusparseSolver or openclSolver, "
"use 'nvidia-smi' or 'clinfo' to determine valid IDs");
Parameters::Register<Parameters::OpenclPlatformId>
@@ -175,7 +175,7 @@ void FlowLinearSolverParameters::reset()
cpr_reuse_setup_ = 4;
cpr_reuse_interval_ = 30;
accelerator_mode_ = "none";
bda_device_id_ = 0;
gpu_device_id_ = 0;
opencl_platform_id_ = 0;
opencl_ilu_parallel_ = true;
}

View File

@@ -32,7 +32,7 @@
namespace Opm {
template <class TypeTag>
class ISTLSolverBda;
class ISTLSolverGpuBridge;
template <class TypeTag>
class ISTLSolver;
@@ -51,8 +51,8 @@ struct FlowIstlSolverParams {};
template<class TypeTag>
struct LinearSolverBackend<TypeTag, TTag::FlowIstlSolverParams>
{
#if COMPILE_BDA_BRIDGE
using type = ISTLSolverBda<TypeTag>;
#if COMPILE_GPU_BRIDGE
using type = ISTLSolverGpuBridge<TypeTag>;
#else
using type = ISTLSolver<TypeTag>;
#endif
@@ -82,7 +82,7 @@ struct LinearSolverPrintJsonDefinition { static constexpr auto value = true; };
struct CprReuseSetup { static constexpr int value = 4; };
struct CprReuseInterval { static constexpr int value = 30; };
struct AcceleratorMode { static constexpr auto value = "none"; };
struct BdaDeviceId { static constexpr int value = 0; };
struct GpuDeviceId { static constexpr int value = 0; };
struct OpenclPlatformId { static constexpr int value = 0; };
struct OpenclIluParallel { static constexpr bool value = true; }; // note: false should only be used in debug
@@ -112,7 +112,7 @@ struct FlowLinearSolverParameters
int cpr_reuse_setup_;
int cpr_reuse_interval_;
std::string accelerator_mode_;
int bda_device_id_;
int gpu_device_id_;
int opencl_platform_id_;
bool opencl_ilu_parallel_;

View File

@@ -33,9 +33,9 @@
#include <fmt/format.h>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
namespace Opm {

View File

@@ -21,7 +21,7 @@
#include <config.h>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/ISTLSolverBda.hpp>
#include <opm/simulators/linalg/ISTLSolverGpuBridge.hpp>
#include <dune/istl/schwarz.hh>
@@ -33,8 +33,8 @@
#include <fmt/format.h>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#if HAVE_DUNE_ALUGRID
#include <dune/alugrid/grid.hh>
@@ -53,8 +53,8 @@ std::shared_ptr<std::thread> copyThread;
namespace Opm::detail {
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::
BdaSolverInfo(const std::string& accelerator_mode,
GpuSolverInfo<Matrix,Vector>::
GpuSolverInfo(const std::string& accelerator_mode,
const int linear_solver_verbosity,
const int maxit,
const Scalar tolerance,
@@ -70,11 +70,11 @@ BdaSolverInfo(const std::string& accelerator_mode,
{}
template<class Matrix, class Vector>
BdaSolverInfo<Matrix,Vector>::~BdaSolverInfo() = default;
GpuSolverInfo<Matrix,Vector>::~GpuSolverInfo() = default;
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
prepare(const Grid& grid,
const Dune::CartesianIndexMapper<Grid>& cartMapper,
const std::vector<Well>& wellsForConn,
@@ -94,7 +94,7 @@ prepare(const Grid& grid,
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
bool GpuSolverInfo<Matrix,Vector>::
apply(Vector& rhs,
const bool useWellConn,
[[maybe_unused]] WellContribFunc getContribs,
@@ -148,9 +148,9 @@ apply(Vector& rhs,
return true;
} else {
// warn about CPU fallback
// BdaBridge might have disabled its BdaSolver for this simulation due to some error
// in that case the BdaBridge is disabled and flexibleSolver is always used
// or maybe the BdaSolver did not converge in time, then it will be used next linear solve
// GpuBridge might have disabled its GpuSolver for this simulation due to some error;
// in that case the GpuBridge stays disabled and flexibleSolver is always used.
// Alternatively, the GpuSolver may simply not have converged in time; then it will be used again for the next linear solve.
if (rank == 0) {
OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
}
@@ -161,7 +161,7 @@ apply(Vector& rhs,
}
template<class Matrix, class Vector>
bool BdaSolverInfo<Matrix,Vector>::
bool GpuSolverInfo<Matrix,Vector>::
gpuActive()
{
return bridge_->getUseGpu();
@@ -169,7 +169,7 @@ gpuActive()
template<class Matrix, class Vector>
template<class Grid>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
blockJacobiAdjacency(const Grid& grid,
const std::vector<int>& cell_part,
std::size_t nonzeroes)
@@ -217,7 +217,7 @@ blockJacobiAdjacency(const Grid& grid,
}
template<class Matrix, class Vector>
void BdaSolverInfo<Matrix,Vector>::
void GpuSolverInfo<Matrix,Vector>::
copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
{
auto rbegin = blockJac.begin();
@@ -242,7 +242,7 @@ template<class Scalar, int Dim>
using BV = Dune::BlockVector<Dune::FieldVector<Scalar,Dim>>;
#define INSTANTIATE_GRID(T, Dim, Grid) \
template void BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>:: \
template void GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>:: \
prepare(const Grid&, \
const Dune::CartesianIndexMapper<Grid>&, \
const std::vector<Well>&, \
@@ -257,13 +257,13 @@ using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>;
using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridNoComm>;
#endif //HAVE_MPI
#define INSTANTIATE(T,Dim) \
template struct BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
template struct GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
INSTANTIATE_GRID(T,Dim,Dune::CpGrid) \
INSTANTIATE_GRID(T,Dim,ALUGrid3CN) \
INSTANTIATE_GRID(T,Dim,PolyHedralGrid3D)
#else
#define INSTANTIATE(T,Dim) \
template struct BdaSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
template struct GpuSolverInfo<BM<T,Dim>,BV<T,Dim>>; \
INSTANTIATE_GRID(T,Dim,Dune::CpGrid) \
INSTANTIATE_GRID(T,Dim,PolyHedralGrid3D)
#endif

View File

@@ -19,8 +19,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#define OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#ifndef OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED
#define OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED
#include <opm/simulators/linalg/ISTLSolver.hpp>
@@ -34,18 +34,18 @@ namespace Opm {
class Well;
template<class Matrix, class Vector, int block_size> class BdaBridge;
template<class Matrix, class Vector, int block_size> class GpuBridge;
template<class Scalar> class WellContributions;
namespace detail {
template<class Matrix, class Vector>
struct BdaSolverInfo
struct GpuSolverInfo
{
using Scalar = typename Vector::field_type;
using WellContribFunc = std::function<void(WellContributions<Scalar>&)>;
using Bridge = BdaBridge<Matrix,Vector,Matrix::block_type::rows>;
using Bridge = GpuBridge<Matrix,Vector,Matrix::block_type::rows>;
BdaSolverInfo(const std::string& accelerator_mode,
GpuSolverInfo(const std::string& accelerator_mode,
const int linear_solver_verbosity,
const int maxit,
const Scalar tolerance,
@@ -54,7 +54,7 @@ struct BdaSolverInfo
const bool opencl_ilu_parallel,
const std::string& linsolver);
~BdaSolverInfo();
~GpuSolverInfo();
template<class Grid>
void prepare(const Grid& grid,
@@ -100,7 +100,7 @@ private:
/// as a block-structured matrix (one block for all cell variables) for a fixed
/// number of cell variables np .
template <class TypeTag>
class ISTLSolverBda : public ISTLSolver<TypeTag>
class ISTLSolverGpuBridge : public ISTLSolver<TypeTag>
{
protected:
using ParentType = ISTLSolver<TypeTag>;
@@ -134,23 +134,23 @@ public:
/// \param[in] simulator The opm-models simulator object
/// \param[in] parameters Explicit parameters for solver setup, do not
/// read them from command line parameters.
ISTLSolverBda(const Simulator& simulator, const FlowLinearSolverParameters& parameters)
ISTLSolverGpuBridge(const Simulator& simulator, const FlowLinearSolverParameters& parameters)
: ParentType(simulator, parameters)
{
initializeBda();
initializeGpu();
}
/// Construct a system solver.
/// \param[in] simulator The opm-models simulator object
explicit ISTLSolverBda(const Simulator& simulator)
explicit ISTLSolverGpuBridge(const Simulator& simulator)
: ParentType(simulator)
{
initializeBda();
initializeGpu();
}
void initializeBda()
void initializeGpu()
{
OPM_TIMEBLOCK(initializeBda);
OPM_TIMEBLOCK(initializeGpu);
std::string accelerator_mode = Parameters::Get<Parameters::AcceleratorMode>();
// Force accelerator mode to none if using MPI.
@@ -166,15 +166,15 @@ public:
return;
}
// Initialize the BdaBridge
// Initialize the GpuBridge
const int platformID = Parameters::Get<Parameters::OpenclPlatformId>();
const int deviceID = Parameters::Get<Parameters::BdaDeviceId>();
const int deviceID = Parameters::Get<Parameters::GpuDeviceId>();
const int maxit = Parameters::Get<Parameters::LinearSolverMaxIter>();
const double tolerance = Parameters::Get<Parameters::LinearSolverReduction>();
const bool opencl_ilu_parallel = Parameters::Get<Parameters::OpenclIluParallel>();
const int linear_solver_verbosity = this->parameters_[0].linear_solver_verbosity_;
std::string linsolver = Parameters::Get<Parameters::LinearSolver>();
bdaBridge_ = std::make_unique<detail::BdaSolverInfo<Matrix,Vector>>(accelerator_mode,
gpuBridge_ = std::make_unique<detail::GpuSolverInfo<Matrix,Vector>>(accelerator_mode,
linear_solver_verbosity,
maxit,
tolerance,
@@ -191,7 +191,7 @@ public:
// Avoid performing the decomposition on CPU when we also do it on GPU,
// but we do need to initialize the pointers.
if (bdaBridge_) {
if (gpuBridge_) {
ParentType::initPrepare(M,b);
} else {
ParentType::prepare(M,b);
@@ -199,13 +199,13 @@ public:
#if HAVE_OPENCL || HAVE_ROCSPARSE || HAVE_CUDA
// update matrix entries for solvers.
if (firstcall && bdaBridge_) {
if (firstcall && gpuBridge_) {
// model will not change the matrix object. Hence simply store a pointer
// to the original one with a deleter that does nothing.
// Ouch! We need to be able to scale the linear system! Hence const_cast
// setup sparsity pattern for jacobi matrix for preconditioner (only used for openclSolver)
bdaBridge_->numJacobiBlocks_ = Parameters::Get<Parameters::NumJacobiBlocks>();
bdaBridge_->prepare(this->simulator_.vanguard().grid(),
gpuBridge_->numJacobiBlocks_ = Parameters::Get<Parameters::NumJacobiBlocks>();
gpuBridge_->prepare(this->simulator_.vanguard().grid(),
this->simulator_.vanguard().cartesianIndexMapper(),
this->simulator_.vanguard().schedule().getWellsatEnd(),
this->simulator_.vanguard().schedule().getPossibleFutureConnections(),
@@ -233,11 +233,11 @@ public:
bool solve(Vector& x)
{
if (!bdaBridge_) {
if (!gpuBridge_) {
return ParentType::solve(x);
}
OPM_TIMEBLOCK(istlSolverBdaSolve);
OPM_TIMEBLOCK(istlSolverGpuBridgeSolve);
this->solveCount_ += 1;
// Write linear system if asked for.
const int verbosity = this->prm_[this->activeSolverNum_].template get<int>("verbosity", 0);
@@ -257,13 +257,13 @@ public:
{
this->simulator_.problem().wellModel().getWellContributions(w);
};
if (!bdaBridge_->apply(*(this->rhs_), this->useWellConn_, getContribs,
if (!gpuBridge_->apply(*(this->rhs_), this->useWellConn_, getContribs,
this->simulator_.gridView().comm().rank(),
const_cast<Matrix&>(this->getMatrix()),
x, result))
{
if(bdaBridge_->gpuActive()){
// bda solve fails use istl solver setup need to be done since it is not setup in prepare
if(gpuBridge_->gpuActive()){
// GPU solve failed: fall back to the ISTL solver, which must be set up here since that is not done in prepare
ParentType::prepareFlexibleSolver();
}
assert(this->flexibleSolver_[this->activeSolverNum_].solver_);
@@ -277,9 +277,9 @@ public:
}
protected:
std::unique_ptr<detail::BdaSolverInfo<Matrix, Vector>> bdaBridge_;
std::unique_ptr<detail::GpuSolverInfo<Matrix, Vector>> gpuBridge_;
}; // end ISTLSolver
} // namespace Opm
#endif // OPM_ISTLSOLVER_WITH_BDA_HEADER_INCLUDED
#endif // OPM_ISTLSOLVER_WITH_GPUBRIDGE_HEADER_INCLUDED

View File

@@ -22,9 +22,9 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
namespace Opm::Accelerator {

View File

@@ -28,11 +28,11 @@
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/CprCreation.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/CprCreation.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
namespace Opm::Accelerator {
@@ -232,7 +232,7 @@ analyzeHierarchy()
}
}
Opm::BdaBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
Opm::GpuBridge<DuneMat, DuneVec, 1>::copySparsityPatternFromISTL(A, Amatrices.back().rowPointers, Amatrices.back().colIndices);
// compute inverse diagonal values for current level
invDiags.emplace_back(A.N());

View File

@@ -24,8 +24,8 @@
#include <dune/istl/paamg/matrixhierarchy.hh>
#include <dune/istl/umfpack.hh>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/Preconditioner.hpp>
#include <type_traits>

View File

@@ -25,29 +25,29 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#if HAVE_CUDA
#include <opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cusparseSolverBackend.hpp>
#endif
#if HAVE_OPENCL
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
#include <opm/simulators/linalg/bda/opencl/openclWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclWellContributions.hpp>
#endif
#if HAVE_AMGCL
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/amgclSolverBackend.hpp>
#endif
#if HAVE_ROCALUTION
#include <opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocalutionSolverBackend.hpp>
#endif
#if HAVE_ROCSPARSE
#include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseSolverBackend.hpp>
#endif
#include <type_traits>
@@ -56,13 +56,13 @@ using InverseOperatorResult = Dune::InverseOperatorResult;
namespace Opm {
using Accelerator::BdaResult;
using Accelerator::BdaSolver;
using Accelerator::GpuResult;
using Accelerator::GpuSolver;
using Accelerator::SolverStatus;
template<class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::
BdaBridge(std::string accelerator_mode_,
GpuBridge<BridgeMatrix, BridgeVector, block_size>::
GpuBridge(std::string accelerator_mode_,
int linear_solver_verbosity,
[[maybe_unused]] int maxit,
[[maybe_unused]] Scalar tolerance,
@@ -176,7 +176,7 @@ int replaceZeroDiagonal(BridgeMatrix& mat,
// sparsity pattern should stay the same
// this could be removed if Dune::BCRSMatrix features an API call that returns colIndices and rowPointers
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
copySparsityPatternFromISTL(const BridgeMatrix& mat,
std::vector<int>& h_rows,
std::vector<int>& h_cols)
@@ -197,7 +197,7 @@ copySparsityPatternFromISTL(const BridgeMatrix& mat,
if (static_cast<unsigned int>(h_rows[mat.N()]) != mat.nonzeroes()) {
OPM_THROW(std::logic_error,
"Error size of rows do not sum to number of nonzeroes "
"in BdaBridge::copySparsityPatternFromISTL()");
"in GpuBridge::copySparsityPatternFromISTL()");
}
}
@@ -225,7 +225,7 @@ void checkMemoryContiguous(const BridgeMatrix& mat)
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
solve_system(BridgeMatrix* bridgeMat,
BridgeMatrix* jacMat,
int numJacobiBlocks,
@@ -234,14 +234,14 @@ solve_system(BridgeMatrix* bridgeMat,
InverseOperatorResult& res)
{
if (use_gpu) {
BdaResult result;
GpuResult result;
result.converged = false;
const int dim = (*bridgeMat)[0][0].N();
const int Nb = bridgeMat->N();
const int nnzb = bridgeMat->nonzeroes();
if (dim != 3) {
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
OpmLog::warning("GpuSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = false;
return;
}
@@ -294,27 +294,27 @@ solve_system(BridgeMatrix* bridgeMat,
/////////////////////////
// actually solve
// assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
// assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous; if this is not the case, the chosen GpuSolver is expected to exhibit undefined behaviour
SolverStatus status = backend->solve_system(matrix,
static_cast<Scalar*>(&(b[0][0])),
jacMatrix, wellContribs, result);
switch (status) {
case SolverStatus::BDA_SOLVER_SUCCESS:
//OpmLog::info("BdaSolver converged");
case SolverStatus::GPU_SOLVER_SUCCESS:
//OpmLog::info("GpuSolver converged");
break;
case SolverStatus::BDA_SOLVER_ANALYSIS_FAILED:
OpmLog::warning("BdaSolver could not analyse level information of matrix, "
case SolverStatus::GPU_SOLVER_ANALYSIS_FAILED:
OpmLog::warning("GpuSolver could not analyse level information of matrix, "
"perhaps there is still a 0.0 on the diagonal of a "
"block on the diagonal");
break;
case SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
OpmLog::warning("BdaSolver could not create preconditioner, "
case SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED:
OpmLog::warning("GpuSolver could not create preconditioner, "
"perhaps there is still a 0.0 on the diagonal "
"of a block on the diagonal");
break;
default:
OpmLog::warning("BdaSolver returned unknown status code");
OpmLog::warning("GpuSolver returned unknown status code");
}
res.iterations = result.iterations;
@@ -328,7 +328,7 @@ solve_system(BridgeMatrix* bridgeMat,
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
get_result([[maybe_unused]] BridgeVector& x)
{
if (use_gpu) {
@@ -337,7 +337,7 @@ get_result([[maybe_unused]] BridgeVector& x)
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
void GpuBridge<BridgeMatrix, BridgeVector, block_size>::
initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
[[maybe_unused]] unsigned N)
{
@@ -356,19 +356,19 @@ initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
}
// the tests use Dune::FieldMatrix, Flow uses Opm::MatrixBlock
#define INSTANTIATE_BDA_FUNCTIONS(T,n) \
template class BdaBridge<Dune::BCRSMatrix<MatrixBlock<T,n,n>>, \
#define INSTANTIATE_GPU_FUNCTIONS(T,n) \
template class GpuBridge<Dune::BCRSMatrix<MatrixBlock<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>; \
template class BdaBridge<Dune::BCRSMatrix<Dune::FieldMatrix<T,n,n>>, \
template class GpuBridge<Dune::BCRSMatrix<Dune::FieldMatrix<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>;
#define INSTANTIATE_TYPE(T) \
INSTANTIATE_BDA_FUNCTIONS(T,1) \
INSTANTIATE_BDA_FUNCTIONS(T,2) \
INSTANTIATE_BDA_FUNCTIONS(T,3) \
INSTANTIATE_BDA_FUNCTIONS(T,4) \
INSTANTIATE_BDA_FUNCTIONS(T,5) \
INSTANTIATE_BDA_FUNCTIONS(T,6)
INSTANTIATE_GPU_FUNCTIONS(T,1) \
INSTANTIATE_GPU_FUNCTIONS(T,2) \
INSTANTIATE_GPU_FUNCTIONS(T,3) \
INSTANTIATE_GPU_FUNCTIONS(T,4) \
INSTANTIATE_GPU_FUNCTIONS(T,5) \
INSTANTIATE_GPU_FUNCTIONS(T,6)
INSTANTIATE_TYPE(double)

View File

@@ -17,12 +17,12 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BDABRIDGE_HEADER_INCLUDED
#define BDABRIDGE_HEADER_INCLUDED
#ifndef GPUBRIDGE_HEADER_INCLUDED
#define GPUBRIDGE_HEADER_INCLUDED
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
namespace Opm
{
@@ -31,16 +31,16 @@ template<class Scalar> class WellContributions;
typedef Dune::InverseOperatorResult InverseOperatorResult;
/// BdaBridge acts as interface between opm-simulators with the BdaSolvers
/// GpuBridge acts as the interface between opm-simulators and the GpuSolvers
template <class BridgeMatrix, class BridgeVector, int block_size>
class BdaBridge
class GpuBridge
{
private:
using Scalar = typename BridgeVector::field_type;
int verbosity = 0;
bool use_gpu = false;
std::string accelerator_mode;
std::unique_ptr<Accelerator::BdaSolver<Scalar,block_size>> backend;
std::unique_ptr<Accelerator::GpuSolver<Scalar,block_size>> backend;
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::vector<int> h_rows, h_cols; // store the sparsity pattern of the matrix
@@ -49,16 +49,16 @@ private:
std::vector<typename BridgeMatrix::size_type> jacDiagIndices; // same but for jacMatrix
public:
/// Construct a BdaBridge
/// Construct a GpuBridge
/// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'
/// \param[in] linear_solver_verbosity verbosity of BdaSolver
/// \param[in] maxit maximum number of iterations for BdaSolver
/// \param[in] tolerance required relative tolerance for BdaSolver
/// \param[in] linear_solver_verbosity verbosity of GpuSolver
/// \param[in] maxit maximum number of iterations for GpuSolver
/// \param[in] tolerance required relative tolerance for GpuSolver
/// \param[in] platformID the OpenCL platform ID to be used
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
BdaBridge(std::string accelerator_mode,
GpuBridge(std::string accelerator_mode,
int linear_solver_verbosity,
int maxit,
Scalar tolerance,
@@ -87,8 +87,8 @@ public:
/// \param[inout] x vector x, should be of type Dune::BlockVector
void get_result(BridgeVector &x);
/// Return whether the BdaBridge will use the GPU or not
/// return whether the BdaBridge will use the GPU or not
/// Return whether the GpuBridge will use the GPU or not
/// \return whether the GpuBridge will use the GPU or not
bool getUseGpu()
{
return use_gpu;
@@ -113,7 +113,7 @@ public:
{
return accelerator_mode;
}
}; // end class BdaBridge
}; // end class GpuBridge
}

View File

@@ -17,8 +17,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BDARESULT_HEADER_INCLUDED
#define BDARESULT_HEADER_INCLUDED
#ifndef GPURESULT_HEADER_INCLUDED
#define GPURESULT_HEADER_INCLUDED
namespace Opm
{
@@ -27,7 +27,7 @@ namespace Accelerator
/// This class is based on InverseOperatorResult struct from dune/istl/solver.hh
/// It is needed to prevent a compile error in basearray.hh, the nvcc compiler might not support all features in there
class BdaResult
class GpuResult
{
public:
@@ -39,7 +39,7 @@ public:
// Dune 2.6 has a member 'double condition_estimate = -1' in InverseOperatorResult
}; // end class BdaResult
}; // end class GpuResult
} // namespace Accelerator
} // namespace Opm

View File

@@ -17,12 +17,12 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
#define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
#ifndef OPM_GPUSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_GPUSOLVER_BACKEND_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <memory>
@@ -33,16 +33,16 @@ template<class Scalar> class WellContributions;
namespace Accelerator {
enum class SolverStatus {
BDA_SOLVER_SUCCESS,
BDA_SOLVER_ANALYSIS_FAILED,
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
BDA_SOLVER_UNKNOWN_ERROR
GPU_SOLVER_SUCCESS,
GPU_SOLVER_ANALYSIS_FAILED,
GPU_SOLVER_CREATE_PRECONDITIONER_FAILED,
GPU_SOLVER_UNKNOWN_ERROR
};
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
/// This class is abstract; no instances of it can be made, only of its children
template<class Scalar, unsigned int block_size>
class BdaSolver
class GpuSolver
{
protected:
// verbosity
@@ -66,24 +66,24 @@ protected:
bool initialized = false;
public:
/// Construct a BdaSolver
/// Construct a GpuSolver
/// \param[in] linear_solver_verbosity verbosity of solver
/// \param[in] maxit maximum number of iterations for solver
/// \param[in] tolerance required relative tolerance for solver
/// \param[in] platformID the OpenCL platform to be used, only used in openclSolver
/// \param[in] deviceID the device to be used
BdaSolver(int linear_solver_verbosity, int max_it, Scalar tolerance_)
GpuSolver(int linear_solver_verbosity, int max_it, Scalar tolerance_)
: verbosity(linear_solver_verbosity)
, maxit(max_it)
, tolerance(tolerance_)
{}
BdaSolver(int linear_solver_verbosity, int max_it,
GpuSolver(int linear_solver_verbosity, int max_it,
Scalar tolerance_, unsigned int deviceID_)
: verbosity(linear_solver_verbosity)
, maxit(max_it)
, tolerance(tolerance_)
, deviceID(deviceID_) {};
BdaSolver(int linear_solver_verbosity, int max_it,
GpuSolver(int linear_solver_verbosity, int max_it,
double tolerance_, unsigned int platformID_,
unsigned int deviceID_)
: verbosity(linear_solver_verbosity)
@@ -94,17 +94,17 @@ public:
{}
/// Define virtual destructor, so that the derived class destructor will be called
virtual ~BdaSolver() = default;
virtual ~GpuSolver() = default;
/// Define as pure virtual functions, so the derived class must implement them
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) = 0;
GpuResult& res) = 0;
virtual void get_result(Scalar* x) = 0;
}; // end class BdaSolver
}; // end class GpuSolver
} // namespace Accelerator
} // namespace Opm

View File

@@ -1,6 +1,6 @@
#include <config.h>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <cmath>
#include <algorithm>

View File

@@ -18,7 +18,7 @@
*/
#include <config.h> // CMake
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>

View File

@@ -21,7 +21,7 @@
#define OPM_PRECONDITIONER_HEADER_INCLUDED
#if HAVE_OPENCL
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#endif
#include <memory>

View File

@@ -21,7 +21,7 @@
#include "config.h"
#endif // HAVE_CONFIG_H
#include <opm/simulators/linalg/bda/Reorder.hpp>
#include <opm/simulators/linalg/gpubridge/Reorder.hpp>
#include <vector>
#include <cassert>

View File

@@ -18,23 +18,23 @@
*/
#include <config.h> // CMake
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp>
#ifdef HAVE_OPENCL
#include <opm/simulators/linalg/bda/opencl/openclWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclWellContributions.hpp>
#endif
#ifdef HAVE_CUDA
#include <opm/simulators/linalg/bda/cuda/cuWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cuWellContributions.hpp>
#endif
#ifdef HAVE_ROCSPARSE
#include <opm/simulators/linalg/bda/rocm/rocsparseWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseWellContributions.hpp>
#endif
namespace Opm {

View File

@@ -35,7 +35,7 @@ template<class Scalar> class MultisegmentWellContribution;
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver or openclSolver.
/// If the --matrix-add-well-contributions commandline parameter is true, this class should still be used, but be empty.
/// StandardWell and MultisegmentWell are supported for both cusparseSolver and openclSolver.
/// A single instance (or pointer) of this class is passed to the BdaSolver.
/// A single instance (or pointer) of this class is passed to the GpuSolver.
/// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class.
/// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))

View File

@@ -19,9 +19,9 @@
#include <config.h>
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/amgclSolverBackend.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
@@ -283,7 +283,7 @@ void solve_vexcl(const AIJInfo& A,
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::
solve_system(Scalar* b, BdaResult& res)
solve_system(Scalar* b, GpuResult& res)
{
Timer t;
@@ -417,7 +417,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
[[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix->Nb, matrix->nnzbs);
@@ -425,7 +425,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
}
convert_data(matrix->nnzValues, matrix->rowPointers);
solve_system(b, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -20,9 +20,9 @@
#ifndef OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <boost/property_tree/ptree.hpp>
@@ -46,9 +46,9 @@ namespace Opm::Accelerator {
/// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
/// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
template<class Scalar, unsigned int block_size>
class amgclSolverBackend : public BdaSolver<Scalar,block_size>
class amgclSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -115,7 +115,7 @@ private:
/// Solve linear system
/// \param[in] b pointer to b vector
/// \param[inout] res summary of solver result
void solve_system(Scalar* b, BdaResult& res);
void solve_system(Scalar* b, GpuResult& res);
public:
/// Construct an amgcl solver
@@ -142,7 +142,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -24,7 +24,7 @@
#include <amgcl/backend/cuda.hpp>
#include <amgcl/relaxation/cusparse_ilu0.hpp>
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/amgclSolverBackend.hpp>
/// This file is only compiled when both amgcl and CUDA are found by CMake

View File

@@ -19,13 +19,13 @@
#include <config.h> // CMake
#include <opm/simulators/linalg/bda/cuda/cuWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cuWellContributions.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/bda/cuda/cuda_header.hpp>
#include <opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cuda_header.hpp>
#include <cuda_runtime.h>

View File

@@ -20,7 +20,7 @@
#ifndef WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
#define WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <cuda_runtime.h>

View File

@@ -38,7 +38,7 @@ inline void __cudaCheckError(const char *file, const int line, const char *msg){
if (cudaSuccess != err){
std::ostringstream out;
out << cudaGetErrorString(err) << "\n";
out << "BDA error message: " << msg << "\n";
out << "GPU error message: " << msg << "\n";
OPM_THROW(std::logic_error, out.str());
}
}

View File

@@ -25,10 +25,10 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/cuda/cuWellContributions.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/cuda/cuda_header.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cuWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/cuda/cuda_header.hpp>
#include "cublas_v2.h"
#include "cusparse_v2.h"
@@ -94,7 +94,7 @@ cusparseSolverBackend<Scalar,block_size>::~cusparseSolverBackend()
template<class Scalar, unsigned int block_size>
void cusparseSolverBackend<Scalar,block_size>::
gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
int n = N;
@@ -666,7 +666,7 @@ bool cusparseSolverBackend<Scalar,block_size>::create_preconditioner()
template<class Scalar, unsigned int block_size>
void cusparseSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
// actually solve
gpu_pbicgstab(wellContribs, res);
@@ -697,7 +697,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
@@ -707,15 +707,15 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
}
if (analysis_done == false) {
if (!analyse_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
if (create_preconditioner()) {
solve_system(wellContribs, res);
} else {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -24,17 +24,17 @@
#include "cublas_v2.h"
#include "cusparse_v2.h"
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
namespace Opm::Accelerator {
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class cusparseSolverBackend : public BdaSolver<Scalar,block_size>
class cusparseSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -73,7 +73,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix for spmv
@@ -113,7 +113,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult &res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult &res);
public:
/// Construct a cusparseSolver
@@ -138,7 +138,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get resulting vector x after linear solve, also includes post processing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -23,8 +23,8 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp>
#if CHOW_PATEL
@@ -983,7 +983,7 @@ void ChowPatelIlu<block_size>::gpu_decomposition(
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
#define INSTANTIATE_GPU_FUNCTIONS(n) \
template void ChowPatelIlu<n>::decomposition( \
cl::CommandQueue *queue, cl::Context *context, \
BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat, \
@@ -992,14 +992,14 @@ template void ChowPatelIlu<n>::decomposition( \
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows, \
cl::Buffer& d_Uvals, cl::Buffer& d_Ucols, cl::Buffer& d_Urows);
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
INSTANTIATE_GPU_FUNCTIONS(1);
INSTANTIATE_GPU_FUNCTIONS(2);
INSTANTIATE_GPU_FUNCTIONS(3);
INSTANTIATE_GPU_FUNCTIONS(4);
INSTANTIATE_GPU_FUNCTIONS(5);
INSTANTIATE_GPU_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
#undef INSTANTIATE_GPU_FUNCTIONS
} // namespace Accelerator
} // namespace Opm

View File

@@ -23,7 +23,7 @@
#include <mutex>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
// Variables CHOW_PATEL, CHOW_PATEL_GPU and CHOW_PATEL_GPU_PARALLEL are set by CMake
// Pass -DUSE_CHOW_PATEL_ILU=1 to cmake to define CHOW_PATEL and use the iterative ILU decomposition

View File

@@ -22,9 +22,9 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
namespace Opm
{

View File

@@ -22,7 +22,7 @@
#include <vector>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
namespace Opm
{

View File

@@ -18,7 +18,7 @@
*/
#include <config.h>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <string>
namespace Opm

View File

@@ -18,7 +18,7 @@
*/
/// This file includes the relevant OpenCL header(s)
/// All bda files using OpenCL declarations should include this header
/// All gpu files using OpenCL declarations should include this header
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_TARGET_OPENCL_VERSION 120 // indicate OpenCL 1.2 is used

View File

@@ -24,10 +24,10 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/bda/Reorder.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/Reorder.hpp>
#include <sstream>

View File

@@ -20,11 +20,11 @@
#ifndef OPM_OPENCLBILU0_HPP
#define OPM_OPENCLBILU0_HPP
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp>
#include <memory>
#include <mutex>

View File

@@ -24,13 +24,13 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBISAI.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/bda/Reorder.hpp>
#include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp> // disable BISAI if ChowPatel is selected
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBISAI.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/Reorder.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp> // disable BISAI if ChowPatel is selected
#include <sstream>

View File

@@ -22,9 +22,9 @@
#include <mutex>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
namespace Opm::Accelerator {

View File

@@ -28,13 +28,13 @@
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/openclCPR.hpp>
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclCPR.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <type_traits>

View File

@@ -23,14 +23,14 @@
#include <dune/istl/paamg/matrixhierarchy.hh>
#include <dune/istl/umfpack.hh>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/CprCreation.hpp>
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/CprCreation.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/OpenclMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclSolverBackend.hpp>
#include <type_traits>

View File

@@ -18,15 +18,15 @@
*/
#include <config.h>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp> // defines CHOW_PATEL
#include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp> // defines CHOW_PATEL
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <cmath>
#include <sstream>

View File

@@ -24,7 +24,7 @@
#include <memory>
#include <cstddef>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
namespace Opm::Accelerator {

View File

@@ -22,10 +22,10 @@
#include <opm/common/TimingMacros.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/bda/opencl/openclBISAI.hpp>
#include <opm/simulators/linalg/bda/opencl/openclCPR.hpp>
#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclBISAI.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclCPR.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
#include <memory>
#include <string>

View File

@@ -20,8 +20,8 @@
#ifndef OPM_OPENCLPRECONDITIONER_HEADER_INCLUDED
#define OPM_OPENCLPRECONDITIONER_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/Preconditioner.hpp>
namespace Opm::Accelerator {

View File

@@ -25,12 +25,12 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
#include <opm/simulators/linalg/bda/opencl/openclWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclWellContributions.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
// iff true, the nonzeroes of the matrix are copied row-by-row into a contiguous, pinned memory array, then a single GPU memcpy is done
@@ -248,7 +248,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
template<class Scalar, unsigned int block_size>
void openclSolverBackend<Scalar,block_size>::
gpu_pbicgstab(WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
float it;
Scalar rho, rhop, beta, alpha, omega, tmp1, tmp2;
@@ -633,7 +633,7 @@ create_preconditioner()
template<class Scalar, unsigned int block_size>
void openclSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t;
@@ -682,29 +682,29 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
if (analysis_done == false) {
if (!analyze_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
update_system(matrix->nnzValues, b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
copy_system_to_gpu();
} else {
update_system(matrix->nnzValues, b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
update_system_on_gpu();
}
solve_system(wellContribs, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -20,20 +20,20 @@
#ifndef OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
namespace Opm::Accelerator {
/// This class implements an opencl-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class openclSolverBackend : public BdaSolver<Scalar,block_size>
class openclSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -62,7 +62,7 @@ private:
std::unique_ptr<openclPreconditioner<Scalar,block_size>> prec;
// can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
bool is_root; // allow for nested solvers, the root solver is called by GpuBridge
bool analysis_done = false;
std::shared_ptr<BlockedMatrix<Scalar>> mat{}; // original matrix
std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
@@ -73,7 +73,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
@@ -104,7 +104,7 @@ private:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// could be empty
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res);
public:
std::shared_ptr<cl::Context> context{};
@@ -138,11 +138,11 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
/// Data is already on the GPU
// SolverStatus solve_system(BdaResult &res);
// SolverStatus solve_system(GpuResult &res);
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -18,12 +18,12 @@
*/
#include <config.h> // CMake
#include <opm/simulators/linalg/bda/opencl/openclWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclWellContributions.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp>
namespace Opm {

View File

@@ -20,10 +20,10 @@
#ifndef WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
#define WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
#include <opm/simulators/linalg/gpubridge/opencl/openclKernels.hpp>
#include <memory>
#include <vector>

View File

@@ -25,9 +25,9 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/rocm/hipKernels.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/hipKernels.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <hip/hip_runtime.h>
namespace Opm {

View File

@@ -37,7 +37,7 @@
#undef HAVE_CUDA
#include <opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocalutionSolverBackend.hpp>
#include <rocalution.hpp>
#include <base/matrix_formats_ind.hpp> // check if blocks are interpreted as row-major or column-major
@@ -157,7 +157,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
[[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix.get());
@@ -234,7 +234,7 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
OpmLog::info(out.str());
}
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -20,9 +20,9 @@
#ifndef OPM_ROCALUTIONSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_ROCALUTIONSOLVER_BACKEND_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
namespace rocalution {
template<class Matrix, class Vector, class Scalar> class BiCGStab;
@@ -36,9 +36,9 @@ namespace Opm::Accelerator {
/// This class implements a rocalution-based linear solver on GPU
/// It uses ilu0-bicgstab
template<class Scalar, unsigned int block_size>
class rocalutionSolverBackend : public BdaSolver<Scalar,block_size>
class rocalutionSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -95,7 +95,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -24,9 +24,9 @@
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/bda/Reorder.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/Reorder.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <sstream>
#include <thread>

View File

@@ -20,9 +20,9 @@
#ifndef OPM_ROCSPARSEBILU0_HPP
#define OPM_ROCSPARSEBILU0_HPP
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.hpp>
#include <rocblas/rocblas.h>
#include <rocsparse/rocsparse.h>

View File

@@ -28,12 +28,12 @@
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp>
#include <opm/simulators/linalg/bda/rocm/hipKernels.hpp>
#include <opm/simulators/linalg/gpubridge/GpuBridge.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseCPR.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/hipKernels.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <type_traits>

View File

@@ -22,13 +22,13 @@
#include <mutex>
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/CprCreation.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseMatrix.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/CprCreation.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseSolverBackend.hpp>
namespace Opm::Accelerator {

View File

@@ -22,10 +22,10 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseMatrix.hpp>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
#include <opm/simulators/linalg/gpubridge/Matrix.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <sstream>
#include <iostream>

View File

@@ -22,9 +22,9 @@
#include <opm/common/TimingMacros.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseBILU0.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseCPR.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.hpp>
namespace Opm::Accelerator {

View File

@@ -20,7 +20,7 @@
#ifndef OPM_ROCSPARSEPRECONDITIONER_HEADER_INCLUDED
#define OPM_ROCSPARSEPRECONDITIONER_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/Preconditioner.hpp>
#include <rocsparse/rocsparse.h>

View File

@@ -36,14 +36,14 @@
#undef HAVE_CUDA
#include <opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparseWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseSolverBackend.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseWellContributions.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/Preconditioner.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#ifdef HIP_HAVE_CUDA_DEFINED
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
@@ -133,7 +133,7 @@ rocsparseSolverBackend<Scalar,block_size>::~rocsparseSolverBackend()
template<class Scalar, unsigned int block_size>
void rocsparseSolverBackend<Scalar,block_size>::
gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
float it = 0.5;
Scalar rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
@@ -638,7 +638,7 @@ create_preconditioner()
template<class Scalar, unsigned int block_size>
void rocsparseSolverBackend<Scalar,block_size>::
solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res)
{
Timer t;
@@ -678,28 +678,28 @@ solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res)
GpuResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);
copy_system_to_gpu(b);
if (analysis_done == false) {
if (!analyze_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
return SolverStatus::GPU_SOLVER_ANALYSIS_FAILED;
}
}
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
} else {
update_system_on_gpu(b);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
return SolverStatus::GPU_SOLVER_CREATE_PRECONDITIONER_FAILED;
}
}
solve_system(wellContribs, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
return SolverStatus::GPU_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \

View File

@@ -22,11 +22,11 @@
#include <memory>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/GpuResult.hpp>
#include <opm/simulators/linalg/gpubridge/GpuSolver.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparsePreconditioner.hpp>
#include <rocblas/rocblas.h>
#include <rocsparse/rocsparse.h>
@@ -37,9 +37,9 @@ namespace Opm::Accelerator {
/// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
template<class Scalar, unsigned int block_size>
class rocsparseSolverBackend : public BdaSolver<Scalar,block_size>
class rocsparseSolverBackend : public GpuSolver<Scalar,block_size>
{
using Base = BdaSolver<Scalar,block_size>;
using Base = GpuSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@@ -82,7 +82,7 @@ private:
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, GpuResult& res);
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
@@ -109,7 +109,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);
void solve_system(WellContributions<Scalar>& wellContribs, GpuResult& res);
public:
/// Construct a rocsparseSolver
@@ -143,7 +143,7 @@ public:
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
GpuResult& res) override;
/// Get result after linear solve, and perform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@@ -29,7 +29,7 @@
#undef HAVE_CUDA
#include <opm/simulators/linalg/bda/rocm/rocsparseWellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/rocm/rocsparseWellContributions.hpp>
#ifdef HIP_HAVE_CUDA_DEFINED
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
@@ -39,8 +39,8 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/bda/Misc.hpp>
#include <opm/simulators/linalg/gpubridge/MultisegmentWellContribution.hpp>
#include <opm/simulators/linalg/gpubridge/Misc.hpp>
#include <hip/hip_runtime.h>
namespace Opm

View File

@@ -20,7 +20,7 @@
#ifndef WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED
#define WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#include <hip/hip_runtime_api.h>

View File

@@ -88,7 +88,7 @@ struct EnableTerminalOutput { static constexpr bool value = true; };
namespace Opm {
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar> class WellContributions;
#endif
@@ -304,7 +304,7 @@ template<class Scalar> class WellContributions;
void applyDomain(const BVector& x, BVector& Ax, const int domainIndex) const;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
// accumulate the contributions of all Wells in the WellContributions object
void getWellContributions(WellContributions<Scalar>& x) const;
#endif

View File

@@ -49,8 +49,8 @@
#include <opm/simulators/utils/MPIPacker.hpp>
#include <opm/simulators/utils/phaseUsageFromDeck.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
#if HAVE_MPI
@@ -1785,7 +1785,7 @@ namespace Opm {
}
}
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<typename TypeTag>
void
BlackoilWellModel<TypeTag>::

View File

@@ -29,8 +29,8 @@
#include <opm/input/eclipse/Schedule/MSW/WellSegments.hpp>
#if COMPILE_BDA_BRIDGE
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
@@ -206,7 +206,7 @@ recoverSolutionWell(const BVector& x, BVectorWell& xw) const
xw = mswellhelpers::applyUMFPack(*duneDSolver_, resWell);
}
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar, int numWellEq, int numEq>
void MultisegmentWellEquations<Scalar,numWellEq,numEq>::
extract(WellContributions<Scalar>& wellContribs) const

View File

@@ -38,7 +38,7 @@ namespace Opm
template<class Scalar, int numWellEq, int numEq> class MultisegmentWellEquationAccess;
template<class Scalar> class MultisegmentWellGeneric;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
template<class Scalar> class WellContributions;
#endif
template<class Scalar> class WellInterfaceGeneric;
@@ -101,7 +101,7 @@ public:
//! \details xw = inv(D)*(rw - C*x)
void recoverSolutionWell(const BVector& x, BVectorWell& xw) const;
#if COMPILE_BDA_BRIDGE
#if COMPILE_GPU_BRIDGE
//! \brief Add the matrices of this well to the WellContributions object.
void extract(WellContributions<Scalar>& wellContribs) const;
#endif

View File

@@ -46,8 +46,8 @@
#include <cstddef>
#include <string>
#if COMPILE_BDA_BRIDGE && (HAVE_CUDA || HAVE_OPENCL)
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#if COMPILE_GPU_BRIDGE && (HAVE_CUDA || HAVE_OPENCL)
#include <opm/simulators/linalg/gpubridge/WellContributions.hpp>
#endif
namespace Opm

Some files were not shown because too many files have changed in this diff Show More