Merge pull request #5554 from multitalentloes/refactor_cuistl

refactor cuistl to gpuistl
This commit is contained in:
Kjetil Olsen Lye 2024-08-26 09:55:13 +02:00 committed by GitHub
commit f97389d1b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
82 changed files with 1035 additions and 1035 deletions

View File

@ -530,7 +530,7 @@ if(CUDA_FOUND)
if (NOT USE_HIP)
target_link_libraries( opmsimulators PUBLIC ${CUDA_cusparse_LIBRARY} )
target_link_libraries( opmsimulators PUBLIC ${CUDA_cublas_LIBRARY} )
foreach(tgt test_cuda_safe_call test_cuda_check_last_error test_cuvector)
foreach(tgt test_gpu_safe_call test_cuda_check_last_error test_GpuVector)
target_link_libraries(${tgt} CUDA::cudart)
endforeach()
endif()
@ -545,21 +545,21 @@ if(CUDA_FOUND)
endif()
set_tests_properties(cusparse_safe_call
cublas_safe_call
cuda_safe_call
gpu_safe_call
cuda_check_last_error
cublas_handle
cujac
cudilu
GpuJac
GpuDILU
cusparse_handle
cuSparse_matrix_operations
cuVector_operations
cuvector
cusparsematrix
cuseqilu0
cuowneroverlapcopy
GpuVector
GpuSparseMatrix
GpuSeqILU0
GpuOwnerOverlapCopy
solver_adapter
cubuffer
cuview
GpuBuffer
GpuView
PROPERTIES LABELS ${gpu_label})
endif()

View File

@ -28,15 +28,15 @@
# hipification, we a dependency that will trigger when the cuda source code is
# changed.
macro (ADD_CUDA_OR_HIP_FILE LIST DIR FILE)
set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/cuistl/${FILE}")
set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/gpuistl/${FILE}")
if(CUDA_FOUND AND NOT CONVERT_CUDA_TO_HIP)
list (APPEND ${LIST} "${DIR}/cuistl/${FILE}")
list (APPEND ${LIST} "${DIR}/gpuistl/${FILE}")
else()
# we must hipify the code
# and include the correct path which is in the build/binary dir
string(REPLACE ".cu" ".hip" HIP_SOURCE_FILE ${FILE})
set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/hipistl/${HIP_SOURCE_FILE}")
set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/gpuistl_hip/${HIP_SOURCE_FILE}")
file(RELATIVE_PATH relpath ${PROJECT_SOURCE_DIR} ${hip_file_path})
# add a custom command that will hipify
@ -207,42 +207,42 @@ endif()
# add these files if we should compile the hip code
if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/cusparse_matrix_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuBuffer.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBuffer.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuJac.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg set_device.cpp)
# HEADERS
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_matrix_operations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cublas_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBuffer.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBuffer.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuDILU.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp)
@ -256,12 +256,12 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/deviceBlockOperations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpuThreadUtils.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSeqILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSeqILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/fix_zero_diagonal.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerConvertFieldTypeAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuOwnerOverlapCopy.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuOwnerOverlapCopy.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg SolverAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBlockPreconditioner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
endif()
@ -389,19 +389,19 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cubuffer.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuview.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuBuffer.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuView.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cudilu.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cujac.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuowneroverlapcopy.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuseqilu0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuOwnerOverlapCopy.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp)

View File

@ -15,6 +15,6 @@ hipify-perl $input_file > $output_file
sed -i 's/^#include <hipblas\.h>/#include <hipblas\/hipblas.h>/g' $output_file
sed -i 's/^#include <hipsparse\.h>/#include <hipsparse\/hipsparse.h>/g' $output_file
# make sure includes refer to hipistl/ files (the ones that are also hipified)
sed -i 's/cuistl\//hipistl\//g' $output_file
sed -i 's/gpuistl\//gpuistl_hip\//g' $output_file
echo "$output_file hipified"

View File

@ -36,7 +36,7 @@
#endif
#if HAVE_CUDA
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
#endif
namespace Opm {
@ -163,7 +163,7 @@ void Main::initMPI()
}
#if HAVE_CUDA
Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
#endif
#endif // HAVE_MPI

View File

@ -39,9 +39,9 @@
#if HAVE_CUDA
#if USE_HIP
#include <opm/simulators/linalg/hipistl/SolverAdapter.hpp>
#include <opm/simulators/linalg/gpuistl_hip/SolverAdapter.hpp>
#else
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
#endif
#endif
@ -205,7 +205,7 @@ namespace Dune
#endif
#if HAVE_CUDA
} else if (solver_type == "cubicgstab") {
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
*linearoperator_for_solver_,
*scalarproduct_,
preconditioner_,

View File

@ -22,22 +22,22 @@
// both with the normal cuistl path, and the hipistl path
#if HAVE_CUDA
#if USE_HIP
#include <opm/simulators/linalg/hipistl/CuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/hipistl/CuDILU.hpp>
#include <opm/simulators/linalg/hipistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/hipistl/CuJac.hpp>
#include <opm/simulators/linalg/hipistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/hipistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl_hip/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/gpuistl_hip/GpuDILU.hpp>
#include <opm/simulators/linalg/gpuistl_hip/OpmCuILU0.hpp>
#include <opm/simulators/linalg/gpuistl_hip/GpuJac.hpp>
#include <opm/simulators/linalg/gpuistl_hip/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/gpuistl_hip/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl_hip/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/gpuistl_hip/detail/gpu_safe_call.hpp>
#else
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#endif
#endif

View File

@ -322,39 +322,39 @@ struct StandardPreconditioners {
}
#if HAVE_CUDA
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
using GpuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
auto cuJac = std::make_shared<CuJac>(op.getmat(), w);
using GpuJac =
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuJac = std::make_shared<GpuJac>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuJac>>(gpuJac);
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@ -362,11 +362,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
#endif
@ -582,68 +582,68 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
});
#if HAVE_CUDA
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(
std::make_shared<CuILU0>(op.getmat(), w));
using GpuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(
std::make_shared<GpuILU0>(op.getmat(), w));
});
F::addCreator("CUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuILU0 = typename cuistl::
CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using GpuILU0 = typename gpuistl::
GpuSeqILU0<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuILU0>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuILU0>(converted->getConvertedMatrix(), w));
converted->setUnderlyingPreconditioner(adapted);
return converted;
});
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CUJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>(
std::make_shared<CUJac>(op.getmat(), w));
using GPUJac =
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUJac>>(
std::make_shared<GPUJac>(op.getmat(), w));
});
F::addCreator("OPMCUILU0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
converted->setUnderlyingPreconditioner(adapted);
return converted;
});

View File

@ -16,22 +16,22 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
#ifndef OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
#define OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
#include <dune/common/shared_ptr.hh>
#include <memory>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
//!
//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof).
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
class GpuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
{
public:
using domain_type = X;
@ -47,13 +47,13 @@ public:
//! @param c The communication object for syncing overlap and copy
//! data points. (E.~g. OwnerOverlapCopyCommunication )
//!
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
GpuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
: m_preconditioner(p)
, m_communication(c)
{
}
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
GpuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
: m_preconditioner(p)
, m_communication(Dune::stackobject_to_shared_ptr(c))
{
@ -125,5 +125,5 @@ private:
//! \brief the communication object
std::shared_ptr<const communication_type> m_communication;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -20,83 +20,83 @@
#include <cuda_runtime.h>
#include <algorithm>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
CuBuffer<T>::CuBuffer(const std::vector<T>& data)
: CuBuffer(data.data(), data.size())
GpuBuffer<T>::GpuBuffer(const std::vector<T>& data)
: GpuBuffer(data.data(), data.size())
{
}
template <class T>
CuBuffer<T>::CuBuffer(const size_t numberOfElements)
GpuBuffer<T>::GpuBuffer(const size_t numberOfElements)
: m_numberOfElements(numberOfElements)
{
if (numberOfElements < 1) {
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
}
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
}
template <class T>
CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements)
: CuBuffer(numberOfElements)
GpuBuffer<T>::GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
: GpuBuffer(numberOfElements)
{
OPM_CUDA_SAFE_CALL(cudaMemcpy(
OPM_GPU_SAFE_CALL(cudaMemcpy(
m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}
template <class T>
CuBuffer<T>::CuBuffer(const CuBuffer<T>& other)
: CuBuffer(other.m_numberOfElements)
GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
: GpuBuffer(other.m_numberOfElements)
{
assertHasElements();
assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice,
m_numberOfElements * sizeof(T),
cudaMemcpyDeviceToDevice));
}
template <class T>
CuBuffer<T>::~CuBuffer()
GpuBuffer<T>::~GpuBuffer()
{
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
}
template <typename T>
typename CuBuffer<T>::size_type
CuBuffer<T>::size() const
typename GpuBuffer<T>::size_type
GpuBuffer<T>::size() const
{
return m_numberOfElements;
}
template <typename T>
void
CuBuffer<T>::resize(size_t newSize)
GpuBuffer<T>::resize(size_t newSize)
{
if (newSize < 1) {
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
}
// Allocate memory for the new buffer
T* tmpBuffer = nullptr;
OPM_CUDA_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
// Move the data from the old to the new buffer with truncation
size_t sizeOfMove = std::min({m_numberOfElements, newSize});
OPM_CUDA_SAFE_CALL(cudaMemcpy(tmpBuffer,
OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
m_dataOnDevice,
sizeOfMove * sizeof(T),
cudaMemcpyDeviceToDevice));
// free the old buffer
OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice));
OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
// swap the buffers
m_dataOnDevice = tmpBuffer;
@ -107,7 +107,7 @@ CuBuffer<T>::resize(size_t newSize)
template <typename T>
std::vector<T>
CuBuffer<T>::asStdVector() const
GpuBuffer<T>::asStdVector() const
{
std::vector<T> temporary(m_numberOfElements);
copyToHost(temporary);
@ -116,14 +116,14 @@ CuBuffer<T>::asStdVector() const
template <typename T>
void
CuBuffer<T>::assertSameSize(const CuBuffer<T>& x) const
GpuBuffer<T>::assertSameSize(const GpuBuffer<T>& x) const
{
assertSameSize(x.m_numberOfElements);
}
template <typename T>
void
CuBuffer<T>::assertSameSize(size_t size) const
GpuBuffer<T>::assertSameSize(size_t size) const
{
if (size != m_numberOfElements) {
OPM_THROW(std::invalid_argument,
@ -133,7 +133,7 @@ CuBuffer<T>::assertSameSize(size_t size) const
template <typename T>
void
CuBuffer<T>::assertHasElements() const
GpuBuffer<T>::assertHasElements() const
{
if (m_numberOfElements <= 0) {
OPM_THROW(std::invalid_argument, "We have 0 elements");
@ -142,21 +142,21 @@ CuBuffer<T>::assertHasElements() const
template <typename T>
T*
CuBuffer<T>::data()
GpuBuffer<T>::data()
{
return m_dataOnDevice;
}
template <typename T>
const T*
CuBuffer<T>::data() const
GpuBuffer<T>::data() const
{
return m_dataOnDevice;
}
template <class T>
void
CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
GpuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
if (numberOfElements > size()) {
OPM_THROW(std::runtime_error,
@ -164,41 +164,41 @@ CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
size(),
numberOfElements));
}
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}
template <class T>
void
CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
GpuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{
assertSameSize(numberOfElements);
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
}
template <class T>
void
CuBuffer<T>::copyFromHost(const std::vector<T>& data)
GpuBuffer<T>::copyFromHost(const std::vector<T>& data)
{
copyFromHost(data.data(), data.size());
}
template <class T>
void
CuBuffer<T>::copyToHost(std::vector<T>& data) const
GpuBuffer<T>::copyToHost(std::vector<T>& data) const
{
copyToHost(data.data(), data.size());
}
template class CuBuffer<double>;
template class CuBuffer<float>;
template class CuBuffer<int>;
template class GpuBuffer<double>;
template class GpuBuffer<float>;
template class GpuBuffer<int>;
template <class T>
CuView<const T> make_view(const CuBuffer<T>& buf) {
return CuView<const T>(buf.data(), buf.size());
GpuView<const T> make_view(const GpuBuffer<T>& buf) {
return GpuView<const T>(buf.data(), buf.size());
}
template CuView<const double> make_view<double>(const CuBuffer<double>&);
template CuView<const float> make_view<float>(const CuBuffer<float>&);
template CuView<const int> make_view<int>(const CuBuffer<int>&);
template GpuView<const double> make_view<double>(const GpuBuffer<double>&);
template GpuView<const float> make_view<float>(const GpuBuffer<float>&);
template GpuView<const int> make_view<int>(const GpuBuffer<int>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@ -16,35 +16,35 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUBUFFER_HEADER_HPP
#define OPM_CUBUFFER_HEADER_HPP
#ifndef OPM_GPUBUFFER_HEADER_HPP
#define OPM_GPUBUFFER_HEADER_HPP
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <exception>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <vector>
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief The CuBuffer class is a simple container class for the GPU.
* @brief The GpuBuffer class is a simple container class for the GPU.
*
*
* Example usage:
*
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
* #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
*
* void someFunction() {
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
*
* auto dataOnGPU = CuBuffer<double>(someDataOnCPU);
* auto dataOnGPU = GpuBuffer<double>(someDataOnCPU);
*
* auto stdVectorOnCPU = dataOnGPU.asStdVector();
* }
@ -52,7 +52,7 @@ namespace Opm::cuistl
* @tparam T the type to store. Can be either float, double or int.
*/
template <typename T>
class CuBuffer
class GpuBuffer
{
public:
using field_type = T;
@ -60,17 +60,17 @@ public:
using value_type = T;
/**
* @brief CuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
* @brief GpuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
* this newly allocated memory.
*
* @note This does synchronous transfer.
*
* @param other the buffer to copy from
*/
CuBuffer(const CuBuffer<T>& other);
GpuBuffer(const GpuBuffer<T>& other);
/**
* @brief CuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
* @brief GpuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
* this newly allocated memory.
*
* @note This does CPU to GPU transfer.
@ -78,23 +78,23 @@ public:
*
* @param data the vector to copy from
*/
explicit CuBuffer(const std::vector<T>& data);
explicit GpuBuffer(const std::vector<T>& data);
/**
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
*/
explicit CuBuffer();
explicit GpuBuffer();
/**
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
* @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
*
* @param numberOfElements number of T elements to allocate
*/
explicit CuBuffer(const size_t numberOfElements);
explicit GpuBuffer(const size_t numberOfElements);
/**
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data
*
* @note This assumes the data is on the CPU.
@ -102,12 +102,12 @@ public:
* @param numberOfElements number of T elements to allocate
* @param dataOnHost data on host/CPU
*/
CuBuffer(const T* dataOnHost, const size_t numberOfElements);
GpuBuffer(const T* dataOnHost, const size_t numberOfElements);
/**
* @brief ~CuBuffer calls cudaFree
* @brief ~GpuBuffer calls cudaFree
*/
virtual ~CuBuffer();
virtual ~GpuBuffer();
/**
* @return the raw pointer to the GPU data
@ -120,7 +120,7 @@ public:
const T* data() const;
/**
* @return fetch the first element in a CuBuffer
* @return fetch the first element in a GpuBuffer
*/
__host__ __device__ T& front()
{
@ -131,7 +131,7 @@ public:
}
/**
* @return fetch the last element in a CuBuffer
* @return fetch the last element in a GpuBuffer
*/
__host__ __device__ T& back()
{
@ -142,7 +142,7 @@ public:
}
/**
* @return fetch the first element in a CuBuffer
* @return fetch the first element in a GpuBuffer
*/
__host__ __device__ T front() const
{
@ -153,7 +153,7 @@ public:
}
/**
* @return fetch the last element in a CuBuffer
* @return fetch the last element in a GpuBuffer
*/
__host__ __device__ T back() const
{
@ -176,7 +176,7 @@ public:
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
if (m_numberOfElements != bvector.size()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuBuffer has size {}, \n"
fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
"however, BlockVector has N() = {}, and size = {}.",
m_numberOfElements,
bvector.N(),
@ -199,7 +199,7 @@ public:
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
if (m_numberOfElements != bvector.size()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuBuffer has size {},\n however, the BlockVector "
fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
"has has N() = {}, and size() = {}.",
m_numberOfElements,
bvector.N(),
@ -267,14 +267,14 @@ private:
T* m_dataOnDevice = nullptr;
size_t m_numberOfElements;
void assertSameSize(const CuBuffer<T>& other) const;
void assertSameSize(const GpuBuffer<T>& other) const;
void assertSameSize(size_t size) const;
void assertHasElements() const;
};
template <class T>
CuView<const T> make_view(const CuBuffer<T>&);
GpuView<const T> make_view(const GpuBuffer<T>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -25,28 +25,28 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
#include <tuple>
#include <functional>
#include <utility>
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
: m_cpuMatrix(A)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuNaturalToReorder(m_naturalToReordered)
, m_gpuReorderToNatural(m_reorderedToNatural)
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
@ -71,13 +71,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(),
A.nonzeroes()));
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
m_gpuMatrixReorderedDiag = std::make_unique<GpuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
}
else {
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
@ -89,13 +89,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
GpuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
GpuDILU<M, X, Y, l>::apply(X& v, const Y& d)
{
OPM_TIMEBLOCK(prec_apply);
{
@ -105,7 +105,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize)
GpuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize)
{
int levelStartIdx = 0;
for (int level = 0; level < m_levelSets.size(); ++level) {
@ -172,20 +172,20 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int u
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::post([[maybe_unused]] X& x)
GpuDILU<M, X, Y, l>::post([[maybe_unused]] X& x)
{
}
template <class M, class X, class Y, int l>
Dune::SolverCategory::Category
CuDILU<M, X, Y, l>::category() const
GpuDILU<M, X, Y, l>::category() const
{
return Dune::SolverCategory::sequential;
}
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::update()
GpuDILU<M, X, Y, l>::update()
{
OPM_TIMEBLOCK(prec_update);
{
@ -195,7 +195,7 @@ CuDILU<M, X, Y, l>::update()
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
GpuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
{
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu
computeDiagAndMoveReorderedData(moveThreadBlockSize, factorizationBlockSize);
@ -203,7 +203,7 @@ CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize)
GpuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize)
{
if (m_splitMatrix) {
detail::copyMatDataToReorderedSplit<field_type, blocksize_>(
@ -264,7 +264,7 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int
template <class M, class X, class Y, int l>
void
CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
{
// tune the thread-block size of the update function
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
@ -278,8 +278,8 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -16,18 +16,18 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUDILU_HPP
#define OPM_CUDILU_HPP
#ifndef OPM_GPUDILU_HPP
#define OPM_GPUDILU_HPP
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief DILU preconditioner on the GPU.
//!
@ -37,10 +37,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class CuDILU : public Dune::PreconditionerWithUpdate<X, Y>
class GpuDILU : public Dune::PreconditionerWithUpdate<X, Y>
{
public:
//! \brief The matrix type the preconditioner is for.
@ -52,7 +52,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!
@ -60,7 +60,7 @@ public:
//! \param A The matrix to operate on.
//! \param w The relaxation factor.
//!
explicit CuDILU(const M& A, bool splitMatrix, bool tuneKernels);
explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels);
//! \brief Prepare the preconditioner.
//! \note Does nothing at the time being.
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately
std::unique_ptr<CuVector<field_type>> m_gpuMatrixReorderedDiag;
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder;
GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural;
GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in DILU
CuVector<field_type> m_gpuDInv;
GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_DILUFactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -20,20 +20,20 @@
#include <dune/istl/bcrsmatrix.hh>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
GpuJac<M, X, Y, l>::GpuJac(const M& A, field_type w)
: m_cpuMatrix(A)
, m_relaxationFactor(w)
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(A))
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
{
// Some sanity check
@ -58,13 +58,13 @@ CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
template <class M, class X, class Y, int l>
void
CuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
GpuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template <class M, class X, class Y, int l>
void
CuJac<M, X, Y, l>::apply(X& v, const Y& d)
GpuJac<M, X, Y, l>::apply(X& v, const Y& d)
{
// Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) )
// Working with defect d and update v it we only need to set v = w*(D^-1)*d
@ -77,20 +77,20 @@ CuJac<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l>
void
CuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
GpuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
{
}
template <class M, class X, class Y, int l>
Dune::SolverCategory::Category
CuJac<M, X, Y, l>::category() const
GpuJac<M, X, Y, l>::category() const
{
return Dune::SolverCategory::sequential;
}
template <class M, class X, class Y, int l>
void
CuJac<M, X, Y, l>::update()
GpuJac<M, X, Y, l>::update()
{
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix);
invertDiagonalAndFlatten();
@ -98,7 +98,7 @@ CuJac<M, X, Y, l>::update()
template <class M, class X, class Y, int l>
void
CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
GpuJac<M, X, Y, l>::invertDiagonalAndFlatten()
{
detail::JAC::invertDiagonalAndFlatten<field_type, matrix_type::block_type::cols>(
m_gpuMatrix.getNonZeroValues().data(),
@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
m_diagInvFlattened.data());
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUJAC_DUNE(double, 1);
INSTANTIATE_CUJAC_DUNE(double, 2);

View File

@ -16,19 +16,19 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUJAC_HPP
#define OPM_CUJAC_HPP
#ifndef OPM_GPUJAC_HPP
#define OPM_GPUJAC_HPP
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Jacobi preconditioner on the GPU.
//!
@ -40,10 +40,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class CuJac : public Dune::PreconditionerWithUpdate<X, Y>
class GpuJac : public Dune::PreconditionerWithUpdate<X, Y>
{
public:
//! \brief The matrix type the preconditioner is for.
@ -61,7 +61,7 @@ public:
//! \param A The matrix to operate on.
//! \param w The relaxation factor.
//!
CuJac(const M& A, field_type w);
GpuJac(const M& A, field_type w);
//! \brief Prepare the preconditioner.
//! \note Does nothing at the time being.
@ -104,12 +104,12 @@ private:
//! \brief The relaxation factor to use.
const field_type m_relaxationFactor;
//! \brief The A matrix stored on the gpu
CuSparseMatrix<field_type> m_gpuMatrix;
GpuSparseMatrix<field_type> m_gpuMatrix;
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
CuVector<field_type> m_diagInvFlattened;
GpuVector<field_type> m_diagInvFlattened;
void invertDiagonalAndFlatten();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -16,15 +16,15 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
#ifndef OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
#define OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <mutex>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief GPUSender is a wrapper class for classes which will implement copOwnerToAll
@ -36,7 +36,7 @@ namespace Opm::cuistl
template<class field_type, class OwnerOverlapCopyCommunicationType>
class GPUSender {
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){}
@ -97,8 +97,8 @@ protected:
// premature optimization, in the sense that we could just initialize these indices
// always, but they are not always used.
mutable std::once_flag m_initializedIndices;
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
mutable std::unique_ptr<GpuVector<int>> m_indicesOwner;
mutable std::unique_ptr<GpuVector<int>> m_indicesCopy;
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
};
@ -113,7 +113,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -151,8 +151,8 @@ private:
}
}
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
}
};
@ -168,7 +168,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -178,7 +178,7 @@ public:
void copyOwnerToAll(const X& source, X& dest) const override
{
OPM_ERROR_IF(&source != &dest, "The provided CuVectors' address did not match"); // In this context, source == dest!!!
OPM_ERROR_IF(&source != &dest, "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
@ -251,10 +251,10 @@ public:
}
private:
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesCopy;
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesOwner;
mutable std::unique_ptr<CuVector<field_type>> m_GPUSendBuf;
mutable std::unique_ptr<CuVector<field_type>> m_GPURecvBuf;
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
struct MessageInformation
{
@ -332,11 +332,11 @@ private:
}
}
m_commpairIndicesCopy = std::make_unique<CuVector<int>>(commpairIndicesCopyOnCPU);
m_commpairIndicesOwner = std::make_unique<CuVector<int>>(commpairIndicesOwnerCPU);
m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
m_GPUSendBuf = std::make_unique<CuVector<field_type>>(sendBufIdx * block_size);
m_GPURecvBuf = std::make_unique<CuVector<field_type>>(recvBufIdx * block_size);
m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
}
void initIndexSet() const override
@ -360,8 +360,8 @@ private:
}
}
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
buildCommPairIdxs();
}
@ -371,21 +371,21 @@ private:
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
*
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
* a Dune::OwnerOverlapCopyCommunication on CuVectors
* a Dune::OwnerOverlapCopyCommunication on GpuVectors
*
* @note This currently only has the functionality to parallelize the linear solve.
*
* @tparam field_type should be a field_type supported by CuVector (double, float)
* @tparam field_type should be a field_type supported by GpuVector (double, float)
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
*/
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
class CuOwnerOverlapCopy
class GpuOwnerOverlapCopy
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
CuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
GpuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
void copyOwnerToAll(const X& source, X& dest) const {
m_sender->copyOwnerToAll(source, dest);
@ -409,5 +409,5 @@ public:
private:
std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -25,26 +25,26 @@
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
// This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve ,
// it highly recommended to read that before proceeding.
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A)
, m_w(w)
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_LU(GpuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_temporaryStorage(m_LU.N() * m_LU.blockSize())
, m_descriptionL(detail::createLowerDiagonalDescription())
, m_descriptionU(detail::createUpperDiagonalDescription())
@ -70,13 +70,13 @@ CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
GpuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
GpuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
{
// We need to pass the solve routine a scalar to multiply.
@ -133,20 +133,20 @@ CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
GpuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
{
}
template <class M, class X, class Y, int l>
Dune::SolverCategory::Category
CuSeqILU0<M, X, Y, l>::category() const
GpuSeqILU0<M, X, Y, l>::category() const
{
return Dune::SolverCategory::sequential;
}
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::update()
GpuSeqILU0<M, X, Y, l>::update()
{
m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
createILU();
@ -154,7 +154,7 @@ CuSeqILU0<M, X, Y, l>::update()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::analyzeMatrix()
GpuSeqILU0<M, X, Y, l>::analyzeMatrix()
{
if (!m_buffer) {
@ -226,7 +226,7 @@ CuSeqILU0<M, X, Y, l>::analyzeMatrix()
template <class M, class X, class Y, int l>
size_t
CuSeqILU0<M, X, Y, l>::findBufferSize()
GpuSeqILU0<M, X, Y, l>::findBufferSize()
{
// We have three calls that need buffers:
// 1) LU decomposition
@ -290,7 +290,7 @@ CuSeqILU0<M, X, Y, l>::findBufferSize()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::createILU()
GpuSeqILU0<M, X, Y, l>::createILU()
{
OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
@ -328,35 +328,35 @@ CuSeqILU0<M, X, Y, l>::createILU()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
{
auto bufferSize = findBufferSize();
if (!m_buffer || m_buffer->dim() < bufferSize) {
m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
m_buffer.reset(new GpuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
}
analyzeMatrix();
createILU();
}
} // namespace Opm::cuistl
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
} // namespace Opm::gpuistl
#define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUSEQILU0_DUNE(double, 1);
INSTANTIATE_CUSEQILU0_DUNE(double, 2);
INSTANTIATE_CUSEQILU0_DUNE(double, 3);
INSTANTIATE_CUSEQILU0_DUNE(double, 4);
INSTANTIATE_CUSEQILU0_DUNE(double, 5);
INSTANTIATE_CUSEQILU0_DUNE(double, 6);
INSTANTIATE_GPUSEQILU0_DUNE(double, 1);
INSTANTIATE_GPUSEQILU0_DUNE(double, 2);
INSTANTIATE_GPUSEQILU0_DUNE(double, 3);
INSTANTIATE_GPUSEQILU0_DUNE(double, 4);
INSTANTIATE_GPUSEQILU0_DUNE(double, 5);
INSTANTIATE_GPUSEQILU0_DUNE(double, 6);
INSTANTIATE_CUSEQILU0_DUNE(float, 1);
INSTANTIATE_CUSEQILU0_DUNE(float, 2);
INSTANTIATE_CUSEQILU0_DUNE(float, 3);
INSTANTIATE_CUSEQILU0_DUNE(float, 4);
INSTANTIATE_CUSEQILU0_DUNE(float, 5);
INSTANTIATE_CUSEQILU0_DUNE(float, 6);
INSTANTIATE_GPUSEQILU0_DUNE(float, 1);
INSTANTIATE_GPUSEQILU0_DUNE(float, 2);
INSTANTIATE_GPUSEQILU0_DUNE(float, 3);
INSTANTIATE_GPUSEQILU0_DUNE(float, 4);
INSTANTIATE_GPUSEQILU0_DUNE(float, 5);
INSTANTIATE_GPUSEQILU0_DUNE(float, 6);

View File

@ -16,19 +16,19 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSEQILU0_HPP
#define OPM_CUSEQILU0_HPP
#ifndef OPM_GPUSEQILU0_HPP
#define OPM_GPUSEQILU0_HPP
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
//!
@ -43,10 +43,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class CuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
{
public:
//! \brief The matrix type the preconditioner is for.
@ -64,7 +64,7 @@ public:
//! \param A The matrix to operate on.
//! \param w The relaxation factor.
//!
CuSeqILU0(const M& A, field_type w);
GpuSeqILU0(const M& A, field_type w);
//! \brief Prepare the preconditioner.
//! \note Does nothing at the time being.
@ -110,18 +110,18 @@ private:
//! This is the storage for the LU composition.
//! Initially this will have the values of A, but will be
//! modified in the constructor to be the proper LU decomposition.
CuSparseMatrix<field_type> m_LU;
GpuSparseMatrix<field_type> m_LU;
CuVector<field_type> m_temporaryStorage;
GpuVector<field_type> m_temporaryStorage;
detail::CuSparseMatrixDescriptionPtr m_descriptionL;
detail::CuSparseMatrixDescriptionPtr m_descriptionU;
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
detail::GpuSparseMatrixDescriptionPtr m_descriptionU;
detail::CuSparseResource<bsrsv2Info_t> m_infoL;
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
detail::CuSparseResource<bsrilu02Info_t> m_infoM;
std::unique_ptr<CuVector<field_type>> m_buffer;
std::unique_ptr<GpuVector<field_type>> m_buffer;
detail::CuSparseHandle& m_cuSparseHandle;
bool m_analysisDone = false;
@ -133,6 +133,6 @@ private:
void updateILUConfiguration();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -22,14 +22,14 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
#include <type_traits>
namespace Opm::cuistl
namespace Opm::gpuistl
{
namespace
@ -61,7 +61,7 @@ namespace
template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix<T>::GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -82,15 +82,15 @@ CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
}
template <class T>
CuSparseMatrix<T>::~CuSparseMatrix()
GpuSparseMatrix<T>::~GpuSparseMatrix()
{
// empty
}
template <typename T>
template <typename MatrixType>
CuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>
GpuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
// TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices;
@ -129,18 +129,18 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
// Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
"Error size of rows do not sum to number of nonzeroes in GpuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for GpuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for GpuSparseMatrix.");
if (copyNonZeroElementsDirectly) {
const T* nonZeroElements = nonZeroElementsTmp;
return CuSparseMatrix<T>(
return GpuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else {
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
return CuSparseMatrix<T>(nonZeroElementData.data(),
return GpuSparseMatrix<T>(nonZeroElementData.data(),
rowIndices.data(),
columnIndices.data(),
numberOfNonzeroBlocks,
@ -152,7 +152,7 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
template <class T>
template <class MatrixType>
void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
@ -170,42 +170,42 @@ CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZer
template <typename T>
void
CuSparseMatrix<T>::setUpperTriangular()
GpuSparseMatrix<T>::setUpperTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
}
template <typename T>
void
CuSparseMatrix<T>::setLowerTriangular()
GpuSparseMatrix<T>::setLowerTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
}
template <typename T>
void
CuSparseMatrix<T>::setUnitDiagonal()
GpuSparseMatrix<T>::setUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::setNonUnitDiagonal()
GpuSparseMatrix<T>::setNonUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::mv(const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -232,14 +232,14 @@ CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::umv(const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -267,14 +267,14 @@ CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
@ -304,7 +304,7 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
template <class T>
template <class VectorType>
void
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
GpuSparseMatrix<T>::assertSameSize(const VectorType& x) const
{
if (x.dim() != blockSize() * N()) {
OPM_THROW(std::invalid_argument,
@ -317,17 +317,17 @@ CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
template class CuSparseMatrix<float>;
template class CuSparseMatrix<double>;
template class GpuSparseMatrix<float>;
template class GpuSparseMatrix<double>;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@ -16,23 +16,23 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSPARSEMATRIX_HPP
#define OPM_CUSPARSEMATRIX_HPP
#ifndef OPM_GPUSPARSEMATRIX_HPP
#define OPM_GPUSPARSEMATRIX_HPP
#include <cusparse.h>
#include <iostream>
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief The CuSparseMatrix class simple wrapper class for a CuSparse matrix.
* @brief The GpuSparseMatrix class simple wrapper class for a CuSparse matrix.
*
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
* block size parameter
@ -44,7 +44,7 @@ namespace Opm::cuistl
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
*/
template <typename T>
class CuSparseMatrix
class GpuSparseMatrix
{
public:
//! Create the sparse matrix specified by the raw data.
@ -60,7 +60,7 @@ public:
//!
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
//! restrictions in the current version of cusparse. This might change in future versions.
CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -70,14 +70,14 @@ public:
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix(const CuSparseMatrix&) = delete;
GpuSparseMatrix(const GpuSparseMatrix&) = delete;
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete;
GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete;
virtual ~CuSparseMatrix();
virtual ~GpuSparseMatrix();
/**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
@ -89,7 +89,7 @@ public:
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/
template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
static GpuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
/**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
@ -144,7 +144,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<T>& getNonZeroValues()
GpuVector<T>& getNonZeroValues()
{
return m_nonZeroElements;
}
@ -154,7 +154,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<T>& getNonZeroValues() const
const GpuVector<T>& getNonZeroValues() const
{
return m_nonZeroElements;
}
@ -164,7 +164,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getRowIndices()
GpuVector<int>& getRowIndices()
{
return m_rowIndices;
}
@ -174,7 +174,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getRowIndices() const
const GpuVector<int>& getRowIndices() const
{
return m_rowIndices;
}
@ -184,7 +184,7 @@ public:
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getColumnIndices()
GpuVector<int>& getColumnIndices()
{
return m_columnIndices;
}
@ -194,7 +194,7 @@ public:
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getColumnIndices() const
const GpuVector<int>& getColumnIndices() const
{
return m_columnIndices;
}
@ -233,7 +233,7 @@ public:
*
* This description is needed for most calls to the CuSparse library
*/
detail::CuSparseMatrixDescription& getDescription()
detail::GpuSparseMatrixDescription& getDescription()
{
return *m_matrixDescription;
}
@ -245,7 +245,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
/**
* @brief umv computes y=Ax+y
@ -254,7 +254,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
/**
@ -264,7 +264,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
/**
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
@ -280,9 +280,9 @@ public:
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
private:
CuVector<T> m_nonZeroElements;
CuVector<int> m_columnIndices;
CuVector<int> m_rowIndices;
GpuVector<T> m_nonZeroElements;
GpuVector<int> m_columnIndices;
GpuVector<int> m_rowIndices;
// Notice that we store these three as int to make sure we are cusparse compatible.
//
@ -292,11 +292,11 @@ private:
const int m_numberOfRows;
const int m_blockSize;
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
detail::GpuSparseMatrixDescriptionPtr m_matrixDescription;
detail::CuSparseHandle& m_cusparseHandle;
template <class VectorType>
void assertSameSize(const VectorType& vector) const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -20,41 +20,41 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
CuVector<T>::CuVector(const std::vector<T>& data)
: CuVector(data.data(), detail::to_int(data.size()))
GpuVector<T>::GpuVector(const std::vector<T>& data)
: GpuVector(data.data(), detail::to_int(data.size()))
{
}
template <class T>
CuVector<T>::CuVector(const size_t numberOfElements)
GpuVector<T>::GpuVector(const size_t numberOfElements)
: m_numberOfElements(detail::to_int(numberOfElements))
, m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
}
template <class T>
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
: CuVector(numberOfElements)
GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
: GpuVector(numberOfElements)
{
OPM_CUDA_SAFE_CALL(cudaMemcpy(
OPM_GPU_SAFE_CALL(cudaMemcpy(
m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
}
template <class T>
CuVector<T>&
CuVector<T>::operator=(T scalar)
GpuVector<T>&
GpuVector<T>::operator=(T scalar)
{
assertHasElements();
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
@ -62,13 +62,13 @@ CuVector<T>::operator=(T scalar)
}
template <class T>
CuVector<T>&
CuVector<T>::operator=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice,
detail::to_size_t(m_numberOfElements) * sizeof(T),
cudaMemcpyDeviceToDevice));
@ -76,33 +76,33 @@ CuVector<T>::operator=(const CuVector<T>& other)
}
template <class T>
CuVector<T>::CuVector(const CuVector<T>& other)
: CuVector(other.m_numberOfElements)
GpuVector<T>::GpuVector(const GpuVector<T>& other)
: GpuVector(other.m_numberOfElements)
{
assertHasElements();
assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice,
detail::to_size_t(m_numberOfElements) * sizeof(T),
cudaMemcpyDeviceToDevice));
}
template <class T>
CuVector<T>::~CuVector()
GpuVector<T>::~GpuVector()
{
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
}
template <typename T>
const T*
CuVector<T>::data() const
GpuVector<T>::data() const
{
return m_dataOnDevice;
}
template <typename T>
typename CuVector<T>::size_type
CuVector<T>::dim() const
typename GpuVector<T>::size_type
GpuVector<T>::dim() const
{
// Note that there is no way for m_numberOfElements to be non-positive,
// but for sanity we still use the safe conversion function here.
@ -114,7 +114,7 @@ CuVector<T>::dim() const
template <typename T>
std::vector<T>
CuVector<T>::asStdVector() const
GpuVector<T>::asStdVector() const
{
std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
copyToHost(temporary);
@ -123,21 +123,21 @@ CuVector<T>::asStdVector() const
template <typename T>
void
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
GpuVector<T>::setZeroAtIndexSet(const GpuVector<int>& indexSet)
{
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
}
template <typename T>
void
CuVector<T>::assertSameSize(const CuVector<T>& x) const
GpuVector<T>::assertSameSize(const GpuVector<T>& x) const
{
assertSameSize(x.m_numberOfElements);
}
template <typename T>
void
CuVector<T>::assertSameSize(int size) const
GpuVector<T>::assertSameSize(int size) const
{
if (size != m_numberOfElements) {
OPM_THROW(std::invalid_argument,
@ -147,7 +147,7 @@ CuVector<T>::assertSameSize(int size) const
template <typename T>
void
CuVector<T>::assertHasElements() const
GpuVector<T>::assertHasElements() const
{
if (m_numberOfElements <= 0) {
OPM_THROW(std::invalid_argument, "We have 0 elements");
@ -156,14 +156,14 @@ CuVector<T>::assertHasElements() const
template <typename T>
T*
CuVector<T>::data()
GpuVector<T>::data()
{
return m_dataOnDevice;
}
template <class T>
CuVector<T>&
CuVector<T>::operator*=(const T& scalar)
GpuVector<T>&
GpuVector<T>::operator*=(const T& scalar)
{
assertHasElements();
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
@ -171,8 +171,8 @@ CuVector<T>::operator*=(const T& scalar)
}
template <class T>
CuVector<T>&
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
GpuVector<T>&
GpuVector<T>::axpy(T alpha, const GpuVector<T>& y)
{
assertHasElements();
assertSameSize(y);
@ -182,7 +182,7 @@ CuVector<T>::axpy(T alpha, const CuVector<T>& y)
template <class T>
T
CuVector<T>::dot(const CuVector<T>& other) const
GpuVector<T>::dot(const GpuVector<T>& other) const
{
assertHasElements();
assertSameSize(other);
@ -193,7 +193,7 @@ CuVector<T>::dot(const CuVector<T>& other) const
}
template <class T>
T
CuVector<T>::two_norm() const
GpuVector<T>::two_norm() const
{
assertHasElements();
T result = T(0);
@ -203,14 +203,14 @@ CuVector<T>::two_norm() const
template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
GpuVector<T>::two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{
// TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer));
@ -218,23 +218,23 @@ CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const
{
CuVector<T> buffer(indexSet.dim());
GpuVector<T> buffer(indexSet.dim());
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
GpuVector<T>::two_norm(const GpuVector<int>& indexSet) const
{
CuVector<T> buffer(indexSet.dim());
GpuVector<T> buffer(indexSet.dim());
// TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer));
}
template <class T>
CuVector<T>&
CuVector<T>::operator+=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator+=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
@ -243,8 +243,8 @@ CuVector<T>::operator+=(const CuVector<T>& other)
}
template <class T>
CuVector<T>&
CuVector<T>::operator-=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator-=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
@ -255,7 +255,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)
template <class T>
void
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
if (numberOfElements > dim()) {
OPM_THROW(std::runtime_error,
@ -263,45 +263,45 @@ CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
dim(),
numberOfElements));
}
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}
template <class T>
void
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{
assertSameSize(detail::to_int(numberOfElements));
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
}
template <class T>
void
CuVector<T>::copyFromHost(const std::vector<T>& data)
GpuVector<T>::copyFromHost(const std::vector<T>& data)
{
copyFromHost(data.data(), data.size());
}
template <class T>
void
CuVector<T>::copyToHost(std::vector<T>& data) const
GpuVector<T>::copyToHost(std::vector<T>& data) const
{
copyToHost(data.data(), data.size());
}
template <typename T>
void
CuVector<T>::prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
GpuVector<T>::prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{
return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
void
CuVector<T>::syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
GpuVector<T>::syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{
return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
}
template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;
template class GpuVector<double>;
template class GpuVector<float>;
template class GpuVector<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@ -16,24 +16,24 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUVECTOR_HEADER_HPP
#define OPM_CUVECTOR_HEADER_HPP
#ifndef OPM_GPUVECTOR_HEADER_HPP
#define OPM_GPUVECTOR_HEADER_HPP
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <exception>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <vector>
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
* @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
*
* @note we currently only support simple raw primitives for T (double, float and int)
*
@ -45,12 +45,12 @@ namespace Opm::cuistl
* Example usage:
*
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/CuVector.hpp>
* #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
*
* void someFunction() {
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
*
* auto dataOnGPU = CuVector<double>(someDataOnCPU);
* auto dataOnGPU = GpuVector<double>(someDataOnCPU);
*
* // Multiply by 4.0:
* dataOnGPU *= 4.0;
@ -62,7 +62,7 @@ namespace Opm::cuistl
* @tparam T the type to store. Can be either float, double or int.
*/
template <typename T>
class CuVector
class GpuVector
{
public:
using field_type = T;
@ -70,17 +70,17 @@ public:
/**
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
* @brief GpuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
* this newly allocated memory.
*
* @note This does synchronous transfer.
*
* @param other the vector to copy from
*/
CuVector(const CuVector<T>& other);
GpuVector(const GpuVector<T>& other);
/**
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
* @brief GpuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
* this newly allocated memory.
*
* @note This does CPU to GPU transfer.
@ -90,7 +90,7 @@ public:
*
* @param data the vector to copy from
*/
explicit CuVector(const std::vector<T>& data);
explicit GpuVector(const std::vector<T>& data);
/**
* @brief operator= copies the content of the data vector to the memory of this vector.
@ -100,7 +100,7 @@ public:
*
* @param other the vector to copy from
*/
CuVector& operator=(const CuVector<T>& other);
GpuVector& operator=(const GpuVector<T>& other);
/**
* @brief operator= sets the whole vector equal to the scalar value.
@ -109,20 +109,20 @@ public:
*
* @param scalar the value all elements will be set to.
*/
CuVector& operator=(T scalar);
GpuVector& operator=(T scalar);
/**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T)
*
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
*
* @param numberOfElements number of T elements to allocate
*/
explicit CuVector(const size_t numberOfElements);
explicit GpuVector(const size_t numberOfElements);
/**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data
*
* @note This assumes the data is on the CPU.
@ -132,12 +132,12 @@ public:
*
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
*/
CuVector(const T* dataOnHost, const size_t numberOfElements);
GpuVector(const T* dataOnHost, const size_t numberOfElements);
/**
* @brief ~CuVector calls cudaFree
* @brief ~GpuVector calls cudaFree
*/
virtual ~CuVector();
virtual ~GpuVector();
/**
* @return the raw pointer to the GPU data
@ -162,7 +162,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {}, \n"
fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
"however, BlockVector has N() = {}, and dim = {}.",
m_numberOfElements,
bvector.N(),
@ -185,7 +185,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
"has has N() = {}, and dim() = {}.",
m_numberOfElements,
bvector.N(),
@ -231,8 +231,8 @@ public:
*/
void copyToHost(std::vector<T>& data) const;
void prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
void syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
/**
* @brief operator *= multiplies every element by scalar
@ -242,7 +242,7 @@ public:
*
* @note int is not supported
*/
CuVector<T>& operator*=(const T& scalar);
GpuVector<T>& operator*=(const T& scalar);
/**
* @brief axpy sets this vector equal to this + alha * y
@ -252,7 +252,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& axpy(T alpha, const CuVector<T>& y);
GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
/**
* @brief operator+= adds the other vector to this vector
@ -260,7 +260,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& operator+=(const CuVector<T>& other);
GpuVector<T>& operator+=(const GpuVector<T>& other);
/**
* @brief operator-= subtracts the other vector from this vector
@ -268,7 +268,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& operator-=(const CuVector<T>& other);
GpuVector<T>& operator-=(const GpuVector<T>& other);
/**
* @brief dot computes the dot product (standard inner product) against the other vector
@ -278,7 +278,7 @@ public:
*
* @return the result on the inner product
*/
T dot(const CuVector<T>& other) const;
T dot(const GpuVector<T>& other) const;
/**
* @brief returns the l2 norm of the vector
@ -294,14 +294,14 @@ public:
*
* @note int is not supported
*/
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
*
* @note int is not supported
*/
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;
T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/**
@ -309,14 +309,14 @@ public:
*
* @note int is not supported
*/
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
/**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
*
* @note int is not supported
*/
T two_norm(const CuVector<int>& indexSet) const;
T two_norm(const GpuVector<int>& indexSet) const;
/**
@ -363,9 +363,9 @@ public:
* }
* @endcode
*/
void setZeroAtIndexSet(const CuVector<int>& indexSet);
void setZeroAtIndexSet(const GpuVector<int>& indexSet);
// Slow method that creates a string representation of a CuVector for debug purposes
// Slow method that creates a string representation of a GpuVector for debug purposes
std::string toDebugString()
{
std::vector<T> v = asStdVector();
@ -385,11 +385,11 @@ private:
const int m_numberOfElements;
detail::CuBlasHandle& m_cuBlasHandle;
void assertSameSize(const CuVector<T>& other) const;
void assertSameSize(const GpuVector<T>& other) const;
void assertSameSize(int size) const;
void assertHasElements() const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -20,21 +20,21 @@
#include <cuda_runtime.h>
#include <algorithm>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
CuView<T>::CuView(std::vector<T>& data)
: CuView(data.data(), data.size())
GpuView<T>::GpuView(std::vector<T>& data)
: GpuView(data.data(), data.size())
{
}
template <typename T>
std::vector<T>
CuView<T>::asStdVector() const
GpuView<T>::asStdVector() const
{
std::vector<T> temporary(m_numberOfElements);
copyToHost(temporary);
@ -43,7 +43,7 @@ CuView<T>::asStdVector() const
template <class T>
void
CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
GpuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
if (numberOfElements > size()) {
OPM_THROW(std::runtime_error,
@ -51,32 +51,32 @@ CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
size(),
numberOfElements));
}
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}
template <class T>
void
CuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
GpuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{
assertSameSize(numberOfElements);
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
}
template <class T>
void
CuView<T>::copyFromHost(const std::vector<T>& data)
GpuView<T>::copyFromHost(const std::vector<T>& data)
{
copyFromHost(data.data(), data.size());
}
template <class T>
void
CuView<T>::copyToHost(std::vector<T>& data) const
GpuView<T>::copyToHost(std::vector<T>& data) const
{
copyToHost(data.data(), data.size());
}
template class CuView<double>;
template class CuView<float>;
template class CuView<int>;
template class GpuView<double>;
template class GpuView<float>;
template class GpuView<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@ -16,14 +16,14 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUVIEW_HEADER_HPP
#define OPM_CUVIEW_HEADER_HPP
#ifndef OPM_GPUVIEW_HEADER_HPP
#define OPM_GPUVIEW_HEADER_HPP
#include <dune/common/fvector.hh>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <stdexcept>
#include <vector>
@ -37,37 +37,37 @@
#define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief The CuView class is provides a view of some data allocated on the GPU
* @brief The GpuView class is provides a view of some data allocated on the GPU
* Essenstially is only stores a pointer and a size.
*
* This class supports being used from inside a CUDA/HIP Kernel.
* Implementations are placed in this headerfile for functions that may be called
* inside a kernel to avoid expensive RDC (relocatable device code)
*
* The view will typically provide a view into a CuBuffer and be able to
* The view will typically provide a view into a GpuBuffer and be able to
* manipulate the data within it
*
* @param T Type of the data we store, typically int/float/double w/o const specifier
*
**/
template <typename T>
class CuView
class GpuView
{
public:
using value_type = T;
/**
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
*/
explicit CuView() = default;
explicit GpuView() = default;
//TODO: we probably dont need anything like this or is it useful to have views also be able to handle things on CPU?
/// @brief constructor based on std::vectors, this will make a view on the CPU
/// @param data std vector to pr
CuView(std::vector<T>& data);
GpuView(std::vector<T>& data);
/**
* @brief operator[] to retrieve a reference to an item in the buffer
@ -95,7 +95,7 @@ public:
/**
* @brief CuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* @brief GpuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data
*
* @note This assumes the data is on the CPU.
@ -103,15 +103,15 @@ public:
* @param numberOfElements number of T elements to allocate
* @param dataOnHost data on host/CPU
*/
__host__ __device__ CuView(T* dataOnHost, size_t numberOfElements)
__host__ __device__ GpuView(T* dataOnHost, size_t numberOfElements)
: m_dataPtr(dataOnHost), m_numberOfElements(numberOfElements)
{
}
/**
* @brief ~CuView calls cudaFree
* @brief ~GpuView calls cudaFree
*/
~CuView() = default;
~GpuView() = default;
/**
* @return the raw pointer to the GPU data
@ -128,7 +128,7 @@ public:
}
/**
* @return fetch the first element in a CuView
* @return fetch the first element in a GpuView
*/
__host__ __device__ T& front()
{
@ -139,7 +139,7 @@ public:
}
/**
* @return fetch the last element in a CuView
* @return fetch the last element in a GpuView
*/
__host__ __device__ T& back()
{
@ -150,7 +150,7 @@ public:
}
/**
* @return fetch the first element in a CuView
* @return fetch the first element in a GpuView
*/
__host__ __device__ T front() const
{
@ -161,7 +161,7 @@ public:
}
/**
* @return fetch the last element in a CuView
* @return fetch the last element in a GpuView
*/
__host__ __device__ T back() const
{
@ -220,7 +220,7 @@ public:
* @return an std::vector containing the elements copied from the GPU.
*/
std::vector<T> asStdVector() const;
/// @brief Iterator class to make CuViews more similar to std containers
/// @brief Iterator class to make GpuViews more similar to std containers
class iterator {
public:
// Iterator typedefs
@ -363,7 +363,7 @@ private:
/// @brief Helper function to assert if another view has the same size
/// @param other view
__host__ __device__ void assertSameSize(const CuView<T>& other) const
__host__ __device__ void assertSameSize(const GpuView<T>& other) const
{
assertSameSize(other.m_numberOfElements);
}
@ -410,6 +410,6 @@ private:
}
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -26,18 +26,18 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
#include <string>
#include <tuple>
#include <utility>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@ -46,7 +46,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrixReorderedLower(nullptr)
, m_gpuMatrixReorderedUpper(nullptr)
, m_gpuNaturalToReorder(m_naturalToReordered)
@ -72,12 +72,12 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(),
A.nonzeroes()));
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
m_gpuMatrixReorderedDiag.emplace(GpuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
} else {
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
@ -272,8 +272,8 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) {
@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -22,14 +22,14 @@
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <optional>
#include <type_traits>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief ILU0 preconditioner on the GPU.
//!
@ -39,7 +39,7 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y>
@ -54,7 +54,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately
std::optional<CuVector<field_type>> m_gpuMatrixReorderedDiag;
std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder;
GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural;
GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in ILU0
CuVector<field_type> m_gpuDInv;
GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_ILU0FactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -21,12 +21,12 @@
#include <cusparse.h>
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//!\brief Makes a CUDA preconditioner available to a CPU simulator.
//!
@ -35,11 +35,11 @@ namespace Opm::cuistl
//!
//! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
//! \tparam CudaPreconditionerType the preconditioner taking GpuVector<real_type> as arguments to apply
template <class X, class Y, class CudaPreconditionerType>
class PreconditionerAdapter
: public Dune::PreconditionerWithUpdate<X, Y>,
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
public PreconditionerHolder<GpuVector<typename X::field_type>, GpuVector<typename Y::field_type>>
{
public:
//! \brief The domain type of the preconditioner.
@ -77,8 +77,8 @@ public:
virtual void apply(X& v, const Y& d) override
{
if (!m_inputBuffer) {
m_inputBuffer.reset(new CuVector<field_type>(v.dim()));
m_outputBuffer.reset(new CuVector<field_type>(v.dim()));
m_inputBuffer.reset(new GpuVector<field_type>(v.dim()));
m_outputBuffer.reset(new GpuVector<field_type>(v.dim()));
}
m_inputBuffer->copyFromHost(d);
m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
@ -117,7 +117,7 @@ public:
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
}
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector<field_type>, GpuVector<field_type>>>
getUnderlyingPreconditioner() override
{
return m_underlyingPreconditioner;
@ -131,9 +131,9 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
std::unique_ptr<CuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
std::unique_ptr<GpuVector<field_type>> m_inputBuffer;
std::unique_ptr<GpuVector<field_type>> m_outputBuffer;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -22,16 +22,16 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
//!
@ -50,7 +50,7 @@ namespace Opm::cuistl
//!
//! To use this, use something like the following code:
//! \code{.cpp}
//! #include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
//! #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
//! #include <opm/simulators/linalg/ParallelOverlappingILU0.hpp>
//!
//! using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
@ -64,7 +64,7 @@ namespace Opm::cuistl
//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
//!
//! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
//! using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! XDouble, XDouble>;
//!
//! // Note that we do not need to make a new instance for every invocation, this
@ -239,6 +239,6 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -21,7 +21,7 @@
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Common interface for adapters that hold preconditioners.
//!
@ -38,6 +38,6 @@ public:
*/
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@ -26,12 +26,12 @@
#include <dune/istl/schwarz.hh>
#include <dune/istl/solver.hh>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
#include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
#ifdef OPEN_MPI
#if OPEN_MPI
@ -39,7 +39,7 @@
#endif
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Wraps a CUDA solver to work with CPU data.
//!
@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::cuistl::CuVector<real_type>;
using XGPU = Opm::gpuistl::GpuVector<real_type>;
// TODO: Use a std::forward
SolverAdapter(Operator& op,
@ -67,7 +67,7 @@ public:
int verbose)
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
, m_opOnCPUWithMatrix(op)
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
{
}
@ -116,7 +116,7 @@ public:
private:
Operator& m_opOnCPUWithMatrix;
CuSparseMatrix<real_type> m_matrix;
GpuSparseMatrix<real_type> m_matrix;
UnderlyingSolver<XGPU> m_underlyingSolver;
@ -148,8 +148,8 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
@ -159,8 +159,8 @@ private:
if (!preconditionerAdapterAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
// We need to get the underlying preconditioner:
@ -183,20 +183,20 @@ private:
// TODO add typename Operator communication type as a named type with using
std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
else{
gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using SchwarzOperator
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
= Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
preconditionerReallyOnGPU, cudaCommunication);
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
@ -206,8 +206,8 @@ private:
// NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
// remember
// this, we add this check. So remember that we hold one count in this scope, and one in the
// CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
// CuBlockPreconditioner, hence we do not test for != 2
// GpuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
// GpuBlockPreconditioner, hence we do not test for != 2
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
@ -222,12 +222,12 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
"Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
}
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
auto matrixOperator
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
= std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
return UnderlyingSolver<XGPU>(
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
@ -237,6 +237,6 @@ private:
std::unique_ptr<XGPU> m_inputBuffer;
std::unique_ptr<XGPU> m_outputBuffer;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -17,9 +17,9 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
namespace Opm::cuistl::detail
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
namespace Opm::gpuistl::detail
{
@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@ -21,7 +21,7 @@
#include <cublas_v2.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
*
* Example use:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
* void someFunction() {
* auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
* auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
* int cuBlasVersion = -1;
* OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
* }
@ -64,5 +64,5 @@ private:
CuBlasHandle();
cublasHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUBLASHANDLE_HPP

View File

@ -18,30 +18,30 @@
*/
#ifndef CU_MATRIX_DESCRIPTION_HPP
#define CU_MATRIX_DESCRIPTION_HPP
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
using GpuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
/**
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* Pointer to GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
using GpuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
/**
* @brief createMatrixDescription creates a default matrix description
* @return a matrix description to a general sparse matrix with zero based indexing.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createMatrixDescription()
{
auto description = std::make_shared<CuSparseMatrixDescription>();
auto description = std::make_shared<GpuSparseMatrixDescription>();
// Note: We always want to use zero base indexing.
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
@ -52,11 +52,11 @@ createMatrixDescription()
/**
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createLowerDiagonalDescription()
{
auto description = createMatrixDescription();
@ -67,11 +67,11 @@ createLowerDiagonalDescription()
/**
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createUpperDiagonalDescription()
{
auto description = createMatrixDescription();
@ -81,6 +81,6 @@ createUpperDiagonalDescription()
return description;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // CU_MATRIX_DESCRIPTION_HPP

View File

@ -16,9 +16,9 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::gpuistl::detail
{
@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@ -21,7 +21,7 @@
#include <cusparse.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
*
* Example use:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
* void someFunction() {
* auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
* auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
* int cuSparseVersion = -1;
* OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
* }
@ -63,5 +63,5 @@ private:
CuSparseHandle();
cusparseHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUSPARSEHANDLE_HPP

View File

@ -23,7 +23,7 @@
#include <memory>
#include <type_traits>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -43,7 +43,7 @@ namespace Opm::cuistl::detail
*
* Example usage:
* @code{.cpp}
* #include <opm/simulator/linalg/cuistl/detail/CuSparseResource.hpp>
* #include <opm/simulator/linalg/gpuistl/detail/CuSparseResource.hpp>
*
* void someFunction() {
* auto resource = CuSparseResource<cuSparseMatDescr_t>();
@ -94,6 +94,6 @@ private:
DeleterType m_deleter;
};
} // namespace Opm::cuistl::impl
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
} // namespace Opm::gpuistl::impl
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource_impl.hpp>
#endif // CUSPARSERESOURCE_HPP

View File

@ -18,9 +18,9 @@
*/
#include <exception>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
// proper name of the function being called.
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@ -21,11 +21,11 @@
#include <limits>
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <string>
#include <utility>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
@ -47,7 +47,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
// create the events
for (int i = 0; i < runs + 1; ++i) {
OPM_CUDA_SAFE_CALL(cudaEventCreate(&events[i]));
OPM_GPU_SAFE_CALL(cudaEventCreate(&events[i]));
}
// Initialize helper variables
@ -59,21 +59,21 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
for (int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) {
// record a first event, and then an event after each kernel
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[0]));
OPM_GPU_SAFE_CALL(cudaEventRecord(events[0]));
for (int i = 0; i < runs; ++i) {
f(thrBlockSize); // runs an arbitrary function with the provided arguments
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[i + 1]));
OPM_GPU_SAFE_CALL(cudaEventRecord(events[i + 1]));
}
// make suret he runs are over
OPM_CUDA_SAFE_CALL(cudaEventSynchronize(events[runs]));
OPM_GPU_SAFE_CALL(cudaEventSynchronize(events[runs]));
// kernel launch was valid
if (cudaSuccess == cudaGetLastError()) {
// check if we beat the record for the fastest kernel
for (int i = 0; i < runs; ++i) {
float candidateBlockSizeTime;
OPM_CUDA_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
if (candidateBlockSizeTime < bestTime) { // checks if this configuration beat the current best
bestTime = candidateBlockSizeTime;
bestBlockSize = thrBlockSize;
@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
return bestBlockSize;
}
} // end namespace Opm::cuistl::detail
} // end namespace Opm::gpuistl::detail
#endif

View File

@ -23,24 +23,24 @@
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <tuple>
#include <vector>
/*
This file contains a collection of utility functions used in the GPU implementation of ILU and DILU
The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from dune matrices and creating cusparsematrix indices
extracting sparsity structures from dune matrices and creating gpusparsematrix indices
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline std::vector<int>
createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[globCnt++] = static_cast<int>(col);
}
@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
inline std::vector<int>
createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[col] = globCnt++;
}
@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -28,7 +28,7 @@
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUBLAS_ERROR_TYPE(code, x) \
@ -108,7 +108,7 @@ getCublasErrorMessage(cublasStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* #include <cublas_v2.h>
*
* void some_function() {
@ -147,7 +147,7 @@ cublasSafeCall(cublasStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* #include <cublas_v2.h>
*
* void some_function() {
@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
@ -183,7 +183,7 @@ cublasWarnIfError(cublasStatus_t error,
* Example usage:
* @code{.cpp}
* #include <cublas_v2.h>
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
*
* void some_function() {
* cublasHandle_t cublasHandle;
@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
* @note This should be used for any call to cuBlas unless you have a good reason not to.
*/
#define OPM_CUBLAS_SAFE_CALL(expression) \
::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
@ -203,7 +203,7 @@ cublasWarnIfError(cublasStatus_t error,
* Example usage:
* @code{.cpp}
* #include <cublas_v2.h>
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
*
* void some_function() {
* cublasHandle_t cublasHandle;
@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
* @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUBLAS_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUBLAS_SAFE_CALL_HPP

View File

@ -29,7 +29,7 @@
#include <cublas_v2.h>
#include <opm/common/ErrorMacros.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cublasStatus_t
@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -20,7 +20,7 @@
#define OPM_CUDA_CHECK_LAST_ERROR_HPP
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
/**
* @brief OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE checks the return type of cudaDeviceSynchronize(),
@ -28,7 +28,7 @@
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
*
* void some_function() {
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE;
@ -38,7 +38,7 @@
* @note This can be used to debug the code, or simply make sure that no error has occured.
* @note This is a rather heavy operation, so prefer to use only in Debug mode (see OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG)
*/
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize())
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_GPU_SAFE_CALL(cudaDeviceSynchronize())
#ifdef NDEBUG
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG
@ -50,7 +50,7 @@
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
*
* void some_function() {
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG;
@ -69,7 +69,7 @@
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
*
* void some_function() {
* OPM_CUDA_CHECK_LAST_ERROR;
@ -78,7 +78,7 @@
*
* @note This can be used to debug the code, or simply make sure that no error has occured.
*/
#define OPM_CUDA_CHECK_LAST_ERROR OPM_CUDA_SAFE_CALL(cudaGetLastError())
#define OPM_CUDA_CHECK_LAST_ERROR OPM_GPU_SAFE_CALL(cudaGetLastError())
#ifdef NDEBUG
#define OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG
@ -90,7 +90,7 @@
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
*
* void some_function() {
* OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG;

View File

@ -19,7 +19,7 @@
#ifndef CUSPARSE_CONSTANTS_HPP
#define CUSPARSE_CONSTANTS_HPP
#include <cusparse.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
}

View File

@ -24,7 +24,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
@ -93,7 +93,7 @@ getCusparseErrorMessage(cusparseStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cublas_v2.h>
*
* void some_function() {
@ -133,7 +133,7 @@ cusparseSafeCall(cusparseStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cublas_v2.h>
*
* void some_function() {
@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
@ -171,7 +171,7 @@ cusparseWarnIfError(cusparseStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cusparse.h>
*
* void some_function() {
@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* @note This should be used for any call to cuSparse unless you have a good reason not to.
*/
#define OPM_CUSPARSE_SAFE_CALL(expression) \
::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
@ -191,7 +191,7 @@ cusparseWarnIfError(cusparseStatus_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cusparse.h>
*
* void some_function() {
@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
* exception.
*/
#define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUSPARSE_SAFE_CALL_HPP

View File

@ -27,7 +27,7 @@
#include <type_traits>
#ifndef OPM_CUSPARSE_WRAPPER_HPP
#define OPM_CUSPARSE_WRAPPER_HPP
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cusparseStatus_t
@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
beta,
y);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -22,7 +22,7 @@
#include <limits>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
return newMatrix;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -21,12 +21,12 @@
#include <cstddef>
#include <cuda.h>
#include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
/*
This file provides some logic for handling how to choose the correct thread-block size
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
constexpr inline size_t
getThreads([[maybe_unused]] size_t numberOfRows)
@ -51,7 +51,7 @@ getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
}
int blockSize;
int tmpGridSize;
OPM_CUDA_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
return blockSize;
}
@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -16,15 +16,15 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUDA_SAFE_CALL_HPP
#define OPM_CUDA_SAFE_CALL_HPP
#ifndef OPM_GPU_SAFE_CALL_HPP
#define OPM_GPU_SAFE_CALL_HPP
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <string_view>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief getCudaErrorMessage generates the error message to display for a given error.
@ -48,9 +48,9 @@ getCudaErrorMessage(cudaError_t error,
const std::string_view& functionName,
size_t lineNumber)
{
return fmt::format("CUDA expression did not execute correctly. Expression was: \n"
return fmt::format("GPU expression did not execute correctly. Expression was: \n"
" {}\n"
"CUDA error was {}\n"
"GPU error was {}\n"
"in function {}, in {}, at line {}\n",
expression,
cudaGetErrorString(error),
@ -60,12 +60,12 @@ getCudaErrorMessage(cudaError_t error,
}
/**
* @brief cudaSafeCall checks the return type of the CUDA expression (function call) and throws an exception if it
* @brief cudaSafeCall checks the return type of the GPU expression (function call) and throws an exception if it
* does not equal cudaSuccess.
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h>
*
* void some_function() {
@ -74,7 +74,7 @@ getCudaErrorMessage(cudaError_t error,
* }
* @endcode
*
* @note It is probably easier to use the macro OPM_CUDA_SAFE_CALL
* @note It is probably easier to use the macro OPM_GPU_SAFE_CALL
*
* @todo Refactor to use std::source_location once we shift to C++20
*/
@ -91,7 +91,7 @@ cudaSafeCall(cudaError_t error,
}
/**
* @brief cudaWarnIfError checks the return type of the CUDA expression (function call) and issues a warning if it
* @brief cudaWarnIfError checks the return type of the GPU expression (function call) and issues a warning if it
* does not equal cudaSuccess.
*
* @param error the error code from cublas
@ -102,7 +102,7 @@ cudaSafeCall(cudaError_t error,
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h>
*
* void some_function() {
@ -111,9 +111,9 @@ cudaSafeCall(cudaError_t error,
* }
* @endcode
*
* @note It is probably easier to use the macro OPM_CUDA_WARN_IF_ERROR
* @note It is probably easier to use the macro OPM_GPU_WARN_IF_ERROR
*
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
*
* @todo Refactor to use std::source_location once we shift to C++20
*/
@ -128,47 +128,47 @@ cudaWarnIfError(cudaError_t error,
OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
* @brief OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exception if it
* does not equal cudaSuccess.
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h>
*
* void some_function() {
* void* somePointer;
* OPM_CUDA_SAFE_CALL(cudaMalloc(&somePointer, 1));
* OPM_GPU_SAFE_CALL(cudaMalloc(&somePointer, 1));
* }
* @endcode
*
* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
* @note This should be used for any call to the GPU runtime API unless you have a good reason not to.
*/
#define OPM_CUDA_SAFE_CALL(expression) \
::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
#define OPM_GPU_SAFE_CALL(expression) \
::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUDA_WARN_IF_ERROR checks the return type of the CUDA expression (function call) and issues a warning if
* @brief OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warning if
* it does not equal cudaSuccess.
*
* Example usage:
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h>
*
* void some_function() {
* void* somePointer;
* OPM_CUDA_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
* OPM_GPU_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
* }
* @endcode
*
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUDA_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#define OPM_GPU_WARN_IF_ERROR(expression) \
::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
{
@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
}
@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
srcRowIndices,
srcColumnIndices,
@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@ -16,11 +16,11 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
#ifndef OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
#define OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int threadBlockSize);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -29,7 +29,7 @@
* TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ).
* With C++20 this file can be removed.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -121,5 +121,5 @@ public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
namespace
{
@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
rowIndices,
colIndices,
@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU

View File

@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <vector>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
/**
@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
T* dInv,
int threadBlockSize);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU
#endif

View File

@ -18,16 +18,16 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <stdexcept>
/*
The LU factorization and apply step is written based on the Dune implementations
*/
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
namespace
{
@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
}
@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
}
@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0

View File

@ -20,7 +20,7 @@
#define OPM_ILU0_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
/**
@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
int rowsInLevelSet,
int threadBlockSize);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0
#endif

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
namespace
{
@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
{
if (blocksize <= 3) {
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuInvertDiagonalAndFlatten<T, blocksize>
<<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
} else {
@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC

View File

@ -20,7 +20,7 @@
#define OPM_JAC_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
/**
@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
template <class T, int blocksize>
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC
#endif

View File

@ -20,9 +20,9 @@
#ifndef OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
#define OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
#include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
return true;
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -37,7 +37,7 @@
* while Dune::BlockVector (and relatives) use unsigned size_t.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -106,6 +106,6 @@ to_size_t(int i)
return std::size_t(i);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -18,14 +18,14 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@ -108,8 +108,8 @@ template <class T>
void
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
}
@ -121,8 +121,8 @@ template <class T>
void
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
}
template void setZeroAtIndexSet(double*, size_t, const int*);
@ -138,12 +138,12 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
size_t numberOfElements,
const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
// TODO: [perf] Get rid of the allocation here.
CuVector<T> oneVector(numberOfElements);
GpuVector<T> oneVector(numberOfElements);
oneVector = 1.0;
T result = 0.0;
OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result));
@ -158,10 +158,10 @@ template <class T>
void
prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
}
template void prepareSendBuf(const double* deviceA, double* buffer, size_t numberOfElements, const int* indices);
template void prepareSendBuf(const float* deviceA, float* buffer, size_t numberOfElements, const int* indices);
@ -171,8 +171,8 @@ template <class T>
void
syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
// cudaDeviceSynchronize(); // Not needed, I guess...
}
@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
{
switch (blocksize) {
case 1: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 1>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 2: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 2>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 3: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 3>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@ -20,7 +20,7 @@
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
#include <cstddef>
#include <cublas_v2.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@ -65,11 +65,11 @@ void syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int*
/**
* @brief Compue the weighted matrix vector product where the matrix is diagonal, the diagonal is a vector, meaning we
* compute the Hadamard product.
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks
* @param squareBlockVector A GpuVector whose elements are NxN matrix blocks
* @param numberOfRows The number of rows in the vector
* @param blocksize The sidelength of the square block elements in the vector
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in
* @param src_vec A pointer to the data of the GpuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the GpuVector we store the result in
*
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
* diagonal of the matrix and use this product.
@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
T relaxationFactor,
const T* srcVec,
T* dstVec);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@ -19,9 +19,9 @@
#include <config.h>
#include <cuda_runtime.h>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
namespace Opm::cuistl
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
namespace Opm::gpuistl
{
void
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
@ -41,9 +41,9 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
// Now do a round robin kind of assignment
// TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device.
const auto deviceId = mpiRank % deviceCount;
OPM_CUDA_SAFE_CALL(cudaDeviceReset());
OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
OPM_GPU_SAFE_CALL(cudaDeviceReset());
OPM_GPU_SAFE_CALL(cudaSetDevice(deviceId));
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
+ " devices).");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@ -20,7 +20,7 @@
#ifndef OPM_CUISTL_SET_DEVICE_HEADER
#define OPM_CUISTL_SET_DEVICE_HEADER
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Sets the correct CUDA device in the setting of MPI
//!
@ -32,5 +32,5 @@ namespace Opm::cuistl
//!
//! @note If no CUDA device is present, this does nothing.
void setDevice(int mpiRank, int numberOfMpiRanks);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@ -18,14 +18,14 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuBuffer
#define BOOST_TEST_MODULE TestGpuBuffer
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <array>
#include <algorithm>
@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
{
// test that we can create buffers and make views of the buffers using the pointer constructor
auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
const auto gpubuf = ::Opm::gpuistl::GpuBuffer<int>(buf);
auto gpuview = ::Opm::gpuistl::GpuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::GpuView<int>, decltype(gpuview)>::value;
BOOST_CHECK(gpuBufCreatedView);
// test that we can make views of buffers by using the cubuffer constructor
auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
// test that we can make views of buffers by using the GpuBuffer constructor
auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::GpuView<const int>, decltype(gpuview2)>::value;
BOOST_CHECK(gpuBufCreatedView2);

View File

@ -18,18 +18,18 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuDiluHelpers
#define BOOST_TEST_MODULE TestGpuDILU
#include <boost/test/unit_test.hpp>
#include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <random>
#include <vector>
@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
using CuIntVec = Opm::cuistl::CuVector<int>;
using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::GpuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
Sp1x1BlockMatrix
get1x1BlockTestMatrix()
@ -211,7 +211,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
// Initialize preconditioner objects
Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
auto gpudilu = CuDilu1x1(matA, true, true);
auto gpudilu = GpuDilu1x1(matA, true, true);
// Use the apply
gpudilu.apply(d_output, d_input);
@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
}
auto cudilures = d_output.asStdVector();
// check that CuDilu results matches that of CPU dilu
// check that GpuDilu results matches that of CPU dilu
for (size_t i = 0; i < cudilures.size(); ++i) {
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
}
@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)
// init matrix with 2x2 blocks
Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
auto gpudilu = CuDilu2x2(matA, true, true);
auto gpudilu = GpuDilu2x2(matA, true, true);
Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);
// create input/output buffers for the apply
@ -275,7 +275,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
{
// create gpu dilu preconditioner
Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
auto gpudilu = CuDilu1x1(matA, true, true);
auto gpudilu = GpuDilu1x1(matA, true, true);
matA[0][0][0][0] = 11.0;
matA[0][1][0][0] = 12.0;
@ -325,7 +325,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
}
auto cudilures = d_output.asStdVector();
// check that CuDilu results matches that of CPU dilu
// check that GpuDilu results matches that of CPU dilu
for (size_t i = 0; i < cudilures.size(); ++i) {
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
}

View File

@ -18,22 +18,22 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuJac
#define BOOST_TEST_MODULE TestGpuJac
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
using NumericTypes = boost::mpl::list<double, float>;
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
{
/*
Test data to validate jacobi preconditioner, expected result is x_1, and relaxation factor is 0.5
@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
Vector vVector(2);
Vector dVector(2);
@ -81,14 +81,14 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}};
cujac.apply(vVector, dVector);
gpujac.apply(vVector, dVector);
BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7);
BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7);
BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7);
BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7);
}
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
{
/*
Test data to validate jacobi preconditioner, expected result is x_1, relaxation factor is 0.5
@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
B[2][2][0][0] = -1.0;
B[3][3][0][0] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
Vector vVector(4);
Vector dVector(4);
@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0};
cujac.apply(vVector, dVector);
gpujac.apply(vVector, dVector);
BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7);
BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7);
BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7);

View File

@ -18,7 +18,7 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy
#define BOOST_TEST_MODULE TestGpuOwnerOverlapCopy
#define BOOST_TEST_NO_MAIN
#include <boost/test/unit_test.hpp>
@ -26,10 +26,10 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
#include <random>
#include <mpi.h>
@ -46,7 +46,7 @@ main(int argc, char** argv)
int rank, totalRanks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
Opm::cuistl::setDevice(rank, totalRanks);
Opm::gpuistl::setDevice(rank, totalRanks);
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
}
@ -62,14 +62,14 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
auto GpuOwnerOverlapCopy
= Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
cuOwnerOverlapCopy.project(xGPU);
GpuOwnerOverlapCopy.project(xGPU);
auto resultOfProject = xGPU.asStdVector();
@ -94,19 +94,19 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
auto GpuOwnerOverlapCopy
= Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
double outputDune = -1.0;
auto xDune = xGPU.asDuneBlockVector<1>();
ownerOverlapCopy.dot(xDune, xDune, outputDune);
double output = -1.0;
cuOwnerOverlapCopy.dot(xGPU, xGPU, output);
GpuOwnerOverlapCopy.dot(xGPU, xGPU, output);
BOOST_CHECK_EQUAL(outputDune, output);

View File

@ -18,7 +18,7 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSeqILU0
#define BOOST_TEST_MODULE TestGpuSeqILU0
#define BOOST_TEST_NO_MAIN
@ -27,10 +27,10 @@
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <limits>
#include <memory>
@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B
B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update();
gpuILU.update();
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) {
@ -122,7 +122,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@ -191,7 +191,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B
B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update();
gpuILU.update();
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) {
@ -212,7 +212,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],

View File

@ -18,14 +18,14 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrix
#define BOOST_TEST_MODULE TestGpuSparseMatrix
#include <boost/test/unit_test.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random>
BOOST_AUTO_TEST_CASE(TestConstruction1D)
@ -76,17 +76,17 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
const auto& nonZeroValuesCuda = gpuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(gpuSparseMatrix.nonzeroes(), 0.0);
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, gpuSparseMatrix.nonzeroes());
std::vector<int> rowIndicesFromCUDA(N + 1);
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
gpuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
for (int i = 2; i < N; ++i) {
@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
gpuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
@ -143,19 +143,19 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
const double alpha = 1.42;
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
gpuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
inputVectorX.copyToHost(xHost);
cuSparseMatrix.mv(inputVectorX, inputVectorY);
gpuSparseMatrix.mv(inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);

View File

@ -18,21 +18,21 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVector
#define BOOST_TEST_MODULE TestGpuVector
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random>
BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);
// Multiply by 4.0:
dataOnGPU *= 4.0;
@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);
@ -103,17 +103,17 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
OPM_GPU_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
}
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0;
@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0;
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());
@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;
@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{
using GVector = Opm::cuistl::CuVector<double>;
using GVector = Opm::gpuistl::GpuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();

View File

@ -18,24 +18,24 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuView
#define BOOST_TEST_MODULE TestGpuView
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random>
#include <array>
#include <algorithm>
#include <type_traits>
using CuViewDouble = ::Opm::cuistl::CuView<double>;
using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
using GpuViewDouble = ::Opm::gpuistl::GpuView<double>;
using GpuBufferDouble = ::Opm::gpuistl::GpuBuffer<double>;
__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
__global__ void useGpuViewOnGPU(GpuViewDouble a, GpuViewDouble b){
b[0] = a.front();
b[1] = a.back();
b[2] = *a.begin();
@ -48,24 +48,24 @@ BOOST_AUTO_TEST_CASE(TestCreationAndIndexing)
{
// A simple test to check that we can move data to and from the GPU
auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cubuffer = CuBufferDouble(cpubuffer);
auto cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
const auto const_cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
auto cubuffer = GpuBufferDouble(cpubuffer);
auto gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
const auto const_gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
auto stdVecOfCuView = cuview.asStdVector();
auto const_stdVecOfCuView = cuview.asStdVector();
auto stdVecOfGpuView = gpuview.asStdVector();
auto const_stdVecOfGpuView = gpuview.asStdVector();
BOOST_CHECK_EQUAL_COLLECTIONS(
stdVecOfCuView.begin(), stdVecOfCuView.end(), cpubuffer.begin(), cpubuffer.end());
stdVecOfGpuView.begin(), stdVecOfGpuView.end(), cpubuffer.begin(), cpubuffer.end());
BOOST_CHECK_EQUAL_COLLECTIONS(
stdVecOfCuView.begin(), stdVecOfCuView.end(), const_stdVecOfCuView.begin(), const_stdVecOfCuView.end());
stdVecOfGpuView.begin(), stdVecOfGpuView.end(), const_stdVecOfGpuView.begin(), const_stdVecOfGpuView.end());
}
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUTypes)
{
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cpuview = CuViewDouble(buf.data(), buf.size());
const auto const_cpuview = CuViewDouble(buf.data(), buf.size());
auto cpuview = GpuViewDouble(buf.data(), buf.size());
const auto const_cpuview = GpuViewDouble(buf.data(), buf.size());
// check that indexing a mutable view gives references when indexing it
bool correct_type_of_cpu_front = std::is_same_v<double&, decltype(cpuview.front())>;
@ -83,26 +83,26 @@ BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
BOOST_CHECK(cpuview.back() == buf.back());
}
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUWithSTLIteratorAlgorithm)
BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUWithSTLIteratorAlgorithm)
{
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cpuview = CuViewDouble(buf.data(), buf.size());
auto cpuview = GpuViewDouble(buf.data(), buf.size());
std::sort(buf.begin(), buf.end());
BOOST_CHECK(42.0 == cpuview[3]);
}
BOOST_AUTO_TEST_CASE(TestCuViewOnGPU)
BOOST_AUTO_TEST_CASE(TestGpuViewOnGPU)
{
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cubufA = CuBufferDouble(buf);
auto cuviewA = CuViewDouble(cubufA.data(), cubufA.size());
auto cubufB = CuBufferDouble(4);
auto cuviewB = CuViewDouble(cubufB.data(), cubufB.size());
auto cubufA = GpuBufferDouble(buf);
auto gpuviewA = GpuViewDouble(cubufA.data(), cubufA.size());
auto cubufB = GpuBufferDouble(4);
auto gpuviewB = GpuViewDouble(cubufB.data(), cubufB.size());
useCuViewOnGPU<<<1,1>>>(cuviewA, cuviewB);
useGpuViewOnGPU<<<1,1>>>(gpuviewA, gpuviewB);
auto vecA = cuviewA.asStdVector();
auto vecB = cuviewB.asStdVector();
auto vecA = gpuviewA.asStdVector();
auto vecB = gpuviewB.asStdVector();
// checks that front/back/begin/end works
BOOST_CHECK(vecB[0] == buf[0]);

View File

@ -29,7 +29,7 @@
#include <dune/istl/preconditioners.hh>
#include <limits>
#include <memory>
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
expectedOutputVector[i][1] = 43.0;
inputVector[i][0] = 1.0;
auto converter
= Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
= Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
B);
auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);

View File

@ -18,17 +18,17 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations
#define BOOST_TEST_MODULE TestGpuSparseMatrixOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
using NumericTypes = boost::mpl::list<double, float>;
@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {-1.0 / 4.0,
@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};

View File

@ -18,16 +18,16 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVectorOperations
#define BOOST_TEST_MODULE TestGpuVectorOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
using NumericTypes = boost::mpl::list<double, float>;
@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {10.0, 22.0, 10.0};
@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
for (size_t i = 0; i < expectedVec.size(); i++) {
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
}
}
}

View File

@ -16,14 +16,14 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include "opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp"
#include "opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp"
#include <config.h>
#define BOOST_TEST_MODULE TestCublasHandle
#include <cuda_runtime.h>
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
{
@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
// that checks the version of blas programatically. Let the test pass for now.
BOOST_CHECK(true);
#else
auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
int cuBlasVersion = -1;
OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));

View File

@ -22,7 +22,7 @@
#include <boost/test/unit_test.hpp>
#include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCreateHandle)
{

View File

@ -21,7 +21,7 @@
#define BOOST_TEST_MODULE TestCudaCheckLastError
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
BOOST_AUTO_TEST_CASE(TestNoThrowLastError)

View File

@ -21,12 +21,12 @@
#define BOOST_TEST_MODULE TestSparseHandle
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
{
auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
int cuSparseVersion = -1;
OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
BOOST_CHECK_LT(0, cuSparseVersion);

View File

@ -22,7 +22,7 @@
#include <boost/test/unit_test.hpp>
#include <cusparse.h>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCreateHandle)
{

View File

@ -18,15 +18,15 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCudaSafeCall
#define BOOST_TEST_MODULE TestGpuSafeCall
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCudaMalloc)
BOOST_AUTO_TEST_CASE(TestGpuMalloc)
{
void* pointer;
BOOST_CHECK_NO_THROW(OPM_CUDA_SAFE_CALL(cudaMalloc(&pointer, 1)););
BOOST_CHECK_NO_THROW(OPM_GPU_SAFE_CALL(cudaMalloc(&pointer, 1)););
}
@ -41,6 +41,6 @@ BOOST_AUTO_TEST_CASE(TestThrows)
errorCodes = {{cudaErrorAddressOfConstant, cudaErrorAlreadyAcquired}};
#endif
for (auto code : errorCodes) {
BOOST_CHECK_THROW(OPM_CUDA_SAFE_CALL(code), std::exception);
BOOST_CHECK_THROW(OPM_GPU_SAFE_CALL(code), std::exception);
}
}

View File

@ -21,11 +21,11 @@
#define BOOST_TEST_MODULE TestSafeConversion
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
, std::invalid_argument);
}
@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
{
// This might seem slow, but it is really fast:
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
}
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
}
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
}
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
{
// This might seem slow, but it is really fast:
for (int i = 0; i <= 1024 * 1024; ++i) {
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
}
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
}

View File

@ -24,14 +24,14 @@
#include <dune/istl/solvers.hh>
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
#include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
static const constexpr int dim = 3;
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
namespace
{