Merge pull request #5554 from multitalentloes/refactor_cuistl

refactor cuistl to gpuistl
This commit is contained in:
Kjetil Olsen Lye 2024-08-26 09:55:13 +02:00 committed by GitHub
commit f97389d1b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
82 changed files with 1035 additions and 1035 deletions

View File

@ -530,7 +530,7 @@ if(CUDA_FOUND)
if (NOT USE_HIP) if (NOT USE_HIP)
target_link_libraries( opmsimulators PUBLIC ${CUDA_cusparse_LIBRARY} ) target_link_libraries( opmsimulators PUBLIC ${CUDA_cusparse_LIBRARY} )
target_link_libraries( opmsimulators PUBLIC ${CUDA_cublas_LIBRARY} ) target_link_libraries( opmsimulators PUBLIC ${CUDA_cublas_LIBRARY} )
foreach(tgt test_cuda_safe_call test_cuda_check_last_error test_cuvector) foreach(tgt test_gpu_safe_call test_cuda_check_last_error test_GpuVector)
target_link_libraries(${tgt} CUDA::cudart) target_link_libraries(${tgt} CUDA::cudart)
endforeach() endforeach()
endif() endif()
@ -545,21 +545,21 @@ if(CUDA_FOUND)
endif() endif()
set_tests_properties(cusparse_safe_call set_tests_properties(cusparse_safe_call
cublas_safe_call cublas_safe_call
cuda_safe_call gpu_safe_call
cuda_check_last_error cuda_check_last_error
cublas_handle cublas_handle
cujac GpuJac
cudilu GpuDILU
cusparse_handle cusparse_handle
cuSparse_matrix_operations cuSparse_matrix_operations
cuVector_operations cuVector_operations
cuvector GpuVector
cusparsematrix GpuSparseMatrix
cuseqilu0 GpuSeqILU0
cuowneroverlapcopy GpuOwnerOverlapCopy
solver_adapter solver_adapter
cubuffer GpuBuffer
cuview GpuView
PROPERTIES LABELS ${gpu_label}) PROPERTIES LABELS ${gpu_label})
endif() endif()

View File

@ -28,15 +28,15 @@
# hipification, we add a dependency that will trigger when the cuda source code is # hipification, we add a dependency that will trigger when the cuda source code is
# changed. # changed.
macro (ADD_CUDA_OR_HIP_FILE LIST DIR FILE) macro (ADD_CUDA_OR_HIP_FILE LIST DIR FILE)
set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/cuistl/${FILE}") set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/gpuistl/${FILE}")
if(CUDA_FOUND AND NOT CONVERT_CUDA_TO_HIP) if(CUDA_FOUND AND NOT CONVERT_CUDA_TO_HIP)
list (APPEND ${LIST} "${DIR}/cuistl/${FILE}") list (APPEND ${LIST} "${DIR}/gpuistl/${FILE}")
else() else()
# we must hipify the code # we must hipify the code
# and include the correct path which is in the build/binary dir # and include the correct path which is in the build/binary dir
string(REPLACE ".cu" ".hip" HIP_SOURCE_FILE ${FILE}) string(REPLACE ".cu" ".hip" HIP_SOURCE_FILE ${FILE})
set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/hipistl/${HIP_SOURCE_FILE}") set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/gpuistl_hip/${HIP_SOURCE_FILE}")
file(RELATIVE_PATH relpath ${PROJECT_SOURCE_DIR} ${hip_file_path}) file(RELATIVE_PATH relpath ${PROJECT_SOURCE_DIR} ${hip_file_path})
# add a custom command that will hipify # add a custom command that will hipify
@ -207,42 +207,42 @@ endif()
# add these files if we should compile the hip code # add these files if we should compile the hip code
if (HAVE_CUDA) if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/cusparse_matrix_operations.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuBuffer.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBuffer.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuDILU.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuJac.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSeqILU0.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg set_device.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg set_device.cpp)
# HEADERS # HEADERS
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_safe_call.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_matrix_operations.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_safe_call.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cublas_safe_call.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cublas_safe_call.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBuffer.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBuffer.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuDILU.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuJac.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp)
@ -256,12 +256,12 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/deviceBlockOperations.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/deviceBlockOperations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpuThreadUtils.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpuThreadUtils.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerAdapter.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSeqILU0.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSeqILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/fix_zero_diagonal.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/fix_zero_diagonal.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerConvertFieldTypeAdapter.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerConvertFieldTypeAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuOwnerOverlapCopy.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuOwnerOverlapCopy.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg SolverAdapter.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg SolverAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBlockPreconditioner.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
endif() endif()
@ -389,19 +389,19 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cubuffer.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuBuffer.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuview.cu) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuView.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_safe_call.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cudilu.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cujac.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuowneroverlapcopy.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuOwnerOverlapCopy.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuseqilu0.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp)

View File

@ -15,6 +15,6 @@ hipify-perl $input_file > $output_file
sed -i 's/^#include <hipblas\.h>/#include <hipblas\/hipblas.h>/g' $output_file sed -i 's/^#include <hipblas\.h>/#include <hipblas\/hipblas.h>/g' $output_file
sed -i 's/^#include <hipsparse\.h>/#include <hipsparse\/hipsparse.h>/g' $output_file sed -i 's/^#include <hipsparse\.h>/#include <hipsparse\/hipsparse.h>/g' $output_file
# make sure includes refer to hipistl/ files (the ones that are also hipified) # make sure includes refer to gpuistl_hip/ files (the ones that are also hipified)
sed -i 's/cuistl\//hipistl\//g' $output_file sed -i 's/gpuistl\//gpuistl_hip\//g' $output_file
echo "$output_file hipified" echo "$output_file hipified"

View File

@ -36,7 +36,7 @@
#endif #endif
#if HAVE_CUDA #if HAVE_CUDA
#include <opm/simulators/linalg/cuistl/set_device.hpp> #include <opm/simulators/linalg/gpuistl/set_device.hpp>
#endif #endif
namespace Opm { namespace Opm {
@ -163,7 +163,7 @@ void Main::initMPI()
} }
#if HAVE_CUDA #if HAVE_CUDA
Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size()); Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
#endif #endif
#endif // HAVE_MPI #endif // HAVE_MPI

View File

@ -39,9 +39,9 @@
#if HAVE_CUDA #if HAVE_CUDA
#if USE_HIP #if USE_HIP
#include <opm/simulators/linalg/hipistl/SolverAdapter.hpp> #include <opm/simulators/linalg/gpuistl_hip/SolverAdapter.hpp>
#else #else
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp> #include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
#endif #endif
#endif #endif
@ -205,7 +205,7 @@ namespace Dune
#endif #endif
#if HAVE_CUDA #if HAVE_CUDA
} else if (solver_type == "cubicgstab") { } else if (solver_type == "cubicgstab") {
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>( linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
*linearoperator_for_solver_, *linearoperator_for_solver_,
*scalarproduct_, *scalarproduct_,
preconditioner_, preconditioner_,

View File

@ -22,22 +22,22 @@
// both with the normal cuistl path, and the hipistl path // both with the normal gpuistl path, and the gpuistl_hip path
#if HAVE_CUDA #if HAVE_CUDA
#if USE_HIP #if USE_HIP
#include <opm/simulators/linalg/hipistl/CuBlockPreconditioner.hpp> #include <opm/simulators/linalg/gpuistl_hip/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/hipistl/CuDILU.hpp> #include <opm/simulators/linalg/gpuistl_hip/GpuDILU.hpp>
#include <opm/simulators/linalg/hipistl/OpmCuILU0.hpp> #include <opm/simulators/linalg/gpuistl_hip/OpmCuILU0.hpp>
#include <opm/simulators/linalg/hipistl/CuJac.hpp> #include <opm/simulators/linalg/gpuistl_hip/GpuJac.hpp>
#include <opm/simulators/linalg/hipistl/CuSeqILU0.hpp> #include <opm/simulators/linalg/gpuistl_hip/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl_hip/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerConvertFieldTypeAdapter.hpp> #include <opm/simulators/linalg/gpuistl_hip/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/hipistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl_hip/detail/gpu_safe_call.hpp>
#else #else
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp> #include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp> #include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp> #include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuJac.hpp> #include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp> #include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#endif #endif
#endif #endif

View File

@ -322,39 +322,39 @@ struct StandardPreconditioners {
} }
#if HAVE_CUDA #if HAVE_CUDA
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) { F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0); const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CuILU0 = typename cuistl:: using GpuILU0 = typename gpuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w); auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0); auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped; return wrapped;
}); });
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) { F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0); const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CuJac = using GpuJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuJac = std::make_shared<CuJac>(op.getmat(), w); auto gpuJac = std::make_shared<GpuJac>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac); auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuJac>>(gpuJac);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped; return wrapped;
}); });
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) { F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const bool split_matrix = prm.get<bool>("split_matrix", true); const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true); const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels); auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU); auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped; return wrapped;
}); });
@ -362,11 +362,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true); const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true); const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels); auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0); auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped; return wrapped;
}); });
#endif #endif
@ -582,68 +582,68 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
}); });
#if HAVE_CUDA #if HAVE_CUDA
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0); const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CuILU0 = typename cuistl:: using GpuILU0 = typename gpuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>( return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(
std::make_shared<CuILU0>(op.getmat(), w)); std::make_shared<GpuILU0>(op.getmat(), w));
}); });
F::addCreator("CUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("GPUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0); const double w = prm.get<double>("relaxation", 1.0);
using block_type = typename V::block_type; using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>; using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>; typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuILU0 = typename cuistl:: using GpuILU0 = typename gpuistl::
CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>; GpuSeqILU0<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>; using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuILU0>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>; using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat()); auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w)); auto adapted = std::make_shared<Adapter>(std::make_shared<GpuILU0>(converted->getConvertedMatrix(), w));
converted->setUnderlyingPreconditioner(adapted); converted->setUnderlyingPreconditioner(adapted);
return converted; return converted;
}); });
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0); const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CUJac = using GPUJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>( return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUJac>>(
std::make_shared<CUJac>(op.getmat(), w)); std::make_shared<GPUJac>(op.getmat(), w));
}); });
F::addCreator("OPMCUILU0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("OPMCUILU0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true); const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true); const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels)); return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
}); });
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true); const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true); const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type; using field_type = typename V::field_type;
using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>; using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels)); return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
}); });
F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) { F::addCreator("GPUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true); const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true); const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using block_type = typename V::block_type; using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>; using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>; using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>; using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>; using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>; using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat()); auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels)); auto adapted = std::make_shared<Adapter>(std::make_shared<GpuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
converted->setUnderlyingPreconditioner(adapted); converted->setUnderlyingPreconditioner(adapted);
return converted; return converted;
}); });

View File

@ -16,22 +16,22 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP #ifndef OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP #define OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
#include <dune/common/shared_ptr.hh> #include <dune/common/shared_ptr.hh>
#include <memory> #include <memory>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework. //! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
//! //!
//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof). //! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof).
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>> template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y> class GpuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
{ {
public: public:
using domain_type = X; using domain_type = X;
@ -47,13 +47,13 @@ public:
//! @param c The communication object for syncing overlap and copy //! @param c The communication object for syncing overlap and copy
//! data points. (E.~g. OwnerOverlapCopyCommunication ) //! data points. (E.~g. OwnerOverlapCopyCommunication )
//! //!
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c) GpuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
: m_preconditioner(p) : m_preconditioner(p)
, m_communication(c) , m_communication(c)
{ {
} }
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c) GpuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
: m_preconditioner(p) : m_preconditioner(p)
, m_communication(Dune::stackobject_to_shared_ptr(c)) , m_communication(Dune::stackobject_to_shared_ptr(c))
{ {
@ -125,5 +125,5 @@ private:
//! \brief the communication object //! \brief the communication object
std::shared_ptr<const communication_type> m_communication; std::shared_ptr<const communication_type> m_communication;
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -20,83 +20,83 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <algorithm> #include <algorithm>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp> #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp> #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class T> template <class T>
CuBuffer<T>::CuBuffer(const std::vector<T>& data) GpuBuffer<T>::GpuBuffer(const std::vector<T>& data)
: CuBuffer(data.data(), data.size()) : GpuBuffer(data.data(), data.size())
{ {
} }
template <class T> template <class T>
CuBuffer<T>::CuBuffer(const size_t numberOfElements) GpuBuffer<T>::GpuBuffer(const size_t numberOfElements)
: m_numberOfElements(numberOfElements) : m_numberOfElements(numberOfElements)
{ {
if (numberOfElements < 1) { if (numberOfElements < 1) {
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed"); OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
} }
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements)); OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
} }
template <class T> template <class T>
CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements) GpuBuffer<T>::GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
: CuBuffer(numberOfElements) : GpuBuffer(numberOfElements)
{ {
OPM_CUDA_SAFE_CALL(cudaMemcpy( OPM_GPU_SAFE_CALL(cudaMemcpy(
m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
} }
template <class T> template <class T>
CuBuffer<T>::CuBuffer(const CuBuffer<T>& other) GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
: CuBuffer(other.m_numberOfElements) : GpuBuffer(other.m_numberOfElements)
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice, other.m_dataOnDevice,
m_numberOfElements * sizeof(T), m_numberOfElements * sizeof(T),
cudaMemcpyDeviceToDevice)); cudaMemcpyDeviceToDevice));
} }
template <class T> template <class T>
CuBuffer<T>::~CuBuffer() GpuBuffer<T>::~GpuBuffer()
{ {
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice)); OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
} }
template <typename T> template <typename T>
typename CuBuffer<T>::size_type typename GpuBuffer<T>::size_type
CuBuffer<T>::size() const GpuBuffer<T>::size() const
{ {
return m_numberOfElements; return m_numberOfElements;
} }
template <typename T> template <typename T>
void void
CuBuffer<T>::resize(size_t newSize) GpuBuffer<T>::resize(size_t newSize)
{ {
if (newSize < 1) { if (newSize < 1) {
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed"); OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
} }
// Allocate memory for the new buffer // Allocate memory for the new buffer
T* tmpBuffer = nullptr; T* tmpBuffer = nullptr;
OPM_CUDA_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize)); OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
// Move the data from the old to the new buffer with truncation // Move the data from the old to the new buffer with truncation
size_t sizeOfMove = std::min({m_numberOfElements, newSize}); size_t sizeOfMove = std::min({m_numberOfElements, newSize});
OPM_CUDA_SAFE_CALL(cudaMemcpy(tmpBuffer, OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
m_dataOnDevice, m_dataOnDevice,
sizeOfMove * sizeof(T), sizeOfMove * sizeof(T),
cudaMemcpyDeviceToDevice)); cudaMemcpyDeviceToDevice));
// free the old buffer // free the old buffer
OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice)); OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
// swap the buffers // swap the buffers
m_dataOnDevice = tmpBuffer; m_dataOnDevice = tmpBuffer;
@ -107,7 +107,7 @@ CuBuffer<T>::resize(size_t newSize)
template <typename T> template <typename T>
std::vector<T> std::vector<T>
CuBuffer<T>::asStdVector() const GpuBuffer<T>::asStdVector() const
{ {
std::vector<T> temporary(m_numberOfElements); std::vector<T> temporary(m_numberOfElements);
copyToHost(temporary); copyToHost(temporary);
@ -116,14 +116,14 @@ CuBuffer<T>::asStdVector() const
template <typename T> template <typename T>
void void
CuBuffer<T>::assertSameSize(const CuBuffer<T>& x) const GpuBuffer<T>::assertSameSize(const GpuBuffer<T>& x) const
{ {
assertSameSize(x.m_numberOfElements); assertSameSize(x.m_numberOfElements);
} }
template <typename T> template <typename T>
void void
CuBuffer<T>::assertSameSize(size_t size) const GpuBuffer<T>::assertSameSize(size_t size) const
{ {
if (size != m_numberOfElements) { if (size != m_numberOfElements) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
@ -133,7 +133,7 @@ CuBuffer<T>::assertSameSize(size_t size) const
template <typename T> template <typename T>
void void
CuBuffer<T>::assertHasElements() const GpuBuffer<T>::assertHasElements() const
{ {
if (m_numberOfElements <= 0) { if (m_numberOfElements <= 0) {
OPM_THROW(std::invalid_argument, "We have 0 elements"); OPM_THROW(std::invalid_argument, "We have 0 elements");
@ -142,21 +142,21 @@ CuBuffer<T>::assertHasElements() const
template <typename T> template <typename T>
T* T*
CuBuffer<T>::data() GpuBuffer<T>::data()
{ {
return m_dataOnDevice; return m_dataOnDevice;
} }
template <typename T> template <typename T>
const T* const T*
CuBuffer<T>::data() const GpuBuffer<T>::data() const
{ {
return m_dataOnDevice; return m_dataOnDevice;
} }
template <class T> template <class T>
void void
CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements) GpuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{ {
if (numberOfElements > size()) { if (numberOfElements > size()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
@ -164,41 +164,41 @@ CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
size(), size(),
numberOfElements)); numberOfElements));
} }
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
} }
template <class T> template <class T>
void void
CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const GpuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{ {
assertSameSize(numberOfElements); assertSameSize(numberOfElements);
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
} }
template <class T> template <class T>
void void
CuBuffer<T>::copyFromHost(const std::vector<T>& data) GpuBuffer<T>::copyFromHost(const std::vector<T>& data)
{ {
copyFromHost(data.data(), data.size()); copyFromHost(data.data(), data.size());
} }
template <class T> template <class T>
void void
CuBuffer<T>::copyToHost(std::vector<T>& data) const GpuBuffer<T>::copyToHost(std::vector<T>& data) const
{ {
copyToHost(data.data(), data.size()); copyToHost(data.data(), data.size());
} }
template class CuBuffer<double>; template class GpuBuffer<double>;
template class CuBuffer<float>; template class GpuBuffer<float>;
template class CuBuffer<int>; template class GpuBuffer<int>;
template <class T> template <class T>
CuView<const T> make_view(const CuBuffer<T>& buf) { GpuView<const T> make_view(const GpuBuffer<T>& buf) {
return CuView<const T>(buf.data(), buf.size()); return GpuView<const T>(buf.data(), buf.size());
} }
template CuView<const double> make_view<double>(const CuBuffer<double>&); template GpuView<const double> make_view<double>(const GpuBuffer<double>&);
template CuView<const float> make_view<float>(const CuBuffer<float>&); template GpuView<const float> make_view<float>(const GpuBuffer<float>&);
template CuView<const int> make_view<int>(const CuBuffer<int>&); template GpuView<const int> make_view<int>(const GpuBuffer<int>&);
} // namespace Opm::cuistl } // namespace Opm::gpuistl

View File

@ -16,35 +16,35 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUBUFFER_HEADER_HPP #ifndef OPM_GPUBUFFER_HEADER_HPP
#define OPM_CUBUFFER_HEADER_HPP #define OPM_GPUBUFFER_HEADER_HPP
#include <dune/common/fvector.hh> #include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <exception> #include <exception>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp> #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <vector> #include <vector>
#include <string> #include <string>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
/** /**
* @brief The CuBuffer class is a simple container class for the GPU. * @brief The GpuBuffer class is a simple container class for the GPU.
* *
* *
* Example usage: * Example usage:
* *
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/CuBuffer.hpp> * #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
* *
* void someFunction() { * void someFunction() {
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); * auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
* *
* auto dataOnGPU = CuBuffer<double>(someDataOnCPU); * auto dataOnGPU = GpuBuffer<double>(someDataOnCPU);
* *
* auto stdVectorOnCPU = dataOnGPU.asStdVector(); * auto stdVectorOnCPU = dataOnGPU.asStdVector();
* } * }
@ -52,7 +52,7 @@ namespace Opm::cuistl
* @tparam T the type to store. Can be either float, double or int. * @tparam T the type to store. Can be either float, double or int.
*/ */
template <typename T> template <typename T>
class CuBuffer class GpuBuffer
{ {
public: public:
using field_type = T; using field_type = T;
@ -60,17 +60,17 @@ public:
using value_type = T; using value_type = T;
/** /**
* @brief CuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to * @brief GpuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
* this newly allocated memory. * this newly allocated memory.
* *
* @note This does synchronous transfer. * @note This does synchronous transfer.
* *
* @param other the buffer to copy from * @param other the buffer to copy from
*/ */
CuBuffer(const CuBuffer<T>& other); GpuBuffer(const GpuBuffer<T>& other);
/** /**
* @brief CuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to * @brief GpuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
* this newly allocated memory. * this newly allocated memory.
* *
* @note This does CPU to GPU transfer. * @note This does CPU to GPU transfer.
@ -78,23 +78,23 @@ public:
* *
* @param data the vector to copy from * @param data the vector to copy from
*/ */
explicit CuBuffer(const std::vector<T>& data); explicit GpuBuffer(const std::vector<T>& data);
/** /**
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory * @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
*/ */
explicit CuBuffer(); explicit GpuBuffer();
/** /**
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
* *
* @param numberOfElements number of T elements to allocate * @param numberOfElements number of T elements to allocate
*/ */
explicit CuBuffer(const size_t numberOfElements); explicit GpuBuffer(const size_t numberOfElements);
/** /**
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data * data
* *
* @note This assumes the data is on the CPU. * @note This assumes the data is on the CPU.
@ -102,12 +102,12 @@ public:
* @param numberOfElements number of T elements to allocate * @param numberOfElements number of T elements to allocate
* @param dataOnHost data on host/CPU * @param dataOnHost data on host/CPU
*/ */
CuBuffer(const T* dataOnHost, const size_t numberOfElements); GpuBuffer(const T* dataOnHost, const size_t numberOfElements);
/** /**
* @brief ~CuBuffer calls cudaFree * @brief ~GpuBuffer calls cudaFree
*/ */
virtual ~CuBuffer(); virtual ~GpuBuffer();
/** /**
* @return the raw pointer to the GPU data * @return the raw pointer to the GPU data
@ -120,7 +120,7 @@ public:
const T* data() const; const T* data() const;
/** /**
* @return fetch the first element in a CuBuffer * @return fetch the first element in a GpuBuffer
*/ */
__host__ __device__ T& front() __host__ __device__ T& front()
{ {
@ -131,7 +131,7 @@ public:
} }
/** /**
* @return fetch the last element in a CuBuffer * @return fetch the last element in a GpuBuffer
*/ */
__host__ __device__ T& back() __host__ __device__ T& back()
{ {
@ -142,7 +142,7 @@ public:
} }
/** /**
* @return fetch the first element in a CuBuffer * @return fetch the first element in a GpuBuffer
*/ */
__host__ __device__ T front() const __host__ __device__ T front() const
{ {
@ -153,7 +153,7 @@ public:
} }
/** /**
* @return fetch the last element in a CuBuffer * @return fetch the last element in a GpuBuffer
*/ */
__host__ __device__ T back() const __host__ __device__ T back() const
{ {
@ -176,7 +176,7 @@ public:
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
if (m_numberOfElements != bvector.size()) { if (m_numberOfElements != bvector.size()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuBuffer has size {}, \n" fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
"however, BlockVector has N() = {}, and size = {}.", "however, BlockVector has N() = {}, and size = {}.",
m_numberOfElements, m_numberOfElements,
bvector.N(), bvector.N(),
@ -199,7 +199,7 @@ public:
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
if (m_numberOfElements != bvector.size()) { if (m_numberOfElements != bvector.size()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuBuffer has size {},\n however, the BlockVector " fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
"has has N() = {}, and size() = {}.", "has has N() = {}, and size() = {}.",
m_numberOfElements, m_numberOfElements,
bvector.N(), bvector.N(),
@ -267,14 +267,14 @@ private:
T* m_dataOnDevice = nullptr; T* m_dataOnDevice = nullptr;
size_t m_numberOfElements; size_t m_numberOfElements;
void assertSameSize(const CuBuffer<T>& other) const; void assertSameSize(const GpuBuffer<T>& other) const;
void assertSameSize(size_t size) const; void assertSameSize(size_t size) const;
void assertHasElements() const; void assertHasElements() const;
}; };
template <class T> template <class T>
CuView<const T> make_view(const CuBuffer<T>&); GpuView<const T> make_view(const GpuBuffer<T>&);
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -25,28 +25,28 @@
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp> #include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp> #include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp> #include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp> #include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
#include <tuple> #include <tuple>
#include <functional> #include <functional>
#include <utility> #include <utility>
#include <string> #include <string>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels) GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
: m_cpuMatrix(A) : m_cpuMatrix(A)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER)) , m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets)) , m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets)) , m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true)) , m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuNaturalToReorder(m_naturalToReordered) , m_gpuNaturalToReorder(m_naturalToReordered)
, m_gpuReorderToNatural(m_reorderedToNatural) , m_gpuReorderToNatural(m_reorderedToNatural)
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize()) , m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
@ -71,13 +71,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(), m_gpuMatrix.nonzeroes(),
A.nonzeroes())); A.nonzeroes()));
if (m_splitMatrix) { if (m_splitMatrix) {
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N()); m_gpuMatrixReorderedDiag = std::make_unique<GpuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper) std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, = detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural); m_reorderedToNatural);
} }
else { else {
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>( m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural); m_cpuMatrix, m_reorderedToNatural);
} }
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize); computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
@ -89,13 +89,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) GpuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::apply(X& v, const Y& d) GpuDILU<M, X, Y, l>::apply(X& v, const Y& d)
{ {
OPM_TIMEBLOCK(prec_apply); OPM_TIMEBLOCK(prec_apply);
{ {
@ -105,7 +105,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize) GpuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize)
{ {
int levelStartIdx = 0; int levelStartIdx = 0;
for (int level = 0; level < m_levelSets.size(); ++level) { for (int level = 0; level < m_levelSets.size(); ++level) {
@ -172,20 +172,20 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int u
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::post([[maybe_unused]] X& x) GpuDILU<M, X, Y, l>::post([[maybe_unused]] X& x)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
Dune::SolverCategory::Category Dune::SolverCategory::Category
CuDILU<M, X, Y, l>::category() const GpuDILU<M, X, Y, l>::category() const
{ {
return Dune::SolverCategory::sequential; return Dune::SolverCategory::sequential;
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::update() GpuDILU<M, X, Y, l>::update()
{ {
OPM_TIMEBLOCK(prec_update); OPM_TIMEBLOCK(prec_update);
{ {
@ -195,7 +195,7 @@ CuDILU<M, X, Y, l>::update()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize) GpuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
{ {
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu
computeDiagAndMoveReorderedData(moveThreadBlockSize, factorizationBlockSize); computeDiagAndMoveReorderedData(moveThreadBlockSize, factorizationBlockSize);
@ -203,7 +203,7 @@ CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize) GpuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize)
{ {
if (m_splitMatrix) { if (m_splitMatrix) {
detail::copyMatDataToReorderedSplit<field_type, blocksize_>( detail::copyMatDataToReorderedSplit<field_type, blocksize_>(
@ -264,7 +264,7 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuDILU<M, X, Y, l>::tuneThreadBlockSizes() GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
{ {
// tune the thread-block size of the update function // tune the thread-block size of the update function
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){ auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
@ -278,8 +278,8 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization"); m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
// tune the thread-block size of the apply // tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1; tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){ auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set"); m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
} }
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \ #define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \ ::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>> ::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1); INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2); INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -16,18 +16,18 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUDILU_HPP #ifndef OPM_GPUDILU_HPP
#define OPM_CUDILU_HPP #define OPM_GPUDILU_HPP
#include <memory> #include <memory>
#include <opm/grid/utility/SparseTable.hpp> #include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <vector> #include <vector>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief DILU preconditioner on the GPU. //! \brief DILU preconditioner on the GPU.
//! //!
@ -37,10 +37,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments //! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners. //! as other preconditioners.
//! //!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template //! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions. //! arguments in case of future additions.
template <class M, class X, class Y, int l = 1> template <class M, class X, class Y, int l = 1>
class CuDILU : public Dune::PreconditionerWithUpdate<X, Y> class GpuDILU : public Dune::PreconditionerWithUpdate<X, Y>
{ {
public: public:
//! \brief The matrix type the preconditioner is for. //! \brief The matrix type the preconditioner is for.
@ -52,7 +52,7 @@ public:
//! \brief The field type of the preconditioner. //! \brief The field type of the preconditioner.
using field_type = typename X::field_type; using field_type = typename X::field_type;
//! \brief The GPU matrix type //! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>; using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor. //! \brief Constructor.
//! //!
@ -60,7 +60,7 @@ public:
//! \param A The matrix to operate on. //! \param A The matrix to operate on.
//! \param w The relaxation factor. //! \param w The relaxation factor.
//! //!
explicit CuDILU(const M& A, bool splitMatrix, bool tuneKernels); explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels);
//! \brief Prepare the preconditioner. //! \brief Prepare the preconditioner.
//! \note Does nothing at the time being. //! \note Does nothing at the time being.
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower; std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper; std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately //! \brief If matrix splitting is enabled, we also store the diagonal separately
std::unique_ptr<CuVector<field_type>> m_gpuMatrixReorderedDiag; std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU //! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder; GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU //! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural; GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in DILU //! \brief Stores the inverted diagonal that we use in DILU
CuVector<field_type> m_gpuDInv; GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format //! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix; bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards //! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1; int m_moveThreadBlockSize = -1;
int m_DILUFactorizationThreadBlockSize = -1; int m_DILUFactorizationThreadBlockSize = -1;
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -20,20 +20,20 @@
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuJac.hpp> #include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
CuJac<M, X, Y, l>::CuJac(const M& A, field_type w) GpuJac<M, X, Y, l>::GpuJac(const M& A, field_type w)
: m_cpuMatrix(A) : m_cpuMatrix(A)
, m_relaxationFactor(w) , m_relaxationFactor(w)
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A)) , m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(A))
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize()) , m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
{ {
// Some sanity check // Some sanity check
@ -58,13 +58,13 @@ CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) GpuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuJac<M, X, Y, l>::apply(X& v, const Y& d) GpuJac<M, X, Y, l>::apply(X& v, const Y& d)
{ {
// Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) ) // Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) )
// Working with defect d and update v it we only need to set v = w*(D^-1)*d // Working with defect d and update v it we only need to set v = w*(D^-1)*d
@ -77,20 +77,20 @@ CuJac<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuJac<M, X, Y, l>::post([[maybe_unused]] X& x) GpuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
Dune::SolverCategory::Category Dune::SolverCategory::Category
CuJac<M, X, Y, l>::category() const GpuJac<M, X, Y, l>::category() const
{ {
return Dune::SolverCategory::sequential; return Dune::SolverCategory::sequential;
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuJac<M, X, Y, l>::update() GpuJac<M, X, Y, l>::update()
{ {
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix); m_gpuMatrix.updateNonzeroValues(m_cpuMatrix);
invertDiagonalAndFlatten(); invertDiagonalAndFlatten();
@ -98,7 +98,7 @@ CuJac<M, X, Y, l>::update()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuJac<M, X, Y, l>::invertDiagonalAndFlatten() GpuJac<M, X, Y, l>::invertDiagonalAndFlatten()
{ {
detail::JAC::invertDiagonalAndFlatten<field_type, matrix_type::block_type::cols>( detail::JAC::invertDiagonalAndFlatten<field_type, matrix_type::block_type::cols>(
m_gpuMatrix.getNonZeroValues().data(), m_gpuMatrix.getNonZeroValues().data(),
@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
m_diagInvFlattened.data()); m_diagInvFlattened.data());
} }
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \ #define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \ ::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>> ::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUJAC_DUNE(double, 1); INSTANTIATE_CUJAC_DUNE(double, 1);
INSTANTIATE_CUJAC_DUNE(double, 2); INSTANTIATE_CUJAC_DUNE(double, 2);

View File

@ -16,19 +16,19 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUJAC_HPP #ifndef OPM_GPUJAC_HPP
#define OPM_CUJAC_HPP #define OPM_GPUJAC_HPP
#include <dune/istl/preconditioner.hh> #include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief Jacobi preconditioner on the GPU. //! \brief Jacobi preconditioner on the GPU.
//! //!
@ -40,10 +40,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments //! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners. //! as other preconditioners.
//! //!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template //! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions. //! arguments in case of future additions.
template <class M, class X, class Y, int l = 1> template <class M, class X, class Y, int l = 1>
class CuJac : public Dune::PreconditionerWithUpdate<X, Y> class GpuJac : public Dune::PreconditionerWithUpdate<X, Y>
{ {
public: public:
//! \brief The matrix type the preconditioner is for. //! \brief The matrix type the preconditioner is for.
@ -61,7 +61,7 @@ public:
//! \param A The matrix to operate on. //! \param A The matrix to operate on.
//! \param w The relaxation factor. //! \param w The relaxation factor.
//! //!
CuJac(const M& A, field_type w); GpuJac(const M& A, field_type w);
//! \brief Prepare the preconditioner. //! \brief Prepare the preconditioner.
//! \note Does nothing at the time being. //! \note Does nothing at the time being.
@ -104,12 +104,12 @@ private:
//! \brief The relaxation factor to use. //! \brief The relaxation factor to use.
const field_type m_relaxationFactor; const field_type m_relaxationFactor;
//! \brief The A matrix stored on the gpu //! \brief The A matrix stored on the gpu
CuSparseMatrix<field_type> m_gpuMatrix; GpuSparseMatrix<field_type> m_gpuMatrix;
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector //! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
CuVector<field_type> m_diagInvFlattened; GpuVector<field_type> m_diagInvFlattened;
void invertDiagonalAndFlatten(); void invertDiagonalAndFlatten();
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -16,15 +16,15 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP #ifndef OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP #define OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
#include <dune/istl/owneroverlapcopy.hh> #include <dune/istl/owneroverlapcopy.hh>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <vector> #include <vector>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
/** /**
* @brief GPUSender is a wrapper class for classes which will implement copOwnerToAll * @brief GPUSender is a wrapper class for classes which will implement copOwnerToAll
@ -36,7 +36,7 @@ namespace Opm::cuistl
template<class field_type, class OwnerOverlapCopyCommunicationType> template<class field_type, class OwnerOverlapCopyCommunicationType>
class GPUSender { class GPUSender {
public: public:
using X = CuVector<field_type>; using X = GpuVector<field_type>;
GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){} GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){}
@ -97,8 +97,8 @@ protected:
// premature optimization, in the sense that we could just initialize these indices // premature optimization, in the sense that we could just initialize these indices
// always, but they are not always used. // always, but they are not always used.
mutable std::once_flag m_initializedIndices; mutable std::once_flag m_initializedIndices;
mutable std::unique_ptr<CuVector<int>> m_indicesOwner; mutable std::unique_ptr<GpuVector<int>> m_indicesOwner;
mutable std::unique_ptr<CuVector<int>> m_indicesCopy; mutable std::unique_ptr<GpuVector<int>> m_indicesCopy;
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy; const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
}; };
@ -113,7 +113,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType> class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{ {
public: public:
using X = CuVector<field_type>; using X = GpuVector<field_type>;
GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy) : GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -151,8 +151,8 @@ private:
} }
} }
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU); this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU); this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
} }
}; };
@ -168,7 +168,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType> class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{ {
public: public:
using X = CuVector<field_type>; using X = GpuVector<field_type>;
GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy) : GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -178,7 +178,7 @@ public:
void copyOwnerToAll(const X& source, X& dest) const override void copyOwnerToAll(const X& source, X& dest) const override
{ {
OPM_ERROR_IF(&source != &dest, "The provided CuVectors' address did not match"); // In this context, source == dest!!! OPM_ERROR_IF(&source != &dest, "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); }); std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
int rank = this->m_cpuOwnerOverlapCopy.communicator().rank(); int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
@ -251,10 +251,10 @@ public:
} }
private: private:
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesCopy; mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesOwner; mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
mutable std::unique_ptr<CuVector<field_type>> m_GPUSendBuf; mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
mutable std::unique_ptr<CuVector<field_type>> m_GPURecvBuf; mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
struct MessageInformation struct MessageInformation
{ {
@ -332,11 +332,11 @@ private:
} }
} }
m_commpairIndicesCopy = std::make_unique<CuVector<int>>(commpairIndicesCopyOnCPU); m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
m_commpairIndicesOwner = std::make_unique<CuVector<int>>(commpairIndicesOwnerCPU); m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
m_GPUSendBuf = std::make_unique<CuVector<field_type>>(sendBufIdx * block_size); m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
m_GPURecvBuf = std::make_unique<CuVector<field_type>>(recvBufIdx * block_size); m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
} }
void initIndexSet() const override void initIndexSet() const override
@ -360,8 +360,8 @@ private:
} }
} }
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU); this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU); this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
buildCommPairIdxs(); buildCommPairIdxs();
} }
@ -371,21 +371,21 @@ private:
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication * @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
* *
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as * This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
* a Dune::OwnerOverlapCopyCommunication on CuVectors * a Dune::OwnerOverlapCopyCommunication on GpuVectors
* *
* @note This currently only has the functionality to parallelize the linear solve. * @note This currently only has the functionality to parallelize the linear solve.
* *
* @tparam field_type should be a field_type supported by CuVector (double, float) * @tparam field_type should be a field_type supported by GpuVector (double, float)
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices) * @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication. * @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
*/ */
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType> template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
class CuOwnerOverlapCopy class GpuOwnerOverlapCopy
{ {
public: public:
using X = CuVector<field_type>; using X = GpuVector<field_type>;
CuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){} GpuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
void copyOwnerToAll(const X& source, X& dest) const { void copyOwnerToAll(const X& source, X& dest) const {
m_sender->copyOwnerToAll(source, dest); m_sender->copyOwnerToAll(source, dest);
@ -409,5 +409,5 @@ public:
private: private:
std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender; std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -25,26 +25,26 @@
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp> #include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> #include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
// This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve , // This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve ,
// it highly recommended to read that before proceeding. // it highly recommended to read that before proceeding.
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w) GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A) : m_underlyingMatrix(A)
, m_w(w) , m_w(w)
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A))) , m_LU(GpuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_temporaryStorage(m_LU.N() * m_LU.blockSize()) , m_temporaryStorage(m_LU.N() * m_LU.blockSize())
, m_descriptionL(detail::createLowerDiagonalDescription()) , m_descriptionL(detail::createLowerDiagonalDescription())
, m_descriptionU(detail::createUpperDiagonalDescription()) , m_descriptionU(detail::createUpperDiagonalDescription())
@ -70,13 +70,13 @@ CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) GpuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d) GpuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
{ {
// We need to pass the solve routine a scalar to multiply. // We need to pass the solve routine a scalar to multiply.
@ -133,20 +133,20 @@ CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x) GpuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
{ {
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
Dune::SolverCategory::Category Dune::SolverCategory::Category
CuSeqILU0<M, X, Y, l>::category() const GpuSeqILU0<M, X, Y, l>::category() const
{ {
return Dune::SolverCategory::sequential; return Dune::SolverCategory::sequential;
} }
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::update() GpuSeqILU0<M, X, Y, l>::update()
{ {
m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix)); m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
createILU(); createILU();
@ -154,7 +154,7 @@ CuSeqILU0<M, X, Y, l>::update()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::analyzeMatrix() GpuSeqILU0<M, X, Y, l>::analyzeMatrix()
{ {
if (!m_buffer) { if (!m_buffer) {
@ -226,7 +226,7 @@ CuSeqILU0<M, X, Y, l>::analyzeMatrix()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
size_t size_t
CuSeqILU0<M, X, Y, l>::findBufferSize() GpuSeqILU0<M, X, Y, l>::findBufferSize()
{ {
// We have three calls that need buffers: // We have three calls that need buffers:
// 1) LU decomposition // 1) LU decomposition
@ -290,7 +290,7 @@ CuSeqILU0<M, X, Y, l>::findBufferSize()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::createILU() GpuSeqILU0<M, X, Y, l>::createILU()
{ {
OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size."); OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first."); OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
@ -328,35 +328,35 @@ CuSeqILU0<M, X, Y, l>::createILU()
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
void void
CuSeqILU0<M, X, Y, l>::updateILUConfiguration() GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
{ {
auto bufferSize = findBufferSize(); auto bufferSize = findBufferSize();
if (!m_buffer || m_buffer->dim() < bufferSize) { if (!m_buffer || m_buffer->dim() < bufferSize) {
m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type))); m_buffer.reset(new GpuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
} }
analyzeMatrix(); analyzeMatrix();
createILU(); createILU();
} }
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \ #define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \ ::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>> ::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUSEQILU0_DUNE(double, 1); INSTANTIATE_GPUSEQILU0_DUNE(double, 1);
INSTANTIATE_CUSEQILU0_DUNE(double, 2); INSTANTIATE_GPUSEQILU0_DUNE(double, 2);
INSTANTIATE_CUSEQILU0_DUNE(double, 3); INSTANTIATE_GPUSEQILU0_DUNE(double, 3);
INSTANTIATE_CUSEQILU0_DUNE(double, 4); INSTANTIATE_GPUSEQILU0_DUNE(double, 4);
INSTANTIATE_CUSEQILU0_DUNE(double, 5); INSTANTIATE_GPUSEQILU0_DUNE(double, 5);
INSTANTIATE_CUSEQILU0_DUNE(double, 6); INSTANTIATE_GPUSEQILU0_DUNE(double, 6);
INSTANTIATE_CUSEQILU0_DUNE(float, 1); INSTANTIATE_GPUSEQILU0_DUNE(float, 1);
INSTANTIATE_CUSEQILU0_DUNE(float, 2); INSTANTIATE_GPUSEQILU0_DUNE(float, 2);
INSTANTIATE_CUSEQILU0_DUNE(float, 3); INSTANTIATE_GPUSEQILU0_DUNE(float, 3);
INSTANTIATE_CUSEQILU0_DUNE(float, 4); INSTANTIATE_GPUSEQILU0_DUNE(float, 4);
INSTANTIATE_CUSEQILU0_DUNE(float, 5); INSTANTIATE_GPUSEQILU0_DUNE(float, 5);
INSTANTIATE_CUSEQILU0_DUNE(float, 6); INSTANTIATE_GPUSEQILU0_DUNE(float, 6);

View File

@ -16,19 +16,19 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUSEQILU0_HPP #ifndef OPM_GPUSEQILU0_HPP
#define OPM_CUSEQILU0_HPP #define OPM_GPUSEQILU0_HPP
#include <dune/istl/preconditioner.hh> #include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library. //! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
//! //!
@ -43,10 +43,10 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments //! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners. //! as other preconditioners.
//! //!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template //! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions. //! arguments in case of future additions.
template <class M, class X, class Y, int l = 1> template <class M, class X, class Y, int l = 1>
class CuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y> class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
{ {
public: public:
//! \brief The matrix type the preconditioner is for. //! \brief The matrix type the preconditioner is for.
@ -64,7 +64,7 @@ public:
//! \param A The matrix to operate on. //! \param A The matrix to operate on.
//! \param w The relaxation factor. //! \param w The relaxation factor.
//! //!
CuSeqILU0(const M& A, field_type w); GpuSeqILU0(const M& A, field_type w);
//! \brief Prepare the preconditioner. //! \brief Prepare the preconditioner.
//! \note Does nothing at the time being. //! \note Does nothing at the time being.
@ -110,18 +110,18 @@ private:
//! This is the storage for the LU composition. //! This is the storage for the LU composition.
//! Initially this will have the values of A, but will be //! Initially this will have the values of A, but will be
//! modified in the constructor to be the proper LU decomposition. //! modified in the constructor to be the proper LU decomposition.
CuSparseMatrix<field_type> m_LU; GpuSparseMatrix<field_type> m_LU;
CuVector<field_type> m_temporaryStorage; GpuVector<field_type> m_temporaryStorage;
detail::CuSparseMatrixDescriptionPtr m_descriptionL; detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
detail::CuSparseMatrixDescriptionPtr m_descriptionU; detail::GpuSparseMatrixDescriptionPtr m_descriptionU;
detail::CuSparseResource<bsrsv2Info_t> m_infoL; detail::CuSparseResource<bsrsv2Info_t> m_infoL;
detail::CuSparseResource<bsrsv2Info_t> m_infoU; detail::CuSparseResource<bsrsv2Info_t> m_infoU;
detail::CuSparseResource<bsrilu02Info_t> m_infoM; detail::CuSparseResource<bsrilu02Info_t> m_infoM;
std::unique_ptr<CuVector<field_type>> m_buffer; std::unique_ptr<GpuVector<field_type>> m_buffer;
detail::CuSparseHandle& m_cuSparseHandle; detail::CuSparseHandle& m_cuSparseHandle;
bool m_analysisDone = false; bool m_analysisDone = false;
@ -133,6 +133,6 @@ private:
void updateILUConfiguration(); void updateILUConfiguration();
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -22,14 +22,14 @@
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
#include <type_traits> #include <type_traits>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
namespace namespace
@ -61,7 +61,7 @@ namespace
template <class T> template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements, GpuSparseMatrix<T>::GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices, const int* rowIndices,
const int* columnIndices, const int* columnIndices,
size_t numberOfNonzeroBlocks, size_t numberOfNonzeroBlocks,
@ -82,15 +82,15 @@ CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
} }
template <class T> template <class T>
CuSparseMatrix<T>::~CuSparseMatrix() GpuSparseMatrix<T>::~GpuSparseMatrix()
{ {
// empty // empty
} }
template <typename T> template <typename T>
template <typename MatrixType> template <typename MatrixType>
CuSparseMatrix<T> GpuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly) GpuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{ {
// TODO: Do we need this intermediate storage? Or this shuffling of data? // TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices; std::vector<int> columnIndices;
@ -129,18 +129,18 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
// Sanity check // Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int' // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()), OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix."); "Error size of rows do not sum to number of nonzeroes in GpuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix."); OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for GpuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix."); OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for GpuSparseMatrix.");
if (copyNonZeroElementsDirectly) { if (copyNonZeroElementsDirectly) {
const T* nonZeroElements = nonZeroElementsTmp; const T* nonZeroElements = nonZeroElementsTmp;
return CuSparseMatrix<T>( return GpuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows); nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else { } else {
auto nonZeroElementData = extractNonzeroValues<T>(matrix); auto nonZeroElementData = extractNonzeroValues<T>(matrix);
return CuSparseMatrix<T>(nonZeroElementData.data(), return GpuSparseMatrix<T>(nonZeroElementData.data(),
rowIndices.data(), rowIndices.data(),
columnIndices.data(), columnIndices.data(),
numberOfNonzeroBlocks, numberOfNonzeroBlocks,
@ -152,7 +152,7 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
template <class T> template <class T>
template <class MatrixType> template <class MatrixType>
void void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly) GpuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{ {
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements."); OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize."); OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
@ -170,42 +170,42 @@ CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZer
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::setUpperTriangular() GpuSparseMatrix<T>::setUpperTriangular()
{ {
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER)); OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
} }
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::setLowerTriangular() GpuSparseMatrix<T>::setLowerTriangular()
{ {
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER)); OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
} }
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::setUnitDiagonal() GpuSparseMatrix<T>::setUnitDiagonal()
{ {
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT)); OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
} }
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::setNonUnitDiagonal() GpuSparseMatrix<T>::setNonUnitDiagonal()
{ {
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT)); OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
} }
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const GpuSparseMatrix<T>::mv(const GpuVector<T>& x, GpuVector<T>& y) const
{ {
assertSameSize(x); assertSameSize(x);
assertSameSize(y); assertSameSize(y);
if (blockSize() < 2u) { if (blockSize() < 2u) {
OPM_THROW( OPM_THROW(
std::invalid_argument, std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1."); "GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
} }
const auto nonzeroValues = getNonZeroValues().data(); const auto nonzeroValues = getNonZeroValues().data();
@ -232,14 +232,14 @@ CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const GpuSparseMatrix<T>::umv(const GpuVector<T>& x, GpuVector<T>& y) const
{ {
assertSameSize(x); assertSameSize(x);
assertSameSize(y); assertSameSize(y);
if (blockSize() < 2u) { if (blockSize() < 2u) {
OPM_THROW( OPM_THROW(
std::invalid_argument, std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1."); "GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
} }
const auto nonzeroValues = getNonZeroValues().data(); const auto nonzeroValues = getNonZeroValues().data();
@ -267,14 +267,14 @@ CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T> template <typename T>
void void
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const GpuSparseMatrix<T>::usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const
{ {
assertSameSize(x); assertSameSize(x);
assertSameSize(y); assertSameSize(y);
if (blockSize() < 2) { if (blockSize() < 2) {
OPM_THROW( OPM_THROW(
std::invalid_argument, std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1."); "GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
} }
const auto numberOfRows = N(); const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes(); const auto numberOfNonzeroBlocks = nonzeroes();
@ -304,7 +304,7 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
template <class T> template <class T>
template <class VectorType> template <class VectorType>
void void
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const GpuSparseMatrix<T>::assertSameSize(const VectorType& x) const
{ {
if (x.dim() != blockSize() * N()) { if (x.dim() != blockSize() * N()) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
@ -317,17 +317,17 @@ CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \ #define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \ template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \ const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \ template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \ const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \ template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \ const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \ template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool) const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
template class CuSparseMatrix<float>; template class GpuSparseMatrix<float>;
template class CuSparseMatrix<double>; template class GpuSparseMatrix<double>;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1); INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2); INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5); INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6); INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl } // namespace Opm::gpuistl

View File

@ -16,23 +16,23 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUSPARSEMATRIX_HPP #ifndef OPM_GPUSPARSEMATRIX_HPP
#define OPM_CUSPARSEMATRIX_HPP #define OPM_GPUSPARSEMATRIX_HPP
#include <cusparse.h> #include <cusparse.h>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <vector> #include <vector>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
/** /**
* @brief The CuSparseMatrix class simple wrapper class for a CuSparse matrix. * @brief The GpuSparseMatrix class simple wrapper class for a CuSparse matrix.
* *
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the * @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
* block size parameter * block size parameter
@ -44,7 +44,7 @@ namespace Opm::cuistl
* @note We only support Block Compressed Sparse Row Format (BSR) for now. * @note We only support Block Compressed Sparse Row Format (BSR) for now.
*/ */
template <typename T> template <typename T>
class CuSparseMatrix class GpuSparseMatrix
{ {
public: public:
//! Create the sparse matrix specified by the raw data. //! Create the sparse matrix specified by the raw data.
@ -60,7 +60,7 @@ public:
//! //!
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to //! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
//! restrictions in the current version of cusparse. This might change in future versions. //! restrictions in the current version of cusparse. This might change in future versions.
CuSparseMatrix(const T* nonZeroElements, GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices, const int* rowIndices,
const int* columnIndices, const int* columnIndices,
size_t numberOfNonzeroBlocks, size_t numberOfNonzeroBlocks,
@ -70,14 +70,14 @@ public:
/** /**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources) * We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/ */
CuSparseMatrix(const CuSparseMatrix&) = delete; GpuSparseMatrix(const GpuSparseMatrix&) = delete;
/** /**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources) * We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/ */
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete; GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete;
virtual ~CuSparseMatrix(); virtual ~GpuSparseMatrix();
/** /**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
@ -89,7 +89,7 @@ public:
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/ */
template <class MatrixType> template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); static GpuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
/** /**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix. * @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
@ -144,7 +144,7 @@ public:
* *
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
CuVector<T>& getNonZeroValues() GpuVector<T>& getNonZeroValues()
{ {
return m_nonZeroElements; return m_nonZeroElements;
} }
@ -154,7 +154,7 @@ public:
* *
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
const CuVector<T>& getNonZeroValues() const const GpuVector<T>& getNonZeroValues() const
{ {
return m_nonZeroElements; return m_nonZeroElements;
} }
@ -164,7 +164,7 @@ public:
* *
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
CuVector<int>& getRowIndices() GpuVector<int>& getRowIndices()
{ {
return m_rowIndices; return m_rowIndices;
} }
@ -174,7 +174,7 @@ public:
* *
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
const CuVector<int>& getRowIndices() const const GpuVector<int>& getRowIndices() const
{ {
return m_rowIndices; return m_rowIndices;
} }
@ -184,7 +184,7 @@ public:
* *
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
CuVector<int>& getColumnIndices() GpuVector<int>& getColumnIndices()
{ {
return m_columnIndices; return m_columnIndices;
} }
@ -194,7 +194,7 @@ public:
* *
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. * @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/ */
const CuVector<int>& getColumnIndices() const const GpuVector<int>& getColumnIndices() const
{ {
return m_columnIndices; return m_columnIndices;
} }
@ -233,7 +233,7 @@ public:
* *
* This description is needed for most calls to the CuSparse library * This description is needed for most calls to the CuSparse library
*/ */
detail::CuSparseMatrixDescription& getDescription() detail::GpuSparseMatrixDescription& getDescription()
{ {
return *m_matrixDescription; return *m_matrixDescription;
} }
@ -245,7 +245,7 @@ public:
* *
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/ */
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const; virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
/** /**
* @brief umv computes y=Ax+y * @brief umv computes y=Ax+y
@ -254,7 +254,7 @@ public:
* *
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/ */
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const; virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
/** /**
@ -264,7 +264,7 @@ public:
* *
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/ */
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const; virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
/** /**
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix * @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
@ -280,9 +280,9 @@ public:
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
private: private:
CuVector<T> m_nonZeroElements; GpuVector<T> m_nonZeroElements;
CuVector<int> m_columnIndices; GpuVector<int> m_columnIndices;
CuVector<int> m_rowIndices; GpuVector<int> m_rowIndices;
// Notice that we store these three as int to make sure we are cusparse compatible. // Notice that we store these three as int to make sure we are cusparse compatible.
// //
@ -292,11 +292,11 @@ private:
const int m_numberOfRows; const int m_numberOfRows;
const int m_blockSize; const int m_blockSize;
detail::CuSparseMatrixDescriptionPtr m_matrixDescription; detail::GpuSparseMatrixDescriptionPtr m_matrixDescription;
detail::CuSparseHandle& m_cusparseHandle; detail::CuSparseHandle& m_cusparseHandle;
template <class VectorType> template <class VectorType>
void assertSameSize(const VectorType& vector) const; void assertSameSize(const VectorType& vector) const;
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -20,41 +20,41 @@
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class T> template <class T>
CuVector<T>::CuVector(const std::vector<T>& data) GpuVector<T>::GpuVector(const std::vector<T>& data)
: CuVector(data.data(), detail::to_int(data.size())) : GpuVector(data.data(), detail::to_int(data.size()))
{ {
} }
template <class T> template <class T>
CuVector<T>::CuVector(const size_t numberOfElements) GpuVector<T>::GpuVector(const size_t numberOfElements)
: m_numberOfElements(detail::to_int(numberOfElements)) : m_numberOfElements(detail::to_int(numberOfElements))
, m_cuBlasHandle(detail::CuBlasHandle::getInstance()) , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{ {
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements))); OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
} }
template <class T> template <class T>
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements) GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
: CuVector(numberOfElements) : GpuVector(numberOfElements)
{ {
OPM_CUDA_SAFE_CALL(cudaMemcpy( OPM_GPU_SAFE_CALL(cudaMemcpy(
m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice)); m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::operator=(T scalar) GpuVector<T>::operator=(T scalar)
{ {
assertHasElements(); assertHasElements();
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar); detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
@ -62,13 +62,13 @@ CuVector<T>::operator=(T scalar)
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::operator=(const CuVector<T>& other) GpuVector<T>::operator=(const GpuVector<T>& other)
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice, other.m_dataOnDevice,
detail::to_size_t(m_numberOfElements) * sizeof(T), detail::to_size_t(m_numberOfElements) * sizeof(T),
cudaMemcpyDeviceToDevice)); cudaMemcpyDeviceToDevice));
@ -76,33 +76,33 @@ CuVector<T>::operator=(const CuVector<T>& other)
} }
template <class T> template <class T>
CuVector<T>::CuVector(const CuVector<T>& other) GpuVector<T>::GpuVector(const GpuVector<T>& other)
: CuVector(other.m_numberOfElements) : GpuVector(other.m_numberOfElements)
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
other.m_dataOnDevice, other.m_dataOnDevice,
detail::to_size_t(m_numberOfElements) * sizeof(T), detail::to_size_t(m_numberOfElements) * sizeof(T),
cudaMemcpyDeviceToDevice)); cudaMemcpyDeviceToDevice));
} }
template <class T> template <class T>
CuVector<T>::~CuVector() GpuVector<T>::~GpuVector()
{ {
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice)); OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
} }
template <typename T> template <typename T>
const T* const T*
CuVector<T>::data() const GpuVector<T>::data() const
{ {
return m_dataOnDevice; return m_dataOnDevice;
} }
template <typename T> template <typename T>
typename CuVector<T>::size_type typename GpuVector<T>::size_type
CuVector<T>::dim() const GpuVector<T>::dim() const
{ {
// Note that there is no way for m_numberOfElements to be non-positive, // Note that there is no way for m_numberOfElements to be non-positive,
// but for sanity we still use the safe conversion function here. // but for sanity we still use the safe conversion function here.
@ -114,7 +114,7 @@ CuVector<T>::dim() const
template <typename T> template <typename T>
std::vector<T> std::vector<T>
CuVector<T>::asStdVector() const GpuVector<T>::asStdVector() const
{ {
std::vector<T> temporary(detail::to_size_t(m_numberOfElements)); std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
copyToHost(temporary); copyToHost(temporary);
@ -123,21 +123,21 @@ CuVector<T>::asStdVector() const
template <typename T> template <typename T>
void void
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet) GpuVector<T>::setZeroAtIndexSet(const GpuVector<int>& indexSet)
{ {
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data()); detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
} }
template <typename T> template <typename T>
void void
CuVector<T>::assertSameSize(const CuVector<T>& x) const GpuVector<T>::assertSameSize(const GpuVector<T>& x) const
{ {
assertSameSize(x.m_numberOfElements); assertSameSize(x.m_numberOfElements);
} }
template <typename T> template <typename T>
void void
CuVector<T>::assertSameSize(int size) const GpuVector<T>::assertSameSize(int size) const
{ {
if (size != m_numberOfElements) { if (size != m_numberOfElements) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
@ -147,7 +147,7 @@ CuVector<T>::assertSameSize(int size) const
template <typename T> template <typename T>
void void
CuVector<T>::assertHasElements() const GpuVector<T>::assertHasElements() const
{ {
if (m_numberOfElements <= 0) { if (m_numberOfElements <= 0) {
OPM_THROW(std::invalid_argument, "We have 0 elements"); OPM_THROW(std::invalid_argument, "We have 0 elements");
@ -156,14 +156,14 @@ CuVector<T>::assertHasElements() const
template <typename T> template <typename T>
T* T*
CuVector<T>::data() GpuVector<T>::data()
{ {
return m_dataOnDevice; return m_dataOnDevice;
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::operator*=(const T& scalar) GpuVector<T>::operator*=(const T& scalar)
{ {
assertHasElements(); assertHasElements();
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1)); OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
@ -171,8 +171,8 @@ CuVector<T>::operator*=(const T& scalar)
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::axpy(T alpha, const CuVector<T>& y) GpuVector<T>::axpy(T alpha, const GpuVector<T>& y)
{ {
assertHasElements(); assertHasElements();
assertSameSize(y); assertSameSize(y);
@ -182,7 +182,7 @@ CuVector<T>::axpy(T alpha, const CuVector<T>& y)
template <class T> template <class T>
T T
CuVector<T>::dot(const CuVector<T>& other) const GpuVector<T>::dot(const GpuVector<T>& other) const
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
@ -193,7 +193,7 @@ CuVector<T>::dot(const CuVector<T>& other) const
} }
template <class T> template <class T>
T T
CuVector<T>::two_norm() const GpuVector<T>::two_norm() const
{ {
assertHasElements(); assertHasElements();
T result = T(0); T result = T(0);
@ -203,14 +203,14 @@ CuVector<T>::two_norm() const
template <typename T> template <typename T>
T T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{ {
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
} }
template <typename T> template <typename T>
T T
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const GpuVector<T>::two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{ {
// TODO: [perf] Optimize this to a single call // TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer)); return std::sqrt(this->dot(*this, indexSet, buffer));
@ -218,23 +218,23 @@ CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
template <typename T> template <typename T>
T T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const
{ {
CuVector<T> buffer(indexSet.dim()); GpuVector<T> buffer(indexSet.dim());
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
} }
template <typename T> template <typename T>
T T
CuVector<T>::two_norm(const CuVector<int>& indexSet) const GpuVector<T>::two_norm(const GpuVector<int>& indexSet) const
{ {
CuVector<T> buffer(indexSet.dim()); GpuVector<T> buffer(indexSet.dim());
// TODO: [perf] Optimize this to a single call // TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer)); return std::sqrt(this->dot(*this, indexSet, buffer));
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::operator+=(const CuVector<T>& other) GpuVector<T>::operator+=(const GpuVector<T>& other)
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
@ -243,8 +243,8 @@ CuVector<T>::operator+=(const CuVector<T>& other)
} }
template <class T> template <class T>
CuVector<T>& GpuVector<T>&
CuVector<T>::operator-=(const CuVector<T>& other) GpuVector<T>::operator-=(const GpuVector<T>& other)
{ {
assertHasElements(); assertHasElements();
assertSameSize(other); assertSameSize(other);
@ -255,7 +255,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)
template <class T> template <class T>
void void
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements) GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{ {
if (numberOfElements > dim()) { if (numberOfElements > dim()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
@ -263,45 +263,45 @@ CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
dim(), dim(),
numberOfElements)); numberOfElements));
} }
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
} }
template <class T> template <class T>
void void
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{ {
assertSameSize(detail::to_int(numberOfElements)); assertSameSize(detail::to_int(numberOfElements));
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
} }
template <class T> template <class T>
void void
CuVector<T>::copyFromHost(const std::vector<T>& data) GpuVector<T>::copyFromHost(const std::vector<T>& data)
{ {
copyFromHost(data.data(), data.size()); copyFromHost(data.data(), data.size());
} }
template <class T> template <class T>
void void
CuVector<T>::copyToHost(std::vector<T>& data) const GpuVector<T>::copyToHost(std::vector<T>& data) const
{ {
copyToHost(data.data(), data.size()); copyToHost(data.data(), data.size());
} }
template <typename T> template <typename T>
void void
CuVector<T>::prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const GpuVector<T>::prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{ {
return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data()); return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
} }
template <typename T> template <typename T>
void void
CuVector<T>::syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const GpuVector<T>::syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{ {
return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data()); return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
} }
template class CuVector<double>; template class GpuVector<double>;
template class CuVector<float>; template class GpuVector<float>;
template class CuVector<int>; template class GpuVector<int>;
} // namespace Opm::cuistl } // namespace Opm::gpuistl

View File

@ -16,24 +16,24 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUVECTOR_HEADER_HPP #ifndef OPM_GPUVECTOR_HEADER_HPP
#define OPM_CUVECTOR_HEADER_HPP #define OPM_GPUVECTOR_HEADER_HPP
#include <dune/common/fvector.hh> #include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <exception> #include <exception>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <vector> #include <vector>
#include <string> #include <string>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
/** /**
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU. * @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
* *
* @note we currently only support simple raw primitives for T (double, float and int) * @note we currently only support simple raw primitives for T (double, float and int)
* *
@ -45,12 +45,12 @@ namespace Opm::cuistl
* Example usage: * Example usage:
* *
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/CuVector.hpp> * #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
* *
* void someFunction() { * void someFunction() {
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); * auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
* *
* auto dataOnGPU = CuVector<double>(someDataOnCPU); * auto dataOnGPU = GpuVector<double>(someDataOnCPU);
* *
* // Multiply by 4.0: * // Multiply by 4.0:
* dataOnGPU *= 4.0; * dataOnGPU *= 4.0;
@ -62,7 +62,7 @@ namespace Opm::cuistl
* @tparam T the type to store. Can be either float, double or int. * @tparam T the type to store. Can be either float, double or int.
*/ */
template <typename T> template <typename T>
class CuVector class GpuVector
{ {
public: public:
using field_type = T; using field_type = T;
@ -70,17 +70,17 @@ public:
/** /**
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to * @brief GpuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
* this newly allocated memory. * this newly allocated memory.
* *
* @note This does synchronous transfer. * @note This does synchronous transfer.
* *
* @param other the vector to copy from * @param other the vector to copy from
*/ */
CuVector(const CuVector<T>& other); GpuVector(const GpuVector<T>& other);
/** /**
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to * @brief GpuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
* this newly allocated memory. * this newly allocated memory.
* *
* @note This does CPU to GPU transfer. * @note This does CPU to GPU transfer.
@ -90,7 +90,7 @@ public:
* *
* @param data the vector to copy from * @param data the vector to copy from
*/ */
explicit CuVector(const std::vector<T>& data); explicit GpuVector(const std::vector<T>& data);
/** /**
* @brief operator= copies the content of the data vector to the memory of this vector. * @brief operator= copies the content of the data vector to the memory of this vector.
@ -100,7 +100,7 @@ public:
* *
* @param other the vector to copy from * @param other the vector to copy from
*/ */
CuVector& operator=(const CuVector<T>& other); GpuVector& operator=(const GpuVector<T>& other);
/** /**
* @brief operator= sets the whole vector equal to the scalar value. * @brief operator= sets the whole vector equal to the scalar value.
@ -109,20 +109,20 @@ public:
* *
* @param scalar the value all elements will be set to. * @param scalar the value all elements will be set to.
*/ */
CuVector& operator=(T scalar); GpuVector& operator=(T scalar);
/** /**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) * @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T)
* *
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
* *
* @param numberOfElements number of T elements to allocate * @param numberOfElements number of T elements to allocate
*/ */
explicit CuVector(const size_t numberOfElements); explicit GpuVector(const size_t numberOfElements);
/** /**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from * @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data * data
* *
* @note This assumes the data is on the CPU. * @note This assumes the data is on the CPU.
@ -132,12 +132,12 @@ public:
* *
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
*/ */
CuVector(const T* dataOnHost, const size_t numberOfElements); GpuVector(const T* dataOnHost, const size_t numberOfElements);
/** /**
* @brief ~CuVector calls cudaFree * @brief ~GpuVector calls cudaFree
*/ */
virtual ~CuVector(); virtual ~GpuVector();
/** /**
* @return the raw pointer to the GPU data * @return the raw pointer to the GPU data
@ -162,7 +162,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) { if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {}, \n" fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
"however, BlockVector has N() = {}, and dim = {}.", "however, BlockVector has N() = {}, and dim = {}.",
m_numberOfElements, m_numberOfElements,
bvector.N(), bvector.N(),
@ -185,7 +185,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) { if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector " fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
"has has N() = {}, and dim() = {}.", "has has N() = {}, and dim() = {}.",
m_numberOfElements, m_numberOfElements,
bvector.N(), bvector.N(),
@ -231,8 +231,8 @@ public:
*/ */
void copyToHost(std::vector<T>& data) const; void copyToHost(std::vector<T>& data) const;
void prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const; void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
void syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const; void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
/** /**
* @brief operator *= multiplies every element by scalar * @brief operator *= multiplies every element by scalar
@ -242,7 +242,7 @@ public:
* *
* @note int is not supported * @note int is not supported
*/ */
CuVector<T>& operator*=(const T& scalar); GpuVector<T>& operator*=(const T& scalar);
/** /**
* @brief axpy sets this vector equal to this + alha * y * @brief axpy sets this vector equal to this + alha * y
@ -252,7 +252,7 @@ public:
* @note this will call CuBlas in the background * @note this will call CuBlas in the background
* @note int is not supported * @note int is not supported
*/ */
CuVector<T>& axpy(T alpha, const CuVector<T>& y); GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
/** /**
* @brief operator+= adds the other vector to this vector * @brief operator+= adds the other vector to this vector
@ -260,7 +260,7 @@ public:
* @note this will call CuBlas in the background * @note this will call CuBlas in the background
* @note int is not supported * @note int is not supported
*/ */
CuVector<T>& operator+=(const CuVector<T>& other); GpuVector<T>& operator+=(const GpuVector<T>& other);
/** /**
* @brief operator-= subtracts the other vector from this vector * @brief operator-= subtracts the other vector from this vector
@ -268,7 +268,7 @@ public:
* @note this will call CuBlas in the background * @note this will call CuBlas in the background
* @note int is not supported * @note int is not supported
*/ */
CuVector<T>& operator-=(const CuVector<T>& other); GpuVector<T>& operator-=(const GpuVector<T>& other);
/** /**
* @brief dot computes the dot product (standard inner product) against the other vector * @brief dot computes the dot product (standard inner product) against the other vector
@ -278,7 +278,7 @@ public:
* *
* @return the result on the inner product * @return the result on the inner product
*/ */
T dot(const CuVector<T>& other) const; T dot(const GpuVector<T>& other) const;
/** /**
* @brief returns the l2 norm of the vector * @brief returns the l2 norm of the vector
@ -294,14 +294,14 @@ public:
* *
* @note int is not supported * @note int is not supported
*/ */
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const; T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/** /**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]]) * Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
* *
* @note int is not supported * @note int is not supported
*/ */
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const; T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/** /**
@ -309,14 +309,14 @@ public:
* *
* @note int is not supported * @note int is not supported
*/ */
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const; T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
/** /**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]]) * Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
* *
* @note int is not supported * @note int is not supported
*/ */
T two_norm(const CuVector<int>& indexSet) const; T two_norm(const GpuVector<int>& indexSet) const;
/** /**
@ -363,9 +363,9 @@ public:
* } * }
* @endcode * @endcode
*/ */
void setZeroAtIndexSet(const CuVector<int>& indexSet); void setZeroAtIndexSet(const GpuVector<int>& indexSet);
// Slow method that creates a string representation of a CuVector for debug purposes // Slow method that creates a string representation of a GpuVector for debug purposes
std::string toDebugString() std::string toDebugString()
{ {
std::vector<T> v = asStdVector(); std::vector<T> v = asStdVector();
@ -385,11 +385,11 @@ private:
const int m_numberOfElements; const int m_numberOfElements;
detail::CuBlasHandle& m_cuBlasHandle; detail::CuBlasHandle& m_cuBlasHandle;
void assertSameSize(const CuVector<T>& other) const; void assertSameSize(const GpuVector<T>& other) const;
void assertSameSize(int size) const; void assertSameSize(int size) const;
void assertHasElements() const; void assertHasElements() const;
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -20,21 +20,21 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <algorithm> #include <algorithm>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuView.hpp> #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class T> template <class T>
CuView<T>::CuView(std::vector<T>& data) GpuView<T>::GpuView(std::vector<T>& data)
: CuView(data.data(), data.size()) : GpuView(data.data(), data.size())
{ {
} }
template <typename T> template <typename T>
std::vector<T> std::vector<T>
CuView<T>::asStdVector() const GpuView<T>::asStdVector() const
{ {
std::vector<T> temporary(m_numberOfElements); std::vector<T> temporary(m_numberOfElements);
copyToHost(temporary); copyToHost(temporary);
@ -43,7 +43,7 @@ CuView<T>::asStdVector() const
template <class T> template <class T>
void void
CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements) GpuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{ {
if (numberOfElements > size()) { if (numberOfElements > size()) {
OPM_THROW(std::runtime_error, OPM_THROW(std::runtime_error,
@ -51,32 +51,32 @@ CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
size(), size(),
numberOfElements)); numberOfElements));
} }
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
} }
template <class T> template <class T>
void void
CuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const GpuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{ {
assertSameSize(numberOfElements); assertSameSize(numberOfElements);
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
} }
template <class T> template <class T>
void void
CuView<T>::copyFromHost(const std::vector<T>& data) GpuView<T>::copyFromHost(const std::vector<T>& data)
{ {
copyFromHost(data.data(), data.size()); copyFromHost(data.data(), data.size());
} }
template <class T> template <class T>
void void
CuView<T>::copyToHost(std::vector<T>& data) const GpuView<T>::copyToHost(std::vector<T>& data) const
{ {
copyToHost(data.data(), data.size()); copyToHost(data.data(), data.size());
} }
template class CuView<double>; template class GpuView<double>;
template class CuView<float>; template class GpuView<float>;
template class CuView<int>; template class GpuView<int>;
} // namespace Opm::cuistl } // namespace Opm::gpuistl

View File

@ -16,14 +16,14 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUVIEW_HEADER_HPP #ifndef OPM_GPUVIEW_HEADER_HPP
#define OPM_CUVIEW_HEADER_HPP #define OPM_GPUVIEW_HEADER_HPP
#include <dune/common/fvector.hh> #include <dune/common/fvector.hh>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
@ -37,37 +37,37 @@
#define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0 #define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
#endif #endif
namespace Opm::cuistl namespace Opm::gpuistl
{ {
/** /**
* @brief The CuView class is provides a view of some data allocated on the GPU * @brief The GpuView class is provides a view of some data allocated on the GPU
* Essenstially is only stores a pointer and a size. * Essenstially is only stores a pointer and a size.
* *
* This class supports being used from inside a CUDA/HIP Kernel. * This class supports being used from inside a CUDA/HIP Kernel.
* Implementations are placed in this headerfile for functions that may be called * Implementations are placed in this headerfile for functions that may be called
* inside a kernel to avoid expensive RDC (relocatable device code) * inside a kernel to avoid expensive RDC (relocatable device code)
* *
* The view will typically provide a view into a CuBuffer and be able to * The view will typically provide a view into a GpuBuffer and be able to
* manipulate the data within it * manipulate the data within it
* *
* @param T Type of the data we store, typically int/float/double w/o const specifier * @param T Type of the data we store, typically int/float/double w/o const specifier
* *
**/ **/
template <typename T> template <typename T>
class CuView class GpuView
{ {
public: public:
using value_type = T; using value_type = T;
/** /**
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory * @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
*/ */
explicit CuView() = default; explicit GpuView() = default;
//TODO: we probably dont need anything like this or is it useful to have views also be able to handle things on CPU? //TODO: we probably dont need anything like this or is it useful to have views also be able to handle things on CPU?
/// @brief constructor based on std::vectors, this will make a view on the CPU /// @brief constructor based on std::vectors, this will make a view on the CPU
/// @param data std vector to pr /// @param data std vector to pr
CuView(std::vector<T>& data); GpuView(std::vector<T>& data);
/** /**
* @brief operator[] to retrieve a reference to an item in the buffer * @brief operator[] to retrieve a reference to an item in the buffer
@ -95,7 +95,7 @@ public:
/** /**
* @brief CuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from * @brief GpuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data * data
* *
* @note This assumes the data is on the CPU. * @note This assumes the data is on the CPU.
@ -103,15 +103,15 @@ public:
* @param numberOfElements number of T elements to allocate * @param numberOfElements number of T elements to allocate
* @param dataOnHost data on host/CPU * @param dataOnHost data on host/CPU
*/ */
__host__ __device__ CuView(T* dataOnHost, size_t numberOfElements) __host__ __device__ GpuView(T* dataOnHost, size_t numberOfElements)
: m_dataPtr(dataOnHost), m_numberOfElements(numberOfElements) : m_dataPtr(dataOnHost), m_numberOfElements(numberOfElements)
{ {
} }
/** /**
* @brief ~CuView calls cudaFree * @brief ~GpuView calls cudaFree
*/ */
~CuView() = default; ~GpuView() = default;
/** /**
* @return the raw pointer to the GPU data * @return the raw pointer to the GPU data
@ -128,7 +128,7 @@ public:
} }
/** /**
* @return fetch the first element in a CuView * @return fetch the first element in a GpuView
*/ */
__host__ __device__ T& front() __host__ __device__ T& front()
{ {
@ -139,7 +139,7 @@ public:
} }
/** /**
* @return fetch the last element in a CuView * @return fetch the last element in a GpuView
*/ */
__host__ __device__ T& back() __host__ __device__ T& back()
{ {
@ -150,7 +150,7 @@ public:
} }
/** /**
* @return fetch the first element in a CuView * @return fetch the first element in a GpuView
*/ */
__host__ __device__ T front() const __host__ __device__ T front() const
{ {
@ -161,7 +161,7 @@ public:
} }
/** /**
* @return fetch the last element in a CuView * @return fetch the last element in a GpuView
*/ */
__host__ __device__ T back() const __host__ __device__ T back() const
{ {
@ -220,7 +220,7 @@ public:
* @return an std::vector containing the elements copied from the GPU. * @return an std::vector containing the elements copied from the GPU.
*/ */
std::vector<T> asStdVector() const; std::vector<T> asStdVector() const;
/// @brief Iterator class to make CuViews more similar to std containers /// @brief Iterator class to make GpuViews more similar to std containers
class iterator { class iterator {
public: public:
// Iterator typedefs // Iterator typedefs
@ -363,7 +363,7 @@ private:
/// @brief Helper function to assert if another view has the same size /// @brief Helper function to assert if another view has the same size
/// @param other view /// @param other view
__host__ __device__ void assertSameSize(const CuView<T>& other) const __host__ __device__ void assertSameSize(const GpuView<T>& other) const
{ {
assertSameSize(other.m_numberOfElements); assertSameSize(other.m_numberOfElements);
} }
@ -410,6 +410,6 @@ private:
} }
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -26,18 +26,18 @@
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp> #include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp> #include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp> #include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp> #include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <utility> #include <utility>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
template <class M, class X, class Y, int l> template <class M, class X, class Y, int l>
@ -46,7 +46,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER)) , m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets)) , m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets)) , m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true)) , m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrixReorderedLower(nullptr) , m_gpuMatrixReorderedLower(nullptr)
, m_gpuMatrixReorderedUpper(nullptr) , m_gpuMatrixReorderedUpper(nullptr)
, m_gpuNaturalToReorder(m_naturalToReordered) , m_gpuNaturalToReorder(m_naturalToReordered)
@ -72,12 +72,12 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(), m_gpuMatrix.nonzeroes(),
A.nonzeroes())); A.nonzeroes()));
if (m_splitMatrix) { if (m_splitMatrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N())); m_gpuMatrixReorderedDiag.emplace(GpuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper) std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, = detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural); m_reorderedToNatural);
} else { } else {
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>( m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural); m_cpuMatrix, m_reorderedToNatural);
} }
LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
@ -272,8 +272,8 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization"); = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
// tune the thread-block size of the apply // tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1; tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) { auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) {
@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set"); tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
} }
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \ #define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \ ::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \ template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \ ::Opm::gpuistl::GpuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>> ::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1); INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2); INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -22,14 +22,14 @@
#include <memory> #include <memory>
#include <opm/grid/utility/SparseTable.hpp> #include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <optional> #include <optional>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief ILU0 preconditioner on the GPU. //! \brief ILU0 preconditioner on the GPU.
//! //!
@ -39,7 +39,7 @@ namespace Opm::cuistl
//! \tparam l Ignored. Just there to have the same number of template arguments //! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners. //! as other preconditioners.
//! //!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template //! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions. //! arguments in case of future additions.
template <class M, class X, class Y, int l = 1> template <class M, class X, class Y, int l = 1>
class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y> class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y>
@ -54,7 +54,7 @@ public:
//! \brief The field type of the preconditioner. //! \brief The field type of the preconditioner.
using field_type = typename X::field_type; using field_type = typename X::field_type;
//! \brief The GPU matrix type //! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>; using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor. //! \brief Constructor.
//! //!
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower; std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper; std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately //! \brief If matrix splitting is enabled, we also store the diagonal separately
std::optional<CuVector<field_type>> m_gpuMatrixReorderedDiag; std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU //! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder; GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU //! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural; GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in ILU0 //! \brief Stores the inverted diagonal that we use in ILU0
CuVector<field_type> m_gpuDInv; GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format //! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix; bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards //! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1; int m_moveThreadBlockSize = -1;
int m_ILU0FactorizationThreadBlockSize = -1; int m_ILU0FactorizationThreadBlockSize = -1;
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -21,12 +21,12 @@
#include <cusparse.h> #include <cusparse.h>
#include <dune/istl/preconditioner.hh> #include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//!\brief Makes a CUDA preconditioner available to a CPU simulator. //!\brief Makes a CUDA preconditioner available to a CPU simulator.
//! //!
@ -35,11 +35,11 @@ namespace Opm::cuistl
//! //!
//! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector //! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector //! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply //! \tparam CudaPreconditionerType the preconditioner taking GpuVector<real_type> as arguments to apply
template <class X, class Y, class CudaPreconditionerType> template <class X, class Y, class CudaPreconditionerType>
class PreconditionerAdapter class PreconditionerAdapter
: public Dune::PreconditionerWithUpdate<X, Y>, : public Dune::PreconditionerWithUpdate<X, Y>,
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>> public PreconditionerHolder<GpuVector<typename X::field_type>, GpuVector<typename Y::field_type>>
{ {
public: public:
//! \brief The domain type of the preconditioner. //! \brief The domain type of the preconditioner.
@ -77,8 +77,8 @@ public:
virtual void apply(X& v, const Y& d) override virtual void apply(X& v, const Y& d) override
{ {
if (!m_inputBuffer) { if (!m_inputBuffer) {
m_inputBuffer.reset(new CuVector<field_type>(v.dim())); m_inputBuffer.reset(new GpuVector<field_type>(v.dim()));
m_outputBuffer.reset(new CuVector<field_type>(v.dim())); m_outputBuffer.reset(new GpuVector<field_type>(v.dim()));
} }
m_inputBuffer->copyFromHost(d); m_inputBuffer->copyFromHost(d);
m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer); m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
@ -117,7 +117,7 @@ public:
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>(); return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
} }
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>> virtual std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector<field_type>, GpuVector<field_type>>>
getUnderlyingPreconditioner() override getUnderlyingPreconditioner() override
{ {
return m_underlyingPreconditioner; return m_underlyingPreconditioner;
@ -131,9 +131,9 @@ private:
//! \brief the underlying preconditioner to use //! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner; std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
std::unique_ptr<CuVector<field_type>> m_inputBuffer; std::unique_ptr<GpuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer; std::unique_ptr<GpuVector<field_type>> m_outputBuffer;
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -22,16 +22,16 @@
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioner.hh> #include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners //! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
//! //!
@ -50,7 +50,7 @@ namespace Opm::cuistl
//! //!
//! To use this, use something like the following code: //! To use this, use something like the following code:
//! \code{.cpp} //! \code{.cpp}
//! #include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp> //! #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
//! #include <opm/simulators/linalg/ParallelOverlappingILU0.hpp> //! #include <opm/simulators/linalg/ParallelOverlappingILU0.hpp>
//! //!
//! using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>; //! using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
@ -64,7 +64,7 @@ namespace Opm::cuistl
//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) { //! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
//! //!
//! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>; //! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
//! using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble, //! using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! XDouble, XDouble>; //! XDouble, XDouble>;
//! //!
//! // Note that we do not need to make a new instance for every invocation, this //! // Note that we do not need to make a new instance for every invocation, this
@ -239,6 +239,6 @@ private:
//! \brief the underlying preconditioner to use //! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner; std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -21,7 +21,7 @@
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! \brief Common interface for adapters that hold preconditioners. //! \brief Common interface for adapters that hold preconditioners.
//! //!
@ -38,6 +38,6 @@ public:
*/ */
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0; virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
}; };
} // end namespace Opm::cuistl } // end namespace Opm::gpuistl
#endif #endif

View File

@ -26,12 +26,12 @@
#include <dune/istl/schwarz.hh> #include <dune/istl/schwarz.hh>
#include <dune/istl/solver.hh> #include <dune/istl/solver.hh>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp> #include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp> #include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp> #include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
#ifdef OPEN_MPI #ifdef OPEN_MPI
#if OPEN_MPI #if OPEN_MPI
@ -39,7 +39,7 @@
#endif #endif
#endif #endif
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! @brief Wraps a CUDA solver to work with CPU data. //! @brief Wraps a CUDA solver to work with CPU data.
//! //!
@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type; using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type; using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension; static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::cuistl::CuVector<real_type>; using XGPU = Opm::gpuistl::GpuVector<real_type>;
// TODO: Use a std::forward // TODO: Use a std::forward
SolverAdapter(Operator& op, SolverAdapter(Operator& op,
@ -67,7 +67,7 @@ public:
int verbose) int verbose)
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose) : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
, m_opOnCPUWithMatrix(op) , m_opOnCPUWithMatrix(op)
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat())) , m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose)) , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
{ {
} }
@ -116,7 +116,7 @@ public:
private: private:
Operator& m_opOnCPUWithMatrix; Operator& m_opOnCPUWithMatrix;
CuSparseMatrix<real_type> m_matrix; GpuSparseMatrix<real_type> m_matrix;
UnderlyingSolver<XGPU> m_underlyingSolver; UnderlyingSolver<XGPU> m_underlyingSolver;
@ -148,8 +148,8 @@ private:
if (!precAsHolder) { if (!precAsHolder) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a " "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a " "Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set " "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
} }
@ -159,8 +159,8 @@ private:
if (!preconditionerAdapterAsHolder) { if (!preconditionerAdapterAsHolder) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a " "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a " "Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set " "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
} }
// We need to get the underlying preconditioner: // We need to get the underlying preconditioner:
@ -183,20 +183,20 @@ private:
// TODO add typename Operator communication type as a named type with using // TODO add typename Operator communication type as a named type with using
std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm; std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){ if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication); gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
} }
else{ else{
gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication); gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
} }
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>; using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using SchwarzOperator using SchwarzOperator
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>; = Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm); auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>( auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
preconditionerReallyOnGPU, cudaCommunication); preconditionerReallyOnGPU, cudaCommunication);
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>( auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
@ -206,8 +206,8 @@ private:
// NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we // NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
// remember // remember
// this, we add this check. So remember that we hold one count in this scope, and one in the // this, we add this check. So remember that we hold one count in this scope, and one in the
// CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in // GpuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
// CuBlockPreconditioner, hence we do not test for != 2 // GpuBlockPreconditioner, hence we do not test for != 2
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly."); OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication); auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
@ -222,12 +222,12 @@ private:
if (!precAsHolder) { if (!precAsHolder) {
OPM_THROW(std::invalid_argument, OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner wrapped in a " "The preconditioner needs to be a CUDA preconditioner wrapped in a "
"Opm::cuistl::PreconditionerHolder (eg. CuILU0)."); "Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
} }
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner(); auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
auto matrixOperator auto matrixOperator
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix); = std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>(); auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
return UnderlyingSolver<XGPU>( return UnderlyingSolver<XGPU>(
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose); matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
@ -237,6 +237,6 @@ private:
std::unique_ptr<XGPU> m_inputBuffer; std::unique_ptr<XGPU> m_inputBuffer;
std::unique_ptr<XGPU> m_outputBuffer; std::unique_ptr<XGPU> m_outputBuffer;
}; };
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -17,9 +17,9 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <cublas_v2.h> #include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
return instance; return instance;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail

View File

@ -21,7 +21,7 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <memory> #include <memory>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
* *
* Example use: * Example use:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> * #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
* void someFunction() { * void someFunction() {
* auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance(); * auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
* int cuBlasVersion = -1; * int cuBlasVersion = -1;
* OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion)); * OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
* } * }
@ -64,5 +64,5 @@ private:
CuBlasHandle(); CuBlasHandle();
cublasHandle_t m_handle; cublasHandle_t m_handle;
}; };
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif // OPM_CUBLASHANDLE_HPP #endif // OPM_CUBLASHANDLE_HPP

View File

@ -18,30 +18,30 @@
*/ */
#ifndef CU_MATRIX_DESCRIPTION_HPP #ifndef CU_MATRIX_DESCRIPTION_HPP
#define CU_MATRIX_DESCRIPTION_HPP #define CU_MATRIX_DESCRIPTION_HPP
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API. * GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/ */
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>; using GpuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
/** /**
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API. * Pointer to GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/ */
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>; using GpuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
/** /**
* @brief createMatrixDescription creates a default matrix description * @brief createMatrixDescription creates a default matrix description
* @return a matrix description to a general sparse matrix with zero based indexing. * @return a matrix description to a general sparse matrix with zero based indexing.
*/ */
inline CuSparseMatrixDescriptionPtr inline GpuSparseMatrixDescriptionPtr
createMatrixDescription() createMatrixDescription()
{ {
auto description = std::make_shared<CuSparseMatrixDescription>(); auto description = std::make_shared<GpuSparseMatrixDescription>();
// Note: We always want to use zero base indexing. // Note: We always want to use zero base indexing.
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL)); OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
@ -52,11 +52,11 @@ createMatrixDescription()
/** /**
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description * @brief createLowerDiagonalDescription creates a lower diagonal matrix description
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription() * @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
* *
* @note This will assume it has a unit diagonal * @note This will assume it has a unit diagonal
*/ */
inline CuSparseMatrixDescriptionPtr inline GpuSparseMatrixDescriptionPtr
createLowerDiagonalDescription() createLowerDiagonalDescription()
{ {
auto description = createMatrixDescription(); auto description = createMatrixDescription();
@ -67,11 +67,11 @@ createLowerDiagonalDescription()
/** /**
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description * @brief createUpperDiagonalDescription creates an upper diagonal matrix description
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription() * @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
* *
* @note This will assume it has a non-unit diagonal. * @note This will assume it has a non-unit diagonal.
*/ */
inline CuSparseMatrixDescriptionPtr inline GpuSparseMatrixDescriptionPtr
createUpperDiagonalDescription() createUpperDiagonalDescription()
{ {
auto description = createMatrixDescription(); auto description = createMatrixDescription();
@ -81,6 +81,6 @@ createUpperDiagonalDescription()
return description; return description;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif // CU_MATRIX_DESCRIPTION_HPP #endif // CU_MATRIX_DESCRIPTION_HPP

View File

@ -16,9 +16,9 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
return instance; return instance;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail

View File

@ -21,7 +21,7 @@
#include <cusparse.h> #include <cusparse.h>
#include <memory> #include <memory>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
* *
* Example use: * Example use:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> * #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
* void someFunction() { * void someFunction() {
* auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance(); * auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
* int cuSparseVersion = -1; * int cuSparseVersion = -1;
* OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion)); * OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
* } * }
@ -63,5 +63,5 @@ private:
CuSparseHandle(); CuSparseHandle();
cusparseHandle_t m_handle; cusparseHandle_t m_handle;
}; };
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif // OPM_CUSPARSEHANDLE_HPP #endif // OPM_CUSPARSEHANDLE_HPP

View File

@ -23,7 +23,7 @@
#include <memory> #include <memory>
#include <type_traits> #include <type_traits>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -43,7 +43,7 @@ namespace Opm::cuistl::detail
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulator/linalg/cuistl/detail/CuSparseResource.hpp> * #include <opm/simulator/linalg/gpuistl/detail/CuSparseResource.hpp>
* *
* void someFunction() { * void someFunction() {
* auto resource = CuSparseResource<cuSparseMatDescr_t>(); * auto resource = CuSparseResource<cuSparseMatDescr_t>();
@ -94,6 +94,6 @@ private:
DeleterType m_deleter; DeleterType m_deleter;
}; };
} // namespace Opm::cuistl::impl } // namespace Opm::gpuistl::impl
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseResource_impl.hpp>
#endif // CUSPARSERESOURCE_HPP #endif // CUSPARSERESOURCE_HPP

View File

@ -18,9 +18,9 @@
*/ */
#include <exception> #include <exception>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
namespace namespace
@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
// proper name of the function being called. // proper name of the function being called.
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource)); OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail

View File

@ -21,11 +21,11 @@
#include <limits> #include <limits>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp> #include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <string> #include <string>
#include <utility> #include <utility>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size /// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
@ -47,7 +47,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
// create the events // create the events
for (int i = 0; i < runs + 1; ++i) { for (int i = 0; i < runs + 1; ++i) {
OPM_CUDA_SAFE_CALL(cudaEventCreate(&events[i])); OPM_GPU_SAFE_CALL(cudaEventCreate(&events[i]));
} }
// Initialize helper variables // Initialize helper variables
@ -59,21 +59,21 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
for (int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) { for (int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) {
// record a first event, and then an event after each kernel // record a first event, and then an event after each kernel
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[0])); OPM_GPU_SAFE_CALL(cudaEventRecord(events[0]));
for (int i = 0; i < runs; ++i) { for (int i = 0; i < runs; ++i) {
f(thrBlockSize); // runs an arbitrary function with the provided arguments f(thrBlockSize); // runs an arbitrary function with the provided arguments
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[i + 1])); OPM_GPU_SAFE_CALL(cudaEventRecord(events[i + 1]));
} }
// make suret he runs are over // make suret he runs are over
OPM_CUDA_SAFE_CALL(cudaEventSynchronize(events[runs])); OPM_GPU_SAFE_CALL(cudaEventSynchronize(events[runs]));
// kernel launch was valid // kernel launch was valid
if (cudaSuccess == cudaGetLastError()) { if (cudaSuccess == cudaGetLastError()) {
// check if we beat the record for the fastest kernel // check if we beat the record for the fastest kernel
for (int i = 0; i < runs; ++i) { for (int i = 0; i < runs; ++i) {
float candidateBlockSizeTime; float candidateBlockSizeTime;
OPM_CUDA_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1])); OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
if (candidateBlockSizeTime < bestTime) { // checks if this configuration beat the current best if (candidateBlockSizeTime < bestTime) { // checks if this configuration beat the current best
bestTime = candidateBlockSizeTime; bestTime = candidateBlockSizeTime;
bestBlockSize = thrBlockSize; bestBlockSize = thrBlockSize;
@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
return bestBlockSize; return bestBlockSize;
} }
} // end namespace Opm::cuistl::detail } // end namespace Opm::gpuistl::detail
#endif #endif

View File

@ -23,24 +23,24 @@
#include <memory> #include <memory>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/grid/utility/SparseTable.hpp> #include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
/* /*
This file contains a collection of utility functions used in the GPU implementation of ILU and DILU This file contains a collection of utility functions used in the GPU implementation of ILU and DILU
The functions deal with creating the mappings between reordered and natural indices, as well as The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from dune matrices and creating cusparsematrix indices extracting sparsity structures from dune matrices and creating gpusparsematrix indices
*/ */
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
inline std::vector<int> inline std::vector<int>
createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets) createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
{ {
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize())); auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0; int globCnt = 0;
for (auto row : levelSets) { for (auto row : levelSets) {
for (auto col : row) { for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(), OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size())); fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[globCnt++] = static_cast<int>(col); res[globCnt++] = static_cast<int>(col);
} }
@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
inline std::vector<int> inline std::vector<int>
createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets) createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
{ {
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize())); auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0; int globCnt = 0;
for (auto row : levelSets) { for (auto row : levelSets) {
for (auto col : row) { for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(), OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size())); fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[col] = globCnt++; res[col] = globCnt++;
} }
@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))), return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))}; std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -28,7 +28,7 @@
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
#define CHECK_CUBLAS_ERROR_TYPE(code, x) \ #define CHECK_CUBLAS_ERROR_TYPE(code, x) \
@ -108,7 +108,7 @@ getCublasErrorMessage(cublasStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* #include <cublas_v2.h> * #include <cublas_v2.h>
* *
* void some_function() { * void some_function() {
@ -147,7 +147,7 @@ cublasSafeCall(cublasStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* #include <cublas_v2.h> * #include <cublas_v2.h>
* *
* void some_function() { * void some_function() {
@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,
return error; return error;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
/** /**
* @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception * @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
@ -183,7 +183,7 @@ cublasWarnIfError(cublasStatus_t error,
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <cublas_v2.h> * #include <cublas_v2.h>
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* *
* void some_function() { * void some_function() {
* cublasHandle_t cublasHandle; * cublasHandle_t cublasHandle;
@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
* @note This should be used for any call to cuBlas unless you have a good reason not to. * @note This should be used for any call to cuBlas unless you have a good reason not to.
*/ */
#define OPM_CUBLAS_SAFE_CALL(expression) \ #define OPM_CUBLAS_SAFE_CALL(expression) \
::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/** /**
* @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning * @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
@ -203,7 +203,7 @@ cublasWarnIfError(cublasStatus_t error,
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <cublas_v2.h> * #include <cublas_v2.h>
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
* *
* void some_function() { * void some_function() {
* cublasHandle_t cublasHandle; * cublasHandle_t cublasHandle;
@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
* @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception. * @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/ */
#define OPM_CUBLAS_WARN_IF_ERROR(expression) \ #define OPM_CUBLAS_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUBLAS_SAFE_CALL_HPP #endif // OPM_CUBLAS_SAFE_CALL_HPP

View File

@ -29,7 +29,7 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
inline cublasStatus_t inline cublasStatus_t
@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet."); OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -20,7 +20,7 @@
#define OPM_CUDA_CHECK_LAST_ERROR_HPP #define OPM_CUDA_CHECK_LAST_ERROR_HPP
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
/** /**
* @brief OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE checks the return type of cudaDeviceSynchronize(), * @brief OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE checks the return type of cudaDeviceSynchronize(),
@ -28,7 +28,7 @@
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
* *
* void some_function() { * void some_function() {
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE; * OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE;
@ -38,7 +38,7 @@
* @note This can be used to debug the code, or simply make sure that no error has occured. * @note This can be used to debug the code, or simply make sure that no error has occured.
* @note This is a rather heavy operation, so prefer to use only in Debug mode (see OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG) * @note This is a rather heavy operation, so prefer to use only in Debug mode (see OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG)
*/ */
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()) #define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_GPU_SAFE_CALL(cudaDeviceSynchronize())
#ifdef NDEBUG #ifdef NDEBUG
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG #define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG
@ -50,7 +50,7 @@
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* *
* void some_function() { * void some_function() {
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG; * OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG;
@ -69,7 +69,7 @@
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
* *
* void some_function() { * void some_function() {
* OPM_CUDA_CHECK_LAST_ERROR; * OPM_CUDA_CHECK_LAST_ERROR;
@ -78,7 +78,7 @@
* *
* @note This can be used to debug the code, or simply make sure that no error has occured. * @note This can be used to debug the code, or simply make sure that no error has occured.
*/ */
#define OPM_CUDA_CHECK_LAST_ERROR OPM_CUDA_SAFE_CALL(cudaGetLastError()) #define OPM_CUDA_CHECK_LAST_ERROR OPM_GPU_SAFE_CALL(cudaGetLastError())
#ifdef NDEBUG #ifdef NDEBUG
#define OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG #define OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG
@ -90,7 +90,7 @@
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
* *
* void some_function() { * void some_function() {
* OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG; * OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG;

View File

@ -19,7 +19,7 @@
#ifndef CUSPARSE_CONSTANTS_HPP #ifndef CUSPARSE_CONSTANTS_HPP
#define CUSPARSE_CONSTANTS_HPP #define CUSPARSE_CONSTANTS_HPP
#include <cusparse.h> #include <cusparse.h>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW; const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
} }

View File

@ -24,7 +24,7 @@
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp> #include <opm/common/OpmLog/OpmLog.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
#define CHECK_CUSPARSE_ERROR_TYPE(code, x) \ #define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
@ -93,7 +93,7 @@ getCusparseErrorMessage(cusparseStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cublas_v2.h> * #include <cublas_v2.h>
* *
* void some_function() { * void some_function() {
@ -133,7 +133,7 @@ cusparseSafeCall(cusparseStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cublas_v2.h> * #include <cublas_v2.h>
* *
* void some_function() { * void some_function() {
@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,
return error; return error;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
@ -171,7 +171,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cusparse.h> * #include <cusparse.h>
* *
* void some_function() { * void some_function() {
@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* @note This should be used for any call to cuSparse unless you have a good reason not to. * @note This should be used for any call to cuSparse unless you have a good reason not to.
*/ */
#define OPM_CUSPARSE_SAFE_CALL(expression) \ #define OPM_CUSPARSE_SAFE_CALL(expression) \
::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/** /**
* @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a * @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
@ -191,7 +191,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
* #include <cusparse.h> * #include <cusparse.h>
* *
* void some_function() { * void some_function() {
@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
* exception. * exception.
*/ */
#define OPM_CUSPARSE_WARN_IF_ERROR(expression) \ #define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUSPARSE_SAFE_CALL_HPP #endif // OPM_CUSPARSE_SAFE_CALL_HPP

View File

@ -27,7 +27,7 @@
#include <type_traits> #include <type_traits>
#ifndef OPM_CUSPARSE_WRAPPER_HPP #ifndef OPM_CUSPARSE_WRAPPER_HPP
#define OPM_CUSPARSE_WRAPPER_HPP #define OPM_CUSPARSE_WRAPPER_HPP
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
inline cusparseStatus_t inline cusparseStatus_t
@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
beta, beta,
y); y);
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -22,7 +22,7 @@
#include <limits> #include <limits>
#include <vector> #include <vector>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
return newMatrix; return newMatrix;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -21,12 +21,12 @@
#include <cstddef> #include <cstddef>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
/* /*
This file provides some logic for handling how to choose the correct thread-block size This file provides some logic for handling how to choose the correct thread-block size
*/ */
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
constexpr inline size_t constexpr inline size_t
getThreads([[maybe_unused]] size_t numberOfRows) getThreads([[maybe_unused]] size_t numberOfRows)
@ -51,7 +51,7 @@ getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
} }
int blockSize; int blockSize;
int tmpGridSize; int tmpGridSize;
OPM_CUDA_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0)); OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
return blockSize; return blockSize;
} }
@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
return (wantedThreads + threadBlockSize - 1) / threadBlockSize; return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -16,15 +16,15 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUDA_SAFE_CALL_HPP #ifndef OPM_GPU_SAFE_CALL_HPP
#define OPM_CUDA_SAFE_CALL_HPP #define OPM_GPU_SAFE_CALL_HPP
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <fmt/core.h> #include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp> #include <opm/common/OpmLog/OpmLog.hpp>
#include <string_view> #include <string_view>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
* @brief getCudaErrorMessage generates the error message to display for a given error. * @brief getCudaErrorMessage generates the error message to display for a given error.
@ -48,9 +48,9 @@ getCudaErrorMessage(cudaError_t error,
const std::string_view& functionName, const std::string_view& functionName,
size_t lineNumber) size_t lineNumber)
{ {
return fmt::format("CUDA expression did not execute correctly. Expression was: \n" return fmt::format("GPU expression did not execute correctly. Expression was: \n"
" {}\n" " {}\n"
"CUDA error was {}\n" "GPU error was {}\n"
"in function {}, in {}, at line {}\n", "in function {}, in {}, at line {}\n",
expression, expression,
cudaGetErrorString(error), cudaGetErrorString(error),
@ -60,12 +60,12 @@ getCudaErrorMessage(cudaError_t error,
} }
/** /**
* @brief cudaSafeCall checks the return type of the CUDA expression (function call) and throws an exception if it * @brief cudaSafeCall checks the return type of the GPU expression (function call) and throws an exception if it
* does not equal cudaSuccess. * does not equal cudaSuccess.
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h> * #include <cuda_runtime.h>
* *
* void some_function() { * void some_function() {
@ -74,7 +74,7 @@ getCudaErrorMessage(cudaError_t error,
* } * }
* @endcode * @endcode
* *
* @note It is probably easier to use the macro OPM_CUDA_SAFE_CALL * @note It is probably easier to use the macro OPM_GPU_SAFE_CALL
* *
* @todo Refactor to use std::source_location once we shift to C++20 * @todo Refactor to use std::source_location once we shift to C++20
*/ */
@ -91,7 +91,7 @@ cudaSafeCall(cudaError_t error,
} }
/** /**
* @brief cudaWarnIfError checks the return type of the CUDA expression (function call) and issues a warning if it * @brief cudaWarnIfError checks the return type of the GPU expression (function call) and issues a warning if it
* does not equal cudaSuccess. * does not equal cudaSuccess.
* *
* @param error the error code from cublas * @param error the error code from cublas
@ -102,7 +102,7 @@ cudaSafeCall(cudaError_t error,
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h> * #include <cuda_runtime.h>
* *
* void some_function() { * void some_function() {
@ -111,9 +111,9 @@ cudaSafeCall(cudaError_t error,
* } * }
* @endcode * @endcode
* *
* @note It is probably easier to use the macro OPM_CUDA_WARN_IF_ERROR * @note It is probably easier to use the macro OPM_GPU_WARN_IF_ERROR
* *
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception. * @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
* *
* @todo Refactor to use std::source_location once we shift to C++20 * @todo Refactor to use std::source_location once we shift to C++20
*/ */
@ -128,47 +128,47 @@ cudaWarnIfError(cudaError_t error,
OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber)); OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
} }
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
/** /**
* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it * @brief OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exception if it
* does not equal cudaSuccess. * does not equal cudaSuccess.
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h> * #include <cuda_runtime.h>
* *
* void some_function() { * void some_function() {
* void* somePointer; * void* somePointer;
* OPM_CUDA_SAFE_CALL(cudaMalloc(&somePointer, 1)); * OPM_GPU_SAFE_CALL(cudaMalloc(&somePointer, 1));
* } * }
* @endcode * @endcode
* *
* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to. * @note This should be used for any call to the GPU runtime API unless you have a good reason not to.
*/ */
#define OPM_CUDA_SAFE_CALL(expression) \ #define OPM_GPU_SAFE_CALL(expression) \
::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/** /**
* @brief OPM_CUDA_WARN_IF_ERROR checks the return type of the CUDA expression (function call) and issues a warning if * @brief OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warning if
* it does not equal cudaSuccess. * it does not equal cudaSuccess.
* *
* Example usage: * Example usage:
* @code{.cpp} * @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> * #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
* #include <cuda_runtime.h> * #include <cuda_runtime.h>
* *
* void some_function() { * void some_function() {
* void* somePointer; * void* somePointer;
* OPM_CUDA_WARN_IF_ERROR(cudaMalloc(&somePointer, 1)); * OPM_GPU_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
* } * }
* @endcode * @endcode
* *
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception. * @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/ */
#define OPM_CUDA_WARN_IF_ERROR(expression) \ #define OPM_GPU_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__) ::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif #endif

View File

@ -18,12 +18,12 @@
*/ */
#include <config.h> #include <config.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp> #include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <stdexcept> #include <stdexcept>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
namespace namespace
{ {
@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows); srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
} }
@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows, size_t numberOfRows,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize); cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix, cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
srcRowIndices, srcRowIndices,
srcColumnIndices, srcColumnIndices,
@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4); INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5); INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6); INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail

View File

@ -16,11 +16,11 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP #ifndef OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP #define OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
#include <cstddef> #include <cstddef>
#include <vector> #include <vector>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
* @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list * @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows, size_t numberOfRows,
int threadBlockSize); int threadBlockSize);
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -29,7 +29,7 @@
* TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ). * TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ).
* With C++20 this file can be removed. * With C++20 this file can be removed.
*/ */
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -121,5 +121,5 @@ public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>; static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
}; };
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -18,12 +18,12 @@
*/ */
#include <config.h> #include <config.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp> #include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <stdexcept> #include <stdexcept>
namespace Opm::cuistl::detail::DILU namespace Opm::gpuistl::detail::DILU
{ {
namespace namespace
{ {
@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
} }
@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v, T* v,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize); cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
} }
@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
} }
@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v, T* v,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize); cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
} }
@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
int thrBlockSize) int thrBlockSize)
{ {
if (blocksize <= 3) { if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize); cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat, cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
rowIndices, rowIndices,
colIndices, colIndices,
@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
int thrBlockSize) int thrBlockSize)
{ {
if (blocksize <= 3) { if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize); cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat, cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices, lowerRowIndices,
lowerColIndices, lowerColIndices,
@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4); INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5); INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6); INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::DILU } // namespace Opm::gpuistl::detail::DILU

View File

@ -24,7 +24,7 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <vector> #include <vector>
namespace Opm::cuistl::detail::DILU namespace Opm::gpuistl::detail::DILU
{ {
/** /**
@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
T* dInv, T* dInv,
int threadBlockSize); int threadBlockSize);
} // namespace Opm::cuistl::detail::DILU } // namespace Opm::gpuistl::detail::DILU
#endif #endif

View File

@ -18,16 +18,16 @@
*/ */
#include <config.h> #include <config.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp> #include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
#include <stdexcept> #include <stdexcept>
/* /*
The LU factorization and apply step is written based on the Dune implementations The LU factorization and apply step is written based on the Dune implementations
*/ */
namespace Opm::cuistl::detail::ILU0 namespace Opm::gpuistl::detail::ILU0
{ {
namespace namespace
{ {
@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
} }
@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
} }
@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v, T* v,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize); cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
} }
@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v, T* v,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize( int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize); cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v); reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
} }
@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>( cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx); srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
} }
@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
int thrBlockSize) int thrBlockSize)
{ {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat, cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices, lowerRowIndices,
lowerColIndices, lowerColIndices,
@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4); INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5); INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6); INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::ILU0 } // namespace Opm::gpuistl::detail::ILU0

View File

@ -20,7 +20,7 @@
#define OPM_ILU0_KERNELS_HPP #define OPM_ILU0_KERNELS_HPP
#include <cstddef> #include <cstddef>
#include <vector> #include <vector>
namespace Opm::cuistl::detail::ILU0 namespace Opm::gpuistl::detail::ILU0
{ {
/** /**
@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
int rowsInLevelSet, int rowsInLevelSet,
int threadBlockSize); int threadBlockSize);
} // namespace Opm::cuistl::detail::ILU0 } // namespace Opm::gpuistl::detail::ILU0
#endif #endif

View File

@ -18,12 +18,12 @@
*/ */
#include <config.h> #include <config.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp> #include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <stdexcept> #include <stdexcept>
namespace Opm::cuistl::detail::JAC namespace Opm::gpuistl::detail::JAC
{ {
namespace namespace
{ {
@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
{ {
if (blocksize <= 3) { if (blocksize <= 3) {
int threadBlockSize int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>); = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuInvertDiagonalAndFlatten<T, blocksize> cuInvertDiagonalAndFlatten<T, blocksize>
<<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec); <<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
} else { } else {
@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5); INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6); INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::JAC } // namespace Opm::gpuistl::detail::JAC

View File

@ -20,7 +20,7 @@
#define OPM_JAC_KERNELS_HPP #define OPM_JAC_KERNELS_HPP
#include <cstddef> #include <cstddef>
#include <vector> #include <vector>
namespace Opm::cuistl::detail::JAC namespace Opm::gpuistl::detail::JAC
{ {
/** /**
@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
template <class T, int blocksize> template <class T, int blocksize>
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec); void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
} // namespace Opm::cuistl::detail::JAC } // namespace Opm::gpuistl::detail::JAC
#endif #endif

View File

@ -20,9 +20,9 @@
#ifndef OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP #ifndef OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
#define OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP #define OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp> #include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply() //! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
return true; return true;
} }
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -37,7 +37,7 @@
* while Dune::BlockVector (and relatives) use unsigned size_t. * while Dune::BlockVector (and relatives) use unsigned size_t.
*/ */
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -106,6 +106,6 @@ to_size_t(int i)
return std::size_t(i); return std::size_t(i);
} }
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -18,14 +18,14 @@
*/ */
#include <config.h> #include <config.h>
#include <opm/common/ErrorMacros.hpp> #include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
#include <stdexcept> #include <stdexcept>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
namespace namespace
@ -108,8 +108,8 @@ template <class T>
void void
setVectorValue(T* deviceData, size_t numberOfElements, const T& value) setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value); setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
} }
@ -121,8 +121,8 @@ template <class T>
void void
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices) setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices); setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
} }
template void setZeroAtIndexSet(double*, size_t, const int*); template void setZeroAtIndexSet(double*, size_t, const int*);
@ -138,12 +138,12 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
size_t numberOfElements, size_t numberOfElements,
const int* indices) const int* indices)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices); elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
// TODO: [perf] Get rid of the allocation here. // TODO: [perf] Get rid of the allocation here.
CuVector<T> oneVector(numberOfElements); GpuVector<T> oneVector(numberOfElements);
oneVector = 1.0; oneVector = 1.0;
T result = 0.0; T result = 0.0;
OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result)); OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result));
@ -158,10 +158,10 @@ template <class T>
void void
prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices) prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices); prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish. OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
} }
template void prepareSendBuf(const double* deviceA, double* buffer, size_t numberOfElements, const int* indices); template void prepareSendBuf(const double* deviceA, double* buffer, size_t numberOfElements, const int* indices);
template void prepareSendBuf(const float* deviceA, float* buffer, size_t numberOfElements, const int* indices); template void prepareSendBuf(const float* deviceA, float* buffer, size_t numberOfElements, const int* indices);
@ -171,8 +171,8 @@ template <class T>
void void
syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices) syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{ {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices); syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
// cudaDeviceSynchronize(); // Not needed, I guess... // cudaDeviceSynchronize(); // Not needed, I guess...
} }
@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
{ {
switch (blocksize) { switch (blocksize) {
case 1: { case 1: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 1> weightedDiagMV<T, 1>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break; } break;
case 2: { case 2: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 2> weightedDiagMV<T, 2>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break; } break;
case 3: { case 3: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>); int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize); int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 3> weightedDiagMV<T, 3>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break; } break;
@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*); template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*); template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail

View File

@ -20,7 +20,7 @@
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP #define OPM_CUISTL_VECTOR_OPERATIONS_HPP
#include <cstddef> #include <cstddef>
#include <cublas_v2.h> #include <cublas_v2.h>
namespace Opm::cuistl::detail namespace Opm::gpuistl::detail
{ {
/** /**
@ -65,11 +65,11 @@ void syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int*
/** /**
* @brief Compue the weighted matrix vector product where the matrix is diagonal, the diagonal is a vector, meaning we * @brief Compue the weighted matrix vector product where the matrix is diagonal, the diagonal is a vector, meaning we
* compute the Hadamard product. * compute the Hadamard product.
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks * @param squareBlockVector A GpuVector whose elements are NxN matrix blocks
* @param numberOfRows The number of rows in the vector * @param numberOfRows The number of rows in the vector
* @param blocksize The sidelength of the square block elements in the vector * @param blocksize The sidelength of the square block elements in the vector
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with * @param src_vec A pointer to the data of the GpuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in * @param[out] dst_vec A pointer to the data of the GpuVector we store the result in
* *
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the * @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
* diagonal of the matrix and use this product. * diagonal of the matrix and use this product.
@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
T relaxationFactor, T relaxationFactor,
const T* srcVec, const T* srcVec,
T* dstVec); T* dstVec);
} // namespace Opm::cuistl::detail } // namespace Opm::gpuistl::detail
#endif #endif

View File

@ -19,9 +19,9 @@
#include <config.h> #include <config.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <opm/common/OpmLog/OpmLog.hpp> #include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp> #include <opm/simulators/linalg/gpuistl/set_device.hpp>
namespace Opm::cuistl namespace Opm::gpuistl
{ {
void void
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks) setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
@ -41,9 +41,9 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
// Now do a round robin kind of assignment // Now do a round robin kind of assignment
// TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device. // TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device.
const auto deviceId = mpiRank % deviceCount; const auto deviceId = mpiRank % deviceCount;
OPM_CUDA_SAFE_CALL(cudaDeviceReset()); OPM_GPU_SAFE_CALL(cudaDeviceReset());
OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId)); OPM_GPU_SAFE_CALL(cudaSetDevice(deviceId));
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount) OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
+ " devices)."); + " devices).");
} }
} // namespace Opm::cuistl } // namespace Opm::gpuistl

View File

@ -20,7 +20,7 @@
#ifndef OPM_CUISTL_SET_DEVICE_HEADER #ifndef OPM_CUISTL_SET_DEVICE_HEADER
#define OPM_CUISTL_SET_DEVICE_HEADER #define OPM_CUISTL_SET_DEVICE_HEADER
namespace Opm::cuistl namespace Opm::gpuistl
{ {
//! @brief Sets the correct CUDA device in the setting of MPI //! @brief Sets the correct CUDA device in the setting of MPI
//! //!
@ -32,5 +32,5 @@ namespace Opm::cuistl
//! //!
//! @note If no CUDA device is present, this does nothing. //! @note If no CUDA device is present, this does nothing.
void setDevice(int mpiRank, int numberOfMpiRanks); void setDevice(int mpiRank, int numberOfMpiRanks);
} // namespace Opm::cuistl } // namespace Opm::gpuistl
#endif #endif

View File

@ -18,14 +18,14 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuBuffer #define BOOST_TEST_MODULE TestGpuBuffer
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp> #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/CuView.hpp> #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <array> #include <array>
#include <algorithm> #include <algorithm>
@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
{ {
// test that we can create buffers and make views of the buffers using the pointer constructor // test that we can create buffers and make views of the buffers using the pointer constructor
auto buf = std::vector<int>({1, 2, 3, 4, 5, 6}); auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf); const auto gpubuf = ::Opm::gpuistl::GpuBuffer<int>(buf);
auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size()); auto gpuview = ::Opm::gpuistl::GpuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value; bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::GpuView<int>, decltype(gpuview)>::value;
BOOST_CHECK(gpuBufCreatedView); BOOST_CHECK(gpuBufCreatedView);
// test that we can make views of buffers by using the cubuffer constructor // test that we can make views of buffers by using the GpuBuffer constructor
auto gpuview2 = ::Opm::cuistl::make_view(gpubuf); auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value; bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::GpuView<const int>, decltype(gpuview2)>::value;
BOOST_CHECK(gpuBufCreatedView2); BOOST_CHECK(gpuBufCreatedView2);

View File

@ -18,18 +18,18 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuDiluHelpers #define BOOST_TEST_MODULE TestGpuDILU
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <dune/common/fmatrix.hh> #include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <memory> #include <memory>
#include <opm/simulators/linalg/DILU.hpp> #include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp> #include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <random> #include <random>
#include <vector> #include <vector>
@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>; using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>; using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>; using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::cuistl::CuSparseMatrix<T>; using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::cuistl::CuVector<int>; using CuIntVec = Opm::gpuistl::GpuVector<int>;
using CuFloatingPointVec = Opm::cuistl::CuVector<T>; using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>; using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>; using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
Sp1x1BlockMatrix Sp1x1BlockMatrix
get1x1BlockTestMatrix() get1x1BlockTestMatrix()
@ -211,7 +211,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
// Initialize preconditioner objects // Initialize preconditioner objects
Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA); Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
auto gpudilu = CuDilu1x1(matA, true, true); auto gpudilu = GpuDilu1x1(matA, true, true);
// Use the apply // Use the apply
gpudilu.apply(d_output, d_input); gpudilu.apply(d_output, d_input);
@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
} }
auto cudilures = d_output.asStdVector(); auto cudilures = d_output.asStdVector();
// check that CuDilu results matches that of CPU dilu // check that GpuDilu results matches that of CPU dilu
for (size_t i = 0; i < cudilures.size(); ++i) { for (size_t i = 0; i < cudilures.size(); ++i) {
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7); BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
} }
@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)
// init matrix with 2x2 blocks // init matrix with 2x2 blocks
Sp2x2BlockMatrix matA = get2x2BlockTestMatrix(); Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
auto gpudilu = CuDilu2x2(matA, true, true); auto gpudilu = GpuDilu2x2(matA, true, true);
Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA); Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);
// create input/output buffers for the apply // create input/output buffers for the apply
@ -275,7 +275,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
{ {
// create gpu dilu preconditioner // create gpu dilu preconditioner
Sp1x1BlockMatrix matA = get1x1BlockTestMatrix(); Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
auto gpudilu = CuDilu1x1(matA, true, true); auto gpudilu = GpuDilu1x1(matA, true, true);
matA[0][0][0][0] = 11.0; matA[0][0][0][0] = 11.0;
matA[0][1][0][0] = 12.0; matA[0][1][0][0] = 12.0;
@ -325,7 +325,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
} }
auto cudilures = d_output.asStdVector(); auto cudilures = d_output.asStdVector();
// check that CuDilu results matches that of CPU dilu // check that GpuDilu results matches that of CPU dilu
for (size_t i = 0; i < cudilures.size(); ++i) { for (size_t i = 0; i < cudilures.size(); ++i) {
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7); BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
} }

View File

@ -18,22 +18,22 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuJac #define BOOST_TEST_MODULE TestGpuJac
#include <boost/mpl/list.hpp> #include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuJac.hpp> #include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> #include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
using NumericTypes = boost::mpl::list<double, float>; using NumericTypes = boost::mpl::list<double, float>;
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes) BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
{ {
/* /*
Test data to validate jacobi preconditioner, expected result is x_1, and relaxation factor is 0.5 Test data to validate jacobi preconditioner, expected result is x_1, and relaxation factor is 0.5
@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>; using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>; using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>; using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) { for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
B[1][1][0][0] = -1.0; B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0; B[1][1][1][1] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5)); auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
Vector vVector(2); Vector vVector(2);
Vector dVector(2); Vector dVector(2);
@ -81,14 +81,14 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}}; const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}};
cujac.apply(vVector, dVector); gpujac.apply(vVector, dVector);
BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7); BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7);
BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7); BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7);
BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7); BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7);
BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7); BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7);
} }
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes) BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
{ {
/* /*
Test data to validate jacobi preconditioner, expected result is x_1, relaxation factor is 0.5 Test data to validate jacobi preconditioner, expected result is x_1, relaxation factor is 0.5
@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>; using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>; using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>; using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) { for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
B[2][2][0][0] = -1.0; B[2][2][0][0] = -1.0;
B[3][3][0][0] = -1.0; B[3][3][0][0] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5)); auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
Vector vVector(4); Vector vVector(4);
Vector dVector(4); Vector dVector(4);
@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0}; const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0};
cujac.apply(vVector, dVector); gpujac.apply(vVector, dVector);
BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7); BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7);
BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7); BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7);
BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7); BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7);

View File

@ -18,7 +18,7 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy #define BOOST_TEST_MODULE TestGpuOwnerOverlapCopy
#define BOOST_TEST_NO_MAIN #define BOOST_TEST_NO_MAIN
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
@ -26,10 +26,10 @@
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/owneroverlapcopy.hh> #include <dune/istl/owneroverlapcopy.hh>
#include <memory> #include <memory>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp> #include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp> #include <opm/simulators/linalg/gpuistl/set_device.hpp>
#include <random> #include <random>
#include <mpi.h> #include <mpi.h>
@ -46,7 +46,7 @@ main(int argc, char** argv)
int rank, totalRanks; int rank, totalRanks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &totalRanks); MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
Opm::cuistl::setDevice(rank, totalRanks); Opm::gpuistl::setDevice(rank, totalRanks);
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv); boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
} }
@ -62,14 +62,14 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD); auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}}; auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU); auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy); auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy auto GpuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm); = Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
cuOwnerOverlapCopy.project(xGPU); GpuOwnerOverlapCopy.project(xGPU);
auto resultOfProject = xGPU.asStdVector(); auto resultOfProject = xGPU.asStdVector();
@ -94,19 +94,19 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy)); indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD); auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}}; auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU); auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy); auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy auto GpuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm); = Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
double outputDune = -1.0; double outputDune = -1.0;
auto xDune = xGPU.asDuneBlockVector<1>(); auto xDune = xGPU.asDuneBlockVector<1>();
ownerOverlapCopy.dot(xDune, xDune, outputDune); ownerOverlapCopy.dot(xDune, xDune, outputDune);
double output = -1.0; double output = -1.0;
cuOwnerOverlapCopy.dot(xGPU, xGPU, output); GpuOwnerOverlapCopy.dot(xGPU, xGPU, output);
BOOST_CHECK_EQUAL(outputDune, output); BOOST_CHECK_EQUAL(outputDune, output);

View File

@ -18,7 +18,7 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuSeqILU0 #define BOOST_TEST_MODULE TestGpuSeqILU0
#define BOOST_TEST_NO_MAIN #define BOOST_TEST_NO_MAIN
@ -27,10 +27,10 @@
#include <dune/common/parallel/mpihelper.hh> #include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh> #include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp> #include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <limits> #include <limits>
#include <memory> #include <memory>
@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>; using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>; using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>; using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) { for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0); auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0)); auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i} // check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0)) // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N); Vector outputVectorDune(N);
Vector outputVectorCuistl(N); Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector); duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector); gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) { for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0], BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B // Now we check that we can update the matrix. We basically just negate B
B *= -1.0; B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0); auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update(); gpuILU.update();
// check for the standard basis {e_i} // check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0)) // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) { for (int i = 0; i < N; ++i) {
@ -122,7 +122,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N); Vector outputVectorDune(N);
Vector outputVectorCuistl(N); Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector); duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector); gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) { for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0], BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>; using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>; using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>; using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) { for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0); auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0)); auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i} // check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0)) // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@ -191,7 +191,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N); Vector outputVectorDune(N);
Vector outputVectorCuistl(N); Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector); duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector); gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) { for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0], BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B // Now we check that we can update the matrix. We basically just negate B
B *= -1.0; B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0); auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update(); gpuILU.update();
// check for the standard basis {e_i} // check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0)) // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) { for (int i = 0; i < N; ++i) {
@ -212,7 +212,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N); Vector outputVectorDune(N);
Vector outputVectorCuistl(N); Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector); duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector); gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) { for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0], BOOST_CHECK_CLOSE(outputVectorDune[component][0],

View File

@ -18,14 +18,14 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrix #define BOOST_TEST_MODULE TestGpuSparseMatrix
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <memory> #include <memory>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random> #include <random>
BOOST_AUTO_TEST_CASE(TestConstruction1D) BOOST_AUTO_TEST_CASE(TestConstruction1D)
@ -76,17 +76,17 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
} }
} }
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B); auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues(); const auto& nonZeroValuesCuda = gpuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0); std::vector<double> buffer(gpuSparseMatrix.nonzeroes(), 0.0);
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size()); nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0]))); const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes()); BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes()); BOOST_CHECK_EQUAL(N * 3 - 2, gpuSparseMatrix.nonzeroes());
std::vector<int> rowIndicesFromCUDA(N + 1); std::vector<int> rowIndicesFromCUDA(N + 1);
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size()); gpuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0); BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2); BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
for (int i = 2; i < N; ++i) { for (int i = 2; i < N; ++i) {
@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0); std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size()); gpuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0); BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1); BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
@ -143,19 +143,19 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
} }
} }
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B); auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
// check each column // check each column
for (size_t component = 0; component < N; ++component) { for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0); std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0; inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25); std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size()); auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size()); auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N); Vector xHost(N), yHost(N);
yHost = inputDataY[0]; yHost = inputDataY[0];
inputVectorX.copyToHost(xHost); inputVectorX.copyToHost(xHost);
const double alpha = 1.42; const double alpha = 1.42;
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY); gpuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY); inputVectorY.copyToHost(inputDataY);
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
} }
inputVectorX.copyToHost(xHost); inputVectorX.copyToHost(xHost);
cuSparseMatrix.mv(inputVectorX, inputVectorY); gpuSparseMatrix.mv(inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY); inputVectorY.copyToHost(inputDataY);

View File

@ -18,21 +18,21 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuVector #define BOOST_TEST_MODULE TestGpuVector
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <dune/common/fvector.hh> #include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random> #include <random>
BOOST_AUTO_TEST_CASE(TestDocumentedUsage) BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{ {
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU); auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);
// Multiply by 4.0: // Multiply by 4.0:
dataOnGPU *= 4.0; dataOnGPU *= 4.0;
@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize) BOOST_AUTO_TEST_CASE(TestConstructionSize)
{ {
const int numberOfElements = 1234; const int numberOfElements = 1234;
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim()); BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
} }
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor) BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim()); BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0); std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size()); vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction) BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim()); BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size()); vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0); std::vector<double> buffer(data.size(), 0.0);
@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector) BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{ {
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}}; auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector); vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim()); std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size()); vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3); auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector); vectorOnGPU.copyToHost(blockVector);
@ -103,17 +103,17 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer) BOOST_AUTO_TEST_CASE(TestDataPointer)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0); std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost)); OPM_GPU_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end()); BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
} }
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply) BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim()); BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25; const double scalar = 42.25;
vectorOnGPU *= scalar; vectorOnGPU *= scalar;
@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm) BOOST_AUTO_TEST_CASE(TestTwoNorm)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm(); auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0; double correctAnswer = 0.0;
@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{ {
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}}; std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size()); auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size()); auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB); auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0; double correctAnswer = 0.0;
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment) BOOST_AUTO_TEST_CASE(Assigment)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0; vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size()); vectorOnGPU.copyToHost(data.data(), data.size());
@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment) BOOST_AUTO_TEST_CASE(CopyAssignment)
{ {
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}}; std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size()); auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size()); vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size()); auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
vectorOnGPUB = 4.0; vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU; vectorOnGPUB = vectorOnGPU;
@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors) BOOST_AUTO_TEST_CASE(RandomVectors)
{ {
using GVector = Opm::cuistl::CuVector<double>; using GVector = Opm::gpuistl::GpuVector<double>;
std::srand(0); std::srand(0);
std::mt19937 generator; std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0); std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i); indexSet.push_back(i);
} }
} }
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet); auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU); aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector(); auto projectedA = aGPU.asStdVector();

View File

@ -18,24 +18,24 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuView #define BOOST_TEST_MODULE TestGpuView
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <dune/common/fvector.hh> #include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh> #include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuView.hpp> #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp> #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <random> #include <random>
#include <array> #include <array>
#include <algorithm> #include <algorithm>
#include <type_traits> #include <type_traits>
using CuViewDouble = ::Opm::cuistl::CuView<double>; using GpuViewDouble = ::Opm::gpuistl::GpuView<double>;
using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>; using GpuBufferDouble = ::Opm::gpuistl::GpuBuffer<double>;
__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){ __global__ void useGpuViewOnGPU(GpuViewDouble a, GpuViewDouble b){
b[0] = a.front(); b[0] = a.front();
b[1] = a.back(); b[1] = a.back();
b[2] = *a.begin(); b[2] = *a.begin();
@ -48,24 +48,24 @@ BOOST_AUTO_TEST_CASE(TestCreationAndIndexing)
{ {
// A simple test to check that we can move data to and from the GPU // A simple test to check that we can move data to and from the GPU
auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cubuffer = CuBufferDouble(cpubuffer); auto cubuffer = GpuBufferDouble(cpubuffer);
auto cuview = CuViewDouble(cubuffer.data(), cubuffer.size()); auto gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
const auto const_cuview = CuViewDouble(cubuffer.data(), cubuffer.size()); const auto const_gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
auto stdVecOfCuView = cuview.asStdVector(); auto stdVecOfGpuView = gpuview.asStdVector();
auto const_stdVecOfCuView = cuview.asStdVector(); auto const_stdVecOfGpuView = gpuview.asStdVector();
BOOST_CHECK_EQUAL_COLLECTIONS( BOOST_CHECK_EQUAL_COLLECTIONS(
stdVecOfCuView.begin(), stdVecOfCuView.end(), cpubuffer.begin(), cpubuffer.end()); stdVecOfGpuView.begin(), stdVecOfGpuView.end(), cpubuffer.begin(), cpubuffer.end());
BOOST_CHECK_EQUAL_COLLECTIONS( BOOST_CHECK_EQUAL_COLLECTIONS(
stdVecOfCuView.begin(), stdVecOfCuView.end(), const_stdVecOfCuView.begin(), const_stdVecOfCuView.end()); stdVecOfGpuView.begin(), stdVecOfGpuView.end(), const_stdVecOfGpuView.begin(), const_stdVecOfGpuView.end());
} }
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes) BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUTypes)
{ {
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cpuview = CuViewDouble(buf.data(), buf.size()); auto cpuview = GpuViewDouble(buf.data(), buf.size());
const auto const_cpuview = CuViewDouble(buf.data(), buf.size()); const auto const_cpuview = GpuViewDouble(buf.data(), buf.size());
// check that indexing a mutable view gives references when indexing it // check that indexing a mutable view gives references when indexing it
bool correct_type_of_cpu_front = std::is_same_v<double&, decltype(cpuview.front())>; bool correct_type_of_cpu_front = std::is_same_v<double&, decltype(cpuview.front())>;
@ -83,26 +83,26 @@ BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
BOOST_CHECK(cpuview.back() == buf.back()); BOOST_CHECK(cpuview.back() == buf.back());
} }
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUWithSTLIteratorAlgorithm) BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUWithSTLIteratorAlgorithm)
{ {
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cpuview = CuViewDouble(buf.data(), buf.size()); auto cpuview = GpuViewDouble(buf.data(), buf.size());
std::sort(buf.begin(), buf.end()); std::sort(buf.begin(), buf.end());
BOOST_CHECK(42.0 == cpuview[3]); BOOST_CHECK(42.0 == cpuview[3]);
} }
BOOST_AUTO_TEST_CASE(TestCuViewOnGPU) BOOST_AUTO_TEST_CASE(TestGpuViewOnGPU)
{ {
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto cubufA = CuBufferDouble(buf); auto cubufA = GpuBufferDouble(buf);
auto cuviewA = CuViewDouble(cubufA.data(), cubufA.size()); auto gpuviewA = GpuViewDouble(cubufA.data(), cubufA.size());
auto cubufB = CuBufferDouble(4); auto cubufB = GpuBufferDouble(4);
auto cuviewB = CuViewDouble(cubufB.data(), cubufB.size()); auto gpuviewB = GpuViewDouble(cubufB.data(), cubufB.size());
useCuViewOnGPU<<<1,1>>>(cuviewA, cuviewB); useGpuViewOnGPU<<<1,1>>>(gpuviewA, gpuviewB);
auto vecA = cuviewA.asStdVector(); auto vecA = gpuviewA.asStdVector();
auto vecB = cuviewB.asStdVector(); auto vecB = gpuviewB.asStdVector();
// checks that front/back/begin/end works // checks that front/back/begin/end works
BOOST_CHECK(vecB[0] == buf[0]); BOOST_CHECK(vecB[0] == buf[0]);

View File

@ -29,7 +29,7 @@
#include <dune/istl/preconditioners.hh> #include <dune/istl/preconditioners.hh>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>; using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
expectedOutputVector[i][1] = 43.0; expectedOutputVector[i][1] = 43.0;
inputVector[i][0] = 1.0; inputVector[i][0] = 1.0;
auto converter auto converter
= Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>( = Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
B); B);
auto underlyingPreconditioner = std::make_shared<TestPreconditioner>( auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
converter.getConvertedMatrix(), inputVector, B, expectedOutputVector); converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);

View File

@ -18,17 +18,17 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations #define BOOST_TEST_MODULE TestGpuSparseMatrixOperations
#include <boost/mpl/list.hpp> #include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> #include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp> #include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
using NumericTypes = boost::mpl::list<double, float>; using NumericTypes = boost::mpl::list<double, float>;
@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0; B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0; B[1][1][2][2] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B); Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N); Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>( Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data()); m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {-1.0 / 4.0, std::vector<T> expectedInvDiag {-1.0 / 4.0,
@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0; B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0; B[1][1][1][1] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B); Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N); Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>( Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data()); m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0}; std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};

View File

@ -18,16 +18,16 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCuVectorOperations #define BOOST_TEST_MODULE TestGpuVectorOperations
#include <boost/mpl/list.hpp> #include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh> #include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuJac.hpp> #include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp> #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
using NumericTypes = boost::mpl::list<double, float>; using NumericTypes = boost::mpl::list<double, float>;
@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0}); std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0}); std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0}); std::vector<T> hostDstVector({0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector); Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector); Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector); Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV( Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data()); deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {10.0, 22.0, 10.0}; std::vector<T> expectedVec {10.0, 22.0, 10.0};
@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0}); std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0}); std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0}); std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector); Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector); Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector); Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV( Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data()); deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0}; std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
for (size_t i = 0; i < expectedVec.size(); i++) { for (size_t i = 0; i < expectedVec.size(); i++) {
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7); BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
} }
} }

View File

@ -16,14 +16,14 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>. along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp" #include "opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp"
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCublasHandle #define BOOST_TEST_MODULE TestCublasHandle
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
BOOST_AUTO_TEST_CASE(TestGetCublasVersion) BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
{ {
@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
// that checks the version of blas programatically. Let the test pass for now. // that checks the version of blas programatically. Let the test pass for now.
BOOST_CHECK(true); BOOST_CHECK(true);
#else #else
auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance(); auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
int cuBlasVersion = -1; int cuBlasVersion = -1;
OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion)); OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));

View File

@ -22,7 +22,7 @@
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCreateHandle) BOOST_AUTO_TEST_CASE(TestCreateHandle)
{ {

View File

@ -21,7 +21,7 @@
#define BOOST_TEST_MODULE TestCudaCheckLastError #define BOOST_TEST_MODULE TestCudaCheckLastError
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp> #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
BOOST_AUTO_TEST_CASE(TestNoThrowLastError) BOOST_AUTO_TEST_CASE(TestNoThrowLastError)

View File

@ -21,12 +21,12 @@
#define BOOST_TEST_MODULE TestSparseHandle #define BOOST_TEST_MODULE TestSparseHandle
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestGetSparseVersion) BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
{ {
auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance(); auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
int cuSparseVersion = -1; int cuSparseVersion = -1;
OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion)); OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
BOOST_CHECK_LT(0, cuSparseVersion); BOOST_CHECK_LT(0, cuSparseVersion);

View File

@ -22,7 +22,7 @@
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cusparse.h> #include <cusparse.h>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCreateHandle) BOOST_AUTO_TEST_CASE(TestCreateHandle)
{ {

View File

@ -18,15 +18,15 @@
*/ */
#include <config.h> #include <config.h>
#define BOOST_TEST_MODULE TestCudaSafeCall #define BOOST_TEST_MODULE TestGpuSafeCall
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp> #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
BOOST_AUTO_TEST_CASE(TestCudaMalloc) BOOST_AUTO_TEST_CASE(TestGpuMalloc)
{ {
void* pointer; void* pointer;
BOOST_CHECK_NO_THROW(OPM_CUDA_SAFE_CALL(cudaMalloc(&pointer, 1));); BOOST_CHECK_NO_THROW(OPM_GPU_SAFE_CALL(cudaMalloc(&pointer, 1)););
} }
@ -41,6 +41,6 @@ BOOST_AUTO_TEST_CASE(TestThrows)
errorCodes = {{cudaErrorAddressOfConstant, cudaErrorAlreadyAcquired}}; errorCodes = {{cudaErrorAddressOfConstant, cudaErrorAlreadyAcquired}};
#endif #endif
for (auto code : errorCodes) { for (auto code : errorCodes) {
BOOST_CHECK_THROW(OPM_CUDA_SAFE_CALL(code), std::exception); BOOST_CHECK_THROW(OPM_GPU_SAFE_CALL(code), std::exception);
} }
} }

View File

@ -21,11 +21,11 @@
#define BOOST_TEST_MODULE TestSafeConversion #define BOOST_TEST_MODULE TestSafeConversion
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange) BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
{ {
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1)); BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
, std::invalid_argument); , std::invalid_argument);
} }
@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
{ {
// This might seem slow, but it is really fast: // This might seem slow, but it is really fast:
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) { for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i)); BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
} }
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(), BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()))); Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
} }
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange) BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
{ {
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument); BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
} }
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange) BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
{ {
// This might seem slow, but it is really fast: // This might seem slow, but it is really fast:
for (int i = 0; i <= 1024 * 1024; ++i) { for (int i = 0; i <= 1024 * 1024; ++i) {
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i)); BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
} }
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()), BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max())); Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
} }

View File

@ -24,14 +24,14 @@
#include <dune/istl/solvers.hh> #include <dune/istl/solvers.hh>
#include <opm/simulators/linalg/PreconditionerFactory.hpp> #include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp> #include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp> #include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
static const constexpr int dim = 3; static const constexpr int dim = 3;
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>; using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>; using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>; using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>; using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>; using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
namespace namespace
{ {