mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #5554 from multitalentloes/refactor_cuistl
refactor cuistl to gpuistl
This commit is contained in:
commit
f97389d1b5
@ -530,7 +530,7 @@ if(CUDA_FOUND)
|
||||
if (NOT USE_HIP)
|
||||
target_link_libraries( opmsimulators PUBLIC ${CUDA_cusparse_LIBRARY} )
|
||||
target_link_libraries( opmsimulators PUBLIC ${CUDA_cublas_LIBRARY} )
|
||||
foreach(tgt test_cuda_safe_call test_cuda_check_last_error test_cuvector)
|
||||
foreach(tgt test_gpu_safe_call test_cuda_check_last_error test_GpuVector)
|
||||
target_link_libraries(${tgt} CUDA::cudart)
|
||||
endforeach()
|
||||
endif()
|
||||
@ -545,21 +545,21 @@ if(CUDA_FOUND)
|
||||
endif()
|
||||
set_tests_properties(cusparse_safe_call
|
||||
cublas_safe_call
|
||||
cuda_safe_call
|
||||
gpu_safe_call
|
||||
cuda_check_last_error
|
||||
cublas_handle
|
||||
cujac
|
||||
cudilu
|
||||
GpuJac
|
||||
GpuDILU
|
||||
cusparse_handle
|
||||
cuSparse_matrix_operations
|
||||
cuVector_operations
|
||||
cuvector
|
||||
cusparsematrix
|
||||
cuseqilu0
|
||||
cuowneroverlapcopy
|
||||
GpuVector
|
||||
GpuSparseMatrix
|
||||
GpuSeqILU0
|
||||
GpuOwnerOverlapCopy
|
||||
solver_adapter
|
||||
cubuffer
|
||||
cuview
|
||||
GpuBuffer
|
||||
GpuView
|
||||
PROPERTIES LABELS ${gpu_label})
|
||||
endif()
|
||||
|
||||
|
@ -28,15 +28,15 @@
|
||||
# hipification, we a dependency that will trigger when the cuda source code is
|
||||
# changed.
|
||||
macro (ADD_CUDA_OR_HIP_FILE LIST DIR FILE)
|
||||
set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/cuistl/${FILE}")
|
||||
set (cuda_file_path "${PROJECT_SOURCE_DIR}/${DIR}/gpuistl/${FILE}")
|
||||
|
||||
if(CUDA_FOUND AND NOT CONVERT_CUDA_TO_HIP)
|
||||
list (APPEND ${LIST} "${DIR}/cuistl/${FILE}")
|
||||
list (APPEND ${LIST} "${DIR}/gpuistl/${FILE}")
|
||||
else()
|
||||
# we must hipify the code
|
||||
# and include the correct path which is in the build/binary dir
|
||||
string(REPLACE ".cu" ".hip" HIP_SOURCE_FILE ${FILE})
|
||||
set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/hipistl/${HIP_SOURCE_FILE}")
|
||||
set (hip_file_path "${PROJECT_BINARY_DIR}/${DIR}/gpuistl_hip/${HIP_SOURCE_FILE}")
|
||||
file(RELATIVE_PATH relpath ${PROJECT_SOURCE_DIR} ${hip_file_path})
|
||||
|
||||
# add a custom command that will hipify
|
||||
@ -207,42 +207,42 @@ endif()
|
||||
# add these files if we should compile the hip code
|
||||
if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/cusparse_matrix_operations.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuBuffer.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBuffer.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuView.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuDILU.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuJac.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSeqILU0.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSeqILU0.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg set_device.cpp)
|
||||
|
||||
# HEADERS
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_safe_call.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_matrix_operations.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpusparse_matrix_operations.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_safe_call.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cublas_safe_call.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBuffer.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBuffer.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuDILU.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuJac.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuVector.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuView.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp)
|
||||
@ -256,12 +256,12 @@ if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/deviceBlockOperations.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpuThreadUtils.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerAdapter.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSeqILU0.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSeqILU0.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/fix_zero_diagonal.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerConvertFieldTypeAdapter.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuOwnerOverlapCopy.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuOwnerOverlapCopy.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg SolverAdapter.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBlockPreconditioner.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
|
||||
endif()
|
||||
@ -389,19 +389,19 @@ if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cubuffer.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuview.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuBuffer.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuView.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_safe_call.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cudilu.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cujac.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuowneroverlapcopy.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuseqilu0.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuJac.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuOwnerOverlapCopy.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp)
|
||||
|
@ -15,6 +15,6 @@ hipify-perl $input_file > $output_file
|
||||
sed -i 's/^#include <hipblas\.h>/#include <hipblas\/hipblas.h>/g' $output_file
|
||||
sed -i 's/^#include <hipsparse\.h>/#include <hipsparse\/hipsparse.h>/g' $output_file
|
||||
# make sure includes refer to hipistl/ files (the ones that are also hipified)
|
||||
sed -i 's/cuistl\//hipistl\//g' $output_file
|
||||
sed -i 's/gpuistl\//gpuistl_hip\//g' $output_file
|
||||
|
||||
echo "$output_file hipified"
|
||||
|
@ -36,7 +36,7 @@
|
||||
#endif
|
||||
|
||||
#if HAVE_CUDA
|
||||
#include <opm/simulators/linalg/cuistl/set_device.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
|
||||
#endif
|
||||
|
||||
namespace Opm {
|
||||
@ -163,7 +163,7 @@ void Main::initMPI()
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
|
||||
Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
|
||||
#endif
|
||||
|
||||
#endif // HAVE_MPI
|
||||
|
@ -39,9 +39,9 @@
|
||||
|
||||
#if HAVE_CUDA
|
||||
#if USE_HIP
|
||||
#include <opm/simulators/linalg/hipistl/SolverAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/SolverAdapter.hpp>
|
||||
#else
|
||||
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -205,7 +205,7 @@ namespace Dune
|
||||
#endif
|
||||
#if HAVE_CUDA
|
||||
} else if (solver_type == "cubicgstab") {
|
||||
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
|
||||
linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
|
||||
*linearoperator_for_solver_,
|
||||
*scalarproduct_,
|
||||
preconditioner_,
|
||||
|
@ -22,22 +22,22 @@
|
||||
// both with the normal cuistl path, and the hipistl path
|
||||
#if HAVE_CUDA
|
||||
#if USE_HIP
|
||||
#include <opm/simulators/linalg/hipistl/CuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/CuDILU.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/CuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/hipistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/GpuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/GpuDILU.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/GpuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl_hip/detail/gpu_safe_call.hpp>
|
||||
#else
|
||||
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
@ -322,39 +322,39 @@ struct StandardPreconditioners {
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CuILU0 = typename cuistl::
|
||||
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
|
||||
using GpuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
|
||||
|
||||
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
|
||||
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
|
||||
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
|
||||
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CuJac =
|
||||
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
auto cuJac = std::make_shared<CuJac>(op.getmat(), w);
|
||||
using GpuJac =
|
||||
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuJac = std::make_shared<GpuJac>(op.getmat(), w);
|
||||
|
||||
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
|
||||
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuJac>>(gpuJac);
|
||||
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
|
||||
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
|
||||
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
|
||||
|
||||
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
|
||||
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
|
||||
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
|
||||
@ -362,11 +362,11 @@ struct StandardPreconditioners {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
|
||||
|
||||
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
|
||||
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
|
||||
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
#endif
|
||||
@ -582,68 +582,68 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
});
|
||||
|
||||
#if HAVE_CUDA
|
||||
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CuILU0 = typename cuistl::
|
||||
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(
|
||||
std::make_shared<CuILU0>(op.getmat(), w));
|
||||
using GpuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(
|
||||
std::make_shared<GpuILU0>(op.getmat(), w));
|
||||
});
|
||||
|
||||
F::addCreator("CUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
F::addCreator("GPUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using block_type = typename V::block_type;
|
||||
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
|
||||
using matrix_type_to =
|
||||
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
|
||||
using CuILU0 = typename cuistl::
|
||||
CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
|
||||
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
|
||||
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
using GpuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
|
||||
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuILU0>;
|
||||
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
auto converted = std::make_shared<Converter>(op.getmat());
|
||||
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
|
||||
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuILU0>(converted->getConvertedMatrix(), w));
|
||||
converted->setUnderlyingPreconditioner(adapted);
|
||||
return converted;
|
||||
});
|
||||
|
||||
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
F::addCreator("GPUJAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CUJac =
|
||||
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>(
|
||||
std::make_shared<CUJac>(op.getmat(), w));
|
||||
using GPUJac =
|
||||
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUJac>>(
|
||||
std::make_shared<GPUJac>(op.getmat(), w));
|
||||
});
|
||||
|
||||
F::addCreator("OPMCUILU0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
|
||||
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
});
|
||||
|
||||
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
|
||||
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
});
|
||||
|
||||
F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||
F::addCreator("GPUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using block_type = typename V::block_type;
|
||||
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
|
||||
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
|
||||
using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
|
||||
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
|
||||
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
|
||||
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
|
||||
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
auto converted = std::make_shared<Converter>(op.getmat());
|
||||
auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
|
||||
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
|
||||
converted->setUnderlyingPreconditioner(adapted);
|
||||
return converted;
|
||||
});
|
||||
|
@ -16,22 +16,22 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||
#ifndef OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
|
||||
#define OPM_CUISTL_GPUBLOCKPRECONDITIONER_HPP
|
||||
|
||||
#include <dune/common/shared_ptr.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
|
||||
//!
|
||||
//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof).
|
||||
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
|
||||
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
|
||||
class GpuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
|
||||
{
|
||||
public:
|
||||
using domain_type = X;
|
||||
@ -47,13 +47,13 @@ public:
|
||||
//! @param c The communication object for syncing overlap and copy
|
||||
//! data points. (E.~g. OwnerOverlapCopyCommunication )
|
||||
//!
|
||||
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
|
||||
GpuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
|
||||
: m_preconditioner(p)
|
||||
, m_communication(c)
|
||||
{
|
||||
}
|
||||
|
||||
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
|
||||
GpuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
|
||||
: m_preconditioner(p)
|
||||
, m_communication(Dune::stackobject_to_shared_ptr(c))
|
||||
{
|
||||
@ -125,5 +125,5 @@ private:
|
||||
//! \brief the communication object
|
||||
std::shared_ptr<const communication_type> m_communication;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -20,83 +20,83 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <algorithm>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuView.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class T>
|
||||
CuBuffer<T>::CuBuffer(const std::vector<T>& data)
|
||||
: CuBuffer(data.data(), data.size())
|
||||
GpuBuffer<T>::GpuBuffer(const std::vector<T>& data)
|
||||
: GpuBuffer(data.data(), data.size())
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuBuffer<T>::CuBuffer(const size_t numberOfElements)
|
||||
GpuBuffer<T>::GpuBuffer(const size_t numberOfElements)
|
||||
: m_numberOfElements(numberOfElements)
|
||||
{
|
||||
if (numberOfElements < 1) {
|
||||
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
|
||||
OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
|
||||
}
|
||||
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
|
||||
OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements)
|
||||
: CuBuffer(numberOfElements)
|
||||
GpuBuffer<T>::GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
|
||||
: GpuBuffer(numberOfElements)
|
||||
{
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(
|
||||
m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuBuffer<T>::CuBuffer(const CuBuffer<T>& other)
|
||||
: CuBuffer(other.m_numberOfElements)
|
||||
GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
|
||||
: GpuBuffer(other.m_numberOfElements)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
other.m_dataOnDevice,
|
||||
m_numberOfElements * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuBuffer<T>::~CuBuffer()
|
||||
GpuBuffer<T>::~GpuBuffer()
|
||||
{
|
||||
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename CuBuffer<T>::size_type
|
||||
CuBuffer<T>::size() const
|
||||
typename GpuBuffer<T>::size_type
|
||||
GpuBuffer<T>::size() const
|
||||
{
|
||||
return m_numberOfElements;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuBuffer<T>::resize(size_t newSize)
|
||||
GpuBuffer<T>::resize(size_t newSize)
|
||||
{
|
||||
if (newSize < 1) {
|
||||
OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
|
||||
OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
|
||||
}
|
||||
// Allocate memory for the new buffer
|
||||
T* tmpBuffer = nullptr;
|
||||
OPM_CUDA_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
|
||||
OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
|
||||
|
||||
// Move the data from the old to the new buffer with truncation
|
||||
size_t sizeOfMove = std::min({m_numberOfElements, newSize});
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(tmpBuffer,
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
|
||||
m_dataOnDevice,
|
||||
sizeOfMove * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
|
||||
// free the old buffer
|
||||
OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice));
|
||||
OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
|
||||
|
||||
// swap the buffers
|
||||
m_dataOnDevice = tmpBuffer;
|
||||
@ -107,7 +107,7 @@ CuBuffer<T>::resize(size_t newSize)
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>
|
||||
CuBuffer<T>::asStdVector() const
|
||||
GpuBuffer<T>::asStdVector() const
|
||||
{
|
||||
std::vector<T> temporary(m_numberOfElements);
|
||||
copyToHost(temporary);
|
||||
@ -116,14 +116,14 @@ CuBuffer<T>::asStdVector() const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuBuffer<T>::assertSameSize(const CuBuffer<T>& x) const
|
||||
GpuBuffer<T>::assertSameSize(const GpuBuffer<T>& x) const
|
||||
{
|
||||
assertSameSize(x.m_numberOfElements);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuBuffer<T>::assertSameSize(size_t size) const
|
||||
GpuBuffer<T>::assertSameSize(size_t size) const
|
||||
{
|
||||
if (size != m_numberOfElements) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
@ -133,7 +133,7 @@ CuBuffer<T>::assertSameSize(size_t size) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuBuffer<T>::assertHasElements() const
|
||||
GpuBuffer<T>::assertHasElements() const
|
||||
{
|
||||
if (m_numberOfElements <= 0) {
|
||||
OPM_THROW(std::invalid_argument, "We have 0 elements");
|
||||
@ -142,21 +142,21 @@ CuBuffer<T>::assertHasElements() const
|
||||
|
||||
template <typename T>
|
||||
T*
|
||||
CuBuffer<T>::data()
|
||||
GpuBuffer<T>::data()
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T*
|
||||
CuBuffer<T>::data() const
|
||||
GpuBuffer<T>::data() const
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
GpuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
{
|
||||
if (numberOfElements > size()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
@ -164,41 +164,41 @@ CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
size(),
|
||||
numberOfElements));
|
||||
}
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
GpuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
{
|
||||
assertSameSize(numberOfElements);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuBuffer<T>::copyFromHost(const std::vector<T>& data)
|
||||
GpuBuffer<T>::copyFromHost(const std::vector<T>& data)
|
||||
{
|
||||
copyFromHost(data.data(), data.size());
|
||||
}
|
||||
template <class T>
|
||||
void
|
||||
CuBuffer<T>::copyToHost(std::vector<T>& data) const
|
||||
GpuBuffer<T>::copyToHost(std::vector<T>& data) const
|
||||
{
|
||||
copyToHost(data.data(), data.size());
|
||||
}
|
||||
|
||||
template class CuBuffer<double>;
|
||||
template class CuBuffer<float>;
|
||||
template class CuBuffer<int>;
|
||||
template class GpuBuffer<double>;
|
||||
template class GpuBuffer<float>;
|
||||
template class GpuBuffer<int>;
|
||||
|
||||
template <class T>
|
||||
CuView<const T> make_view(const CuBuffer<T>& buf) {
|
||||
return CuView<const T>(buf.data(), buf.size());
|
||||
GpuView<const T> make_view(const GpuBuffer<T>& buf) {
|
||||
return GpuView<const T>(buf.data(), buf.size());
|
||||
}
|
||||
|
||||
template CuView<const double> make_view<double>(const CuBuffer<double>&);
|
||||
template CuView<const float> make_view<float>(const CuBuffer<float>&);
|
||||
template CuView<const int> make_view<int>(const CuBuffer<int>&);
|
||||
template GpuView<const double> make_view<double>(const GpuBuffer<double>&);
|
||||
template GpuView<const float> make_view<float>(const GpuBuffer<float>&);
|
||||
template GpuView<const int> make_view<int>(const GpuBuffer<int>&);
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
@ -16,35 +16,35 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUBUFFER_HEADER_HPP
|
||||
#define OPM_CUBUFFER_HEADER_HPP
|
||||
#ifndef OPM_GPUBUFFER_HEADER_HPP
|
||||
#define OPM_GPUBUFFER_HEADER_HPP
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <exception>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuView.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuBuffer class is a simple container class for the GPU.
|
||||
* @brief The GpuBuffer class is a simple container class for the GPU.
|
||||
*
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
*
|
||||
* auto dataOnGPU = CuBuffer<double>(someDataOnCPU);
|
||||
* auto dataOnGPU = GpuBuffer<double>(someDataOnCPU);
|
||||
*
|
||||
* auto stdVectorOnCPU = dataOnGPU.asStdVector();
|
||||
* }
|
||||
@ -52,7 +52,7 @@ namespace Opm::cuistl
|
||||
* @tparam T the type to store. Can be either float, double or int.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuBuffer
|
||||
class GpuBuffer
|
||||
{
|
||||
public:
|
||||
using field_type = T;
|
||||
@ -60,17 +60,17 @@ public:
|
||||
using value_type = T;
|
||||
|
||||
/**
|
||||
* @brief CuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
|
||||
* @brief GpuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @param other the buffer to copy from
|
||||
*/
|
||||
CuBuffer(const CuBuffer<T>& other);
|
||||
GpuBuffer(const GpuBuffer<T>& other);
|
||||
|
||||
/**
|
||||
* @brief CuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* @brief GpuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does CPU to GPU transfer.
|
||||
@ -78,23 +78,23 @@ public:
|
||||
*
|
||||
* @param data the vector to copy from
|
||||
*/
|
||||
explicit CuBuffer(const std::vector<T>& data);
|
||||
explicit GpuBuffer(const std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
|
||||
*/
|
||||
explicit CuBuffer();
|
||||
explicit GpuBuffer();
|
||||
|
||||
/**
|
||||
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
* @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
*
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
*/
|
||||
explicit CuBuffer(const size_t numberOfElements);
|
||||
explicit GpuBuffer(const size_t numberOfElements);
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* data
|
||||
*
|
||||
* @note This assumes the data is on the CPU.
|
||||
@ -102,12 +102,12 @@ public:
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
* @param dataOnHost data on host/CPU
|
||||
*/
|
||||
CuBuffer(const T* dataOnHost, const size_t numberOfElements);
|
||||
GpuBuffer(const T* dataOnHost, const size_t numberOfElements);
|
||||
|
||||
/**
|
||||
* @brief ~CuBuffer calls cudaFree
|
||||
* @brief ~GpuBuffer calls cudaFree
|
||||
*/
|
||||
virtual ~CuBuffer();
|
||||
virtual ~GpuBuffer();
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
@ -120,7 +120,7 @@ public:
|
||||
const T* data() const;
|
||||
|
||||
/**
|
||||
* @return fetch the first element in a CuBuffer
|
||||
* @return fetch the first element in a GpuBuffer
|
||||
*/
|
||||
__host__ __device__ T& front()
|
||||
{
|
||||
@ -131,7 +131,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the last element in a CuBuffer
|
||||
* @return fetch the last element in a GpuBuffer
|
||||
*/
|
||||
__host__ __device__ T& back()
|
||||
{
|
||||
@ -142,7 +142,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the first element in a CuBuffer
|
||||
* @return fetch the first element in a GpuBuffer
|
||||
*/
|
||||
__host__ __device__ T front() const
|
||||
{
|
||||
@ -153,7 +153,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the last element in a CuBuffer
|
||||
* @return fetch the last element in a GpuBuffer
|
||||
*/
|
||||
__host__ __device__ T back() const
|
||||
{
|
||||
@ -176,7 +176,7 @@ public:
|
||||
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
|
||||
if (m_numberOfElements != bvector.size()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuBuffer has size {}, \n"
|
||||
fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
|
||||
"however, BlockVector has N() = {}, and size = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -199,7 +199,7 @@ public:
|
||||
// TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
|
||||
if (m_numberOfElements != bvector.size()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuBuffer has size {},\n however, the BlockVector "
|
||||
fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
|
||||
"has has N() = {}, and size() = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -267,14 +267,14 @@ private:
|
||||
T* m_dataOnDevice = nullptr;
|
||||
size_t m_numberOfElements;
|
||||
|
||||
void assertSameSize(const CuBuffer<T>& other) const;
|
||||
void assertSameSize(const GpuBuffer<T>& other) const;
|
||||
void assertSameSize(size_t size) const;
|
||||
|
||||
void assertHasElements() const;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
CuView<const T> make_view(const CuBuffer<T>&);
|
||||
GpuView<const T> make_view(const GpuBuffer<T>&);
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -25,28 +25,28 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/common/TimingMacros.hpp>
|
||||
#include <opm/simulators/linalg/GraphColoring.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
#include <tuple>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
: m_cpuMatrix(A)
|
||||
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
|
||||
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
|
||||
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
|
||||
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
|
||||
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
|
||||
, m_gpuNaturalToReorder(m_naturalToReordered)
|
||||
, m_gpuReorderToNatural(m_reorderedToNatural)
|
||||
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
|
||||
@ -71,13 +71,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
m_gpuMatrix.nonzeroes(),
|
||||
A.nonzeroes()));
|
||||
if (m_splitMatrix) {
|
||||
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
|
||||
m_gpuMatrixReorderedDiag = std::make_unique<GpuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
|
||||
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
m_reorderedToNatural);
|
||||
}
|
||||
else {
|
||||
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
|
||||
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
|
||||
m_cpuMatrix, m_reorderedToNatural);
|
||||
}
|
||||
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
|
||||
@ -89,13 +89,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
GpuDILU<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
GpuDILU<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
{
|
||||
OPM_TIMEBLOCK(prec_apply);
|
||||
{
|
||||
@ -105,7 +105,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize)
|
||||
GpuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize)
|
||||
{
|
||||
int levelStartIdx = 0;
|
||||
for (int level = 0; level < m_levelSets.size(); ++level) {
|
||||
@ -172,20 +172,20 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int u
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
GpuDILU<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
Dune::SolverCategory::Category
|
||||
CuDILU<M, X, Y, l>::category() const
|
||||
GpuDILU<M, X, Y, l>::category() const
|
||||
{
|
||||
return Dune::SolverCategory::sequential;
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::update()
|
||||
GpuDILU<M, X, Y, l>::update()
|
||||
{
|
||||
OPM_TIMEBLOCK(prec_update);
|
||||
{
|
||||
@ -195,7 +195,7 @@ CuDILU<M, X, Y, l>::update()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
|
||||
GpuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
|
||||
{
|
||||
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu
|
||||
computeDiagAndMoveReorderedData(moveThreadBlockSize, factorizationBlockSize);
|
||||
@ -203,7 +203,7 @@ CuDILU<M, X, Y, l>::update(int moveThreadBlockSize, int factorizationBlockSize)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize)
|
||||
GpuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize)
|
||||
{
|
||||
if (m_splitMatrix) {
|
||||
detail::copyMatDataToReorderedSplit<field_type, blocksize_>(
|
||||
@ -264,7 +264,7 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
{
|
||||
// tune the thread-block size of the update function
|
||||
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
|
||||
@ -278,8 +278,8 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
|
||||
|
||||
// tune the thread-block size of the apply
|
||||
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
tmpD = 1;
|
||||
|
||||
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
|
||||
@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>; \
|
||||
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>
|
||||
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUDILU_DUNE(double, 1);
|
||||
INSTANTIATE_CUDILU_DUNE(double, 2);
|
@ -16,18 +16,18 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUDILU_HPP
|
||||
#define OPM_CUDILU_HPP
|
||||
#ifndef OPM_GPUDILU_HPP
|
||||
#define OPM_GPUDILU_HPP
|
||||
|
||||
#include <memory>
|
||||
#include <opm/grid/utility/SparseTable.hpp>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <vector>
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief DILU preconditioner on the GPU.
|
||||
//!
|
||||
@ -37,10 +37,10 @@ namespace Opm::cuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class CuDILU : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
class GpuDILU : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
{
|
||||
public:
|
||||
//! \brief The matrix type the preconditioner is for.
|
||||
@ -52,7 +52,7 @@ public:
|
||||
//! \brief The field type of the preconditioner.
|
||||
using field_type = typename X::field_type;
|
||||
//! \brief The GPU matrix type
|
||||
using CuMat = CuSparseMatrix<field_type>;
|
||||
using CuMat = GpuSparseMatrix<field_type>;
|
||||
|
||||
//! \brief Constructor.
|
||||
//!
|
||||
@ -60,7 +60,7 @@ public:
|
||||
//! \param A The matrix to operate on.
|
||||
//! \param w The relaxation factor.
|
||||
//!
|
||||
explicit CuDILU(const M& A, bool splitMatrix, bool tuneKernels);
|
||||
explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels);
|
||||
|
||||
//! \brief Prepare the preconditioner.
|
||||
//! \note Does nothing at the time being.
|
||||
@ -126,13 +126,13 @@ private:
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
||||
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
||||
std::unique_ptr<CuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
//! row conversion from natural to reordered matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuNaturalToReorder;
|
||||
GpuVector<int> m_gpuNaturalToReorder;
|
||||
//! row conversion from reordered to natural matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuReorderToNatural;
|
||||
GpuVector<int> m_gpuReorderToNatural;
|
||||
//! \brief Stores the inverted diagonal that we use in DILU
|
||||
CuVector<field_type> m_gpuDInv;
|
||||
GpuVector<field_type> m_gpuDInv;
|
||||
//! \brief Bool storing whether or not we should store matrices in a split format
|
||||
bool m_splitMatrix;
|
||||
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
||||
@ -144,6 +144,6 @@ private:
|
||||
int m_moveThreadBlockSize = -1;
|
||||
int m_DILUFactorizationThreadBlockSize = -1;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -20,20 +20,20 @@
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
|
||||
GpuJac<M, X, Y, l>::GpuJac(const M& A, field_type w)
|
||||
: m_cpuMatrix(A)
|
||||
, m_relaxationFactor(w)
|
||||
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A))
|
||||
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(A))
|
||||
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
|
||||
{
|
||||
// Some sanity check
|
||||
@ -58,13 +58,13 @@ CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
GpuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
GpuJac<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
{
|
||||
// Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) )
|
||||
// Working with defect d and update v it we only need to set v = w*(D^-1)*d
|
||||
@ -77,20 +77,20 @@ CuJac<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
GpuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
Dune::SolverCategory::Category
|
||||
CuJac<M, X, Y, l>::category() const
|
||||
GpuJac<M, X, Y, l>::category() const
|
||||
{
|
||||
return Dune::SolverCategory::sequential;
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::update()
|
||||
GpuJac<M, X, Y, l>::update()
|
||||
{
|
||||
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix);
|
||||
invertDiagonalAndFlatten();
|
||||
@ -98,7 +98,7 @@ CuJac<M, X, Y, l>::update()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
|
||||
GpuJac<M, X, Y, l>::invertDiagonalAndFlatten()
|
||||
{
|
||||
detail::JAC::invertDiagonalAndFlatten<field_type, matrix_type::block_type::cols>(
|
||||
m_gpuMatrix.getNonZeroValues().data(),
|
||||
@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
|
||||
m_diagInvFlattened.data());
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>; \
|
||||
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>
|
||||
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUJAC_DUNE(double, 1);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 2);
|
@ -16,19 +16,19 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUJAC_HPP
|
||||
#define OPM_CUJAC_HPP
|
||||
#ifndef OPM_GPUJAC_HPP
|
||||
#define OPM_GPUJAC_HPP
|
||||
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief Jacobi preconditioner on the GPU.
|
||||
//!
|
||||
@ -40,10 +40,10 @@ namespace Opm::cuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class CuJac : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
class GpuJac : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
{
|
||||
public:
|
||||
//! \brief The matrix type the preconditioner is for.
|
||||
@ -61,7 +61,7 @@ public:
|
||||
//! \param A The matrix to operate on.
|
||||
//! \param w The relaxation factor.
|
||||
//!
|
||||
CuJac(const M& A, field_type w);
|
||||
GpuJac(const M& A, field_type w);
|
||||
|
||||
//! \brief Prepare the preconditioner.
|
||||
//! \note Does nothing at the time being.
|
||||
@ -104,12 +104,12 @@ private:
|
||||
//! \brief The relaxation factor to use.
|
||||
const field_type m_relaxationFactor;
|
||||
//! \brief The A matrix stored on the gpu
|
||||
CuSparseMatrix<field_type> m_gpuMatrix;
|
||||
GpuSparseMatrix<field_type> m_gpuMatrix;
|
||||
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
|
||||
CuVector<field_type> m_diagInvFlattened;
|
||||
GpuVector<field_type> m_diagInvFlattened;
|
||||
|
||||
void invertDiagonalAndFlatten();
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -16,15 +16,15 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||
#ifndef OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
|
||||
#define OPM_GPUISTL_GPUOWNEROVERLAPCOPY_HPP
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
/**
|
||||
* @brief GPUSender is a wrapper class for classes which will implement copOwnerToAll
|
||||
@ -36,7 +36,7 @@ namespace Opm::cuistl
|
||||
template<class field_type, class OwnerOverlapCopyCommunicationType>
|
||||
class GPUSender {
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){}
|
||||
|
||||
@ -97,8 +97,8 @@ protected:
|
||||
// premature optimization, in the sense that we could just initialize these indices
|
||||
// always, but they are not always used.
|
||||
mutable std::once_flag m_initializedIndices;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_indicesOwner;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_indicesCopy;
|
||||
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
|
||||
};
|
||||
|
||||
@ -113,7 +113,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
|
||||
class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
|
||||
@ -151,8 +151,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
|
||||
}
|
||||
};
|
||||
|
||||
@ -168,7 +168,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
|
||||
class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
|
||||
@ -178,7 +178,7 @@ public:
|
||||
void copyOwnerToAll(const X& source, X& dest) const override
|
||||
{
|
||||
|
||||
OPM_ERROR_IF(&source != &dest, "The provided CuVectors' address did not match"); // In this context, source == dest!!!
|
||||
OPM_ERROR_IF(&source != &dest, "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
|
||||
std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
|
||||
|
||||
int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
|
||||
@ -251,10 +251,10 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesCopy;
|
||||
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesOwner;
|
||||
mutable std::unique_ptr<CuVector<field_type>> m_GPUSendBuf;
|
||||
mutable std::unique_ptr<CuVector<field_type>> m_GPURecvBuf;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
|
||||
mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
|
||||
mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
|
||||
|
||||
struct MessageInformation
|
||||
{
|
||||
@ -332,11 +332,11 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
m_commpairIndicesCopy = std::make_unique<CuVector<int>>(commpairIndicesCopyOnCPU);
|
||||
m_commpairIndicesOwner = std::make_unique<CuVector<int>>(commpairIndicesOwnerCPU);
|
||||
m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
|
||||
m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
|
||||
|
||||
m_GPUSendBuf = std::make_unique<CuVector<field_type>>(sendBufIdx * block_size);
|
||||
m_GPURecvBuf = std::make_unique<CuVector<field_type>>(recvBufIdx * block_size);
|
||||
m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
|
||||
m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
|
||||
}
|
||||
|
||||
void initIndexSet() const override
|
||||
@ -360,8 +360,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
|
||||
|
||||
buildCommPairIdxs();
|
||||
}
|
||||
@ -371,21 +371,21 @@ private:
|
||||
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
|
||||
*
|
||||
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
|
||||
* a Dune::OwnerOverlapCopyCommunication on CuVectors
|
||||
* a Dune::OwnerOverlapCopyCommunication on GpuVectors
|
||||
*
|
||||
* @note This currently only has the functionality to parallelize the linear solve.
|
||||
*
|
||||
* @tparam field_type should be a field_type supported by CuVector (double, float)
|
||||
* @tparam field_type should be a field_type supported by GpuVector (double, float)
|
||||
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
|
||||
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
|
||||
*/
|
||||
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
|
||||
class CuOwnerOverlapCopy
|
||||
class GpuOwnerOverlapCopy
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
CuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
|
||||
GpuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
|
||||
|
||||
void copyOwnerToAll(const X& source, X& dest) const {
|
||||
m_sender->copyOwnerToAll(source, dest);
|
||||
@ -409,5 +409,5 @@ public:
|
||||
private:
|
||||
std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -25,26 +25,26 @@
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
// This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve ,
|
||||
// it highly recommended to read that before proceeding.
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
|
||||
GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
|
||||
: m_underlyingMatrix(A)
|
||||
, m_w(w)
|
||||
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
|
||||
, m_LU(GpuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
|
||||
, m_temporaryStorage(m_LU.N() * m_LU.blockSize())
|
||||
, m_descriptionL(detail::createLowerDiagonalDescription())
|
||||
, m_descriptionU(detail::createUpperDiagonalDescription())
|
||||
@ -70,13 +70,13 @@ CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
GpuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
GpuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
{
|
||||
|
||||
// We need to pass the solve routine a scalar to multiply.
|
||||
@ -133,20 +133,20 @@ CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
GpuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
Dune::SolverCategory::Category
|
||||
CuSeqILU0<M, X, Y, l>::category() const
|
||||
GpuSeqILU0<M, X, Y, l>::category() const
|
||||
{
|
||||
return Dune::SolverCategory::sequential;
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::update()
|
||||
GpuSeqILU0<M, X, Y, l>::update()
|
||||
{
|
||||
m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
|
||||
createILU();
|
||||
@ -154,7 +154,7 @@ CuSeqILU0<M, X, Y, l>::update()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::analyzeMatrix()
|
||||
GpuSeqILU0<M, X, Y, l>::analyzeMatrix()
|
||||
{
|
||||
|
||||
if (!m_buffer) {
|
||||
@ -226,7 +226,7 @@ CuSeqILU0<M, X, Y, l>::analyzeMatrix()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
size_t
|
||||
CuSeqILU0<M, X, Y, l>::findBufferSize()
|
||||
GpuSeqILU0<M, X, Y, l>::findBufferSize()
|
||||
{
|
||||
// We have three calls that need buffers:
|
||||
// 1) LU decomposition
|
||||
@ -290,7 +290,7 @@ CuSeqILU0<M, X, Y, l>::findBufferSize()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::createILU()
|
||||
GpuSeqILU0<M, X, Y, l>::createILU()
|
||||
{
|
||||
OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
|
||||
OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
|
||||
@ -328,35 +328,35 @@ CuSeqILU0<M, X, Y, l>::createILU()
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
|
||||
GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
|
||||
{
|
||||
auto bufferSize = findBufferSize();
|
||||
if (!m_buffer || m_buffer->dim() < bufferSize) {
|
||||
m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
|
||||
m_buffer.reset(new GpuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
|
||||
}
|
||||
analyzeMatrix();
|
||||
createILU();
|
||||
}
|
||||
} // namespace Opm::cuistl
|
||||
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>; \
|
||||
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 1);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 2);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 3);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 4);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 5);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(double, 6);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 1);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 2);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 3);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 4);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 5);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 6);
|
||||
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 1);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 2);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 3);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 4);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 5);
|
||||
INSTANTIATE_CUSEQILU0_DUNE(float, 6);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 1);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 2);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 3);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 4);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 5);
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(float, 6);
|
@ -16,19 +16,19 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUSEQILU0_HPP
|
||||
#define OPM_CUSEQILU0_HPP
|
||||
#ifndef OPM_GPUSEQILU0_HPP
|
||||
#define OPM_GPUSEQILU0_HPP
|
||||
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
|
||||
//!
|
||||
@ -43,10 +43,10 @@ namespace Opm::cuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class CuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
{
|
||||
public:
|
||||
//! \brief The matrix type the preconditioner is for.
|
||||
@ -64,7 +64,7 @@ public:
|
||||
//! \param A The matrix to operate on.
|
||||
//! \param w The relaxation factor.
|
||||
//!
|
||||
CuSeqILU0(const M& A, field_type w);
|
||||
GpuSeqILU0(const M& A, field_type w);
|
||||
|
||||
//! \brief Prepare the preconditioner.
|
||||
//! \note Does nothing at the time being.
|
||||
@ -110,18 +110,18 @@ private:
|
||||
//! This is the storage for the LU composition.
|
||||
//! Initially this will have the values of A, but will be
|
||||
//! modified in the constructor to be the proper LU decomposition.
|
||||
CuSparseMatrix<field_type> m_LU;
|
||||
GpuSparseMatrix<field_type> m_LU;
|
||||
|
||||
CuVector<field_type> m_temporaryStorage;
|
||||
GpuVector<field_type> m_temporaryStorage;
|
||||
|
||||
|
||||
detail::CuSparseMatrixDescriptionPtr m_descriptionL;
|
||||
detail::CuSparseMatrixDescriptionPtr m_descriptionU;
|
||||
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
|
||||
detail::GpuSparseMatrixDescriptionPtr m_descriptionU;
|
||||
detail::CuSparseResource<bsrsv2Info_t> m_infoL;
|
||||
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
|
||||
detail::CuSparseResource<bsrilu02Info_t> m_infoM;
|
||||
|
||||
std::unique_ptr<CuVector<field_type>> m_buffer;
|
||||
std::unique_ptr<GpuVector<field_type>> m_buffer;
|
||||
detail::CuSparseHandle& m_cuSparseHandle;
|
||||
|
||||
bool m_analysisDone = false;
|
||||
@ -133,6 +133,6 @@ private:
|
||||
|
||||
void updateILUConfiguration();
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -22,14 +22,14 @@
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
namespace
|
||||
@ -61,7 +61,7 @@ namespace
|
||||
|
||||
|
||||
template <class T>
|
||||
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
|
||||
GpuSparseMatrix<T>::GpuSparseMatrix(const T* nonZeroElements,
|
||||
const int* rowIndices,
|
||||
const int* columnIndices,
|
||||
size_t numberOfNonzeroBlocks,
|
||||
@ -82,15 +82,15 @@ CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuSparseMatrix<T>::~CuSparseMatrix()
|
||||
GpuSparseMatrix<T>::~GpuSparseMatrix()
|
||||
{
|
||||
// empty
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename MatrixType>
|
||||
CuSparseMatrix<T>
|
||||
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
GpuSparseMatrix<T>
|
||||
GpuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
{
|
||||
// TODO: Do we need this intermediate storage? Or this shuffling of data?
|
||||
std::vector<int> columnIndices;
|
||||
@ -129,18 +129,18 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
|
||||
// Sanity check
|
||||
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
|
||||
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
|
||||
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
|
||||
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
|
||||
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
|
||||
"Error size of rows do not sum to number of nonzeroes in GpuSparseMatrix.");
|
||||
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for GpuSparseMatrix.");
|
||||
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for GpuSparseMatrix.");
|
||||
|
||||
|
||||
if (copyNonZeroElementsDirectly) {
|
||||
const T* nonZeroElements = nonZeroElementsTmp;
|
||||
return CuSparseMatrix<T>(
|
||||
return GpuSparseMatrix<T>(
|
||||
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
|
||||
} else {
|
||||
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
|
||||
return CuSparseMatrix<T>(nonZeroElementData.data(),
|
||||
return GpuSparseMatrix<T>(nonZeroElementData.data(),
|
||||
rowIndices.data(),
|
||||
columnIndices.data(),
|
||||
numberOfNonzeroBlocks,
|
||||
@ -152,7 +152,7 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
|
||||
template <class T>
|
||||
template <class MatrixType>
|
||||
void
|
||||
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
GpuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
{
|
||||
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
|
||||
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
|
||||
@ -170,42 +170,42 @@ CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZer
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setUpperTriangular()
|
||||
GpuSparseMatrix<T>::setUpperTriangular()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setLowerTriangular()
|
||||
GpuSparseMatrix<T>::setLowerTriangular()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setUnitDiagonal()
|
||||
GpuSparseMatrix<T>::setUnitDiagonal()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setNonUnitDiagonal()
|
||||
GpuSparseMatrix<T>::setNonUnitDiagonal()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::mv(const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2u) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
const auto nonzeroValues = getNonZeroValues().data();
|
||||
|
||||
@ -232,14 +232,14 @@ CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::umv(const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2u) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
|
||||
const auto nonzeroValues = getNonZeroValues().data();
|
||||
@ -267,14 +267,14 @@ CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
const auto numberOfRows = N();
|
||||
const auto numberOfNonzeroBlocks = nonzeroes();
|
||||
@ -304,7 +304,7 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
|
||||
template <class T>
|
||||
template <class VectorType>
|
||||
void
|
||||
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
|
||||
GpuSparseMatrix<T>::assertSameSize(const VectorType& x) const
|
||||
{
|
||||
if (x.dim() != blockSize() * N()) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
@ -317,17 +317,17 @@ CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
|
||||
|
||||
|
||||
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
|
||||
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
|
||||
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
|
||||
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
|
||||
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
|
||||
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
|
||||
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
|
||||
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
|
||||
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
|
||||
|
||||
template class CuSparseMatrix<float>;
|
||||
template class CuSparseMatrix<double>;
|
||||
template class GpuSparseMatrix<float>;
|
||||
template class GpuSparseMatrix<double>;
|
||||
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
|
||||
@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
@ -16,23 +16,23 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUSPARSEMATRIX_HPP
|
||||
#define OPM_CUSPARSEMATRIX_HPP
|
||||
#ifndef OPM_GPUSPARSEMATRIX_HPP
|
||||
#define OPM_GPUSPARSEMATRIX_HPP
|
||||
#include <cusparse.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuSparseMatrix class simple wrapper class for a CuSparse matrix.
|
||||
* @brief The GpuSparseMatrix class simple wrapper class for a CuSparse matrix.
|
||||
*
|
||||
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
|
||||
* block size parameter
|
||||
@ -44,7 +44,7 @@ namespace Opm::cuistl
|
||||
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuSparseMatrix
|
||||
class GpuSparseMatrix
|
||||
{
|
||||
public:
|
||||
//! Create the sparse matrix specified by the raw data.
|
||||
@ -60,7 +60,7 @@ public:
|
||||
//!
|
||||
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
|
||||
//! restrictions in the current version of cusparse. This might change in future versions.
|
||||
CuSparseMatrix(const T* nonZeroElements,
|
||||
GpuSparseMatrix(const T* nonZeroElements,
|
||||
const int* rowIndices,
|
||||
const int* columnIndices,
|
||||
size_t numberOfNonzeroBlocks,
|
||||
@ -70,14 +70,14 @@ public:
|
||||
/**
|
||||
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
|
||||
*/
|
||||
CuSparseMatrix(const CuSparseMatrix&) = delete;
|
||||
GpuSparseMatrix(const GpuSparseMatrix&) = delete;
|
||||
|
||||
/**
|
||||
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
|
||||
*/
|
||||
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete;
|
||||
GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete;
|
||||
|
||||
virtual ~CuSparseMatrix();
|
||||
virtual ~GpuSparseMatrix();
|
||||
|
||||
/**
|
||||
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
|
||||
@ -89,7 +89,7 @@ public:
|
||||
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
|
||||
*/
|
||||
template <class MatrixType>
|
||||
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
static GpuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
|
||||
/**
|
||||
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
|
||||
@ -144,7 +144,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<T>& getNonZeroValues()
|
||||
GpuVector<T>& getNonZeroValues()
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
@ -154,7 +154,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<T>& getNonZeroValues() const
|
||||
const GpuVector<T>& getNonZeroValues() const
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
@ -164,7 +164,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getRowIndices()
|
||||
GpuVector<int>& getRowIndices()
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
@ -174,7 +174,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getRowIndices() const
|
||||
const GpuVector<int>& getRowIndices() const
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
@ -184,7 +184,7 @@ public:
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getColumnIndices()
|
||||
GpuVector<int>& getColumnIndices()
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
@ -194,7 +194,7 @@ public:
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getColumnIndices() const
|
||||
const GpuVector<int>& getColumnIndices() const
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
@ -233,7 +233,7 @@ public:
|
||||
*
|
||||
* This description is needed for most calls to the CuSparse library
|
||||
*/
|
||||
detail::CuSparseMatrixDescription& getDescription()
|
||||
detail::GpuSparseMatrixDescription& getDescription()
|
||||
{
|
||||
return *m_matrixDescription;
|
||||
}
|
||||
@ -245,7 +245,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief umv computes y=Ax+y
|
||||
@ -254,7 +254,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -264,7 +264,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
|
||||
@ -280,9 +280,9 @@ public:
|
||||
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
|
||||
private:
|
||||
CuVector<T> m_nonZeroElements;
|
||||
CuVector<int> m_columnIndices;
|
||||
CuVector<int> m_rowIndices;
|
||||
GpuVector<T> m_nonZeroElements;
|
||||
GpuVector<int> m_columnIndices;
|
||||
GpuVector<int> m_rowIndices;
|
||||
|
||||
// Notice that we store these three as int to make sure we are cusparse compatible.
|
||||
//
|
||||
@ -292,11 +292,11 @@ private:
|
||||
const int m_numberOfRows;
|
||||
const int m_blockSize;
|
||||
|
||||
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
|
||||
detail::GpuSparseMatrixDescriptionPtr m_matrixDescription;
|
||||
detail::CuSparseHandle& m_cusparseHandle;
|
||||
|
||||
template <class VectorType>
|
||||
void assertSameSize(const VectorType& vector) const;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -20,41 +20,41 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const std::vector<T>& data)
|
||||
: CuVector(data.data(), detail::to_int(data.size()))
|
||||
GpuVector<T>::GpuVector(const std::vector<T>& data)
|
||||
: GpuVector(data.data(), detail::to_int(data.size()))
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const size_t numberOfElements)
|
||||
GpuVector<T>::GpuVector(const size_t numberOfElements)
|
||||
: m_numberOfElements(detail::to_int(numberOfElements))
|
||||
, m_cuBlasHandle(detail::CuBlasHandle::getInstance())
|
||||
{
|
||||
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
|
||||
OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
: CuVector(numberOfElements)
|
||||
GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
: GpuVector(numberOfElements)
|
||||
{
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(
|
||||
m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(T scalar)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator=(T scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
|
||||
@ -62,13 +62,13 @@ CuVector<T>::operator=(T scalar)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
other.m_dataOnDevice,
|
||||
detail::to_size_t(m_numberOfElements) * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
@ -76,33 +76,33 @@ CuVector<T>::operator=(const CuVector<T>& other)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const CuVector<T>& other)
|
||||
: CuVector(other.m_numberOfElements)
|
||||
GpuVector<T>::GpuVector(const GpuVector<T>& other)
|
||||
: GpuVector(other.m_numberOfElements)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
other.m_dataOnDevice,
|
||||
detail::to_size_t(m_numberOfElements) * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::~CuVector()
|
||||
GpuVector<T>::~GpuVector()
|
||||
{
|
||||
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T*
|
||||
CuVector<T>::data() const
|
||||
GpuVector<T>::data() const
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename CuVector<T>::size_type
|
||||
CuVector<T>::dim() const
|
||||
typename GpuVector<T>::size_type
|
||||
GpuVector<T>::dim() const
|
||||
{
|
||||
// Note that there is no way for m_numberOfElements to be non-positive,
|
||||
// but for sanity we still use the safe conversion function here.
|
||||
@ -114,7 +114,7 @@ CuVector<T>::dim() const
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>
|
||||
CuVector<T>::asStdVector() const
|
||||
GpuVector<T>::asStdVector() const
|
||||
{
|
||||
std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
|
||||
copyToHost(temporary);
|
||||
@ -123,21 +123,21 @@ CuVector<T>::asStdVector() const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
|
||||
GpuVector<T>::setZeroAtIndexSet(const GpuVector<int>& indexSet)
|
||||
{
|
||||
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(const CuVector<T>& x) const
|
||||
GpuVector<T>::assertSameSize(const GpuVector<T>& x) const
|
||||
{
|
||||
assertSameSize(x.m_numberOfElements);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(int size) const
|
||||
GpuVector<T>::assertSameSize(int size) const
|
||||
{
|
||||
if (size != m_numberOfElements) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
@ -147,7 +147,7 @@ CuVector<T>::assertSameSize(int size) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertHasElements() const
|
||||
GpuVector<T>::assertHasElements() const
|
||||
{
|
||||
if (m_numberOfElements <= 0) {
|
||||
OPM_THROW(std::invalid_argument, "We have 0 elements");
|
||||
@ -156,14 +156,14 @@ CuVector<T>::assertHasElements() const
|
||||
|
||||
template <typename T>
|
||||
T*
|
||||
CuVector<T>::data()
|
||||
GpuVector<T>::data()
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator*=(const T& scalar)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator*=(const T& scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
|
||||
@ -171,8 +171,8 @@ CuVector<T>::operator*=(const T& scalar)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::axpy(T alpha, const GpuVector<T>& y)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(y);
|
||||
@ -182,7 +182,7 @@ CuVector<T>::axpy(T alpha, const CuVector<T>& y)
|
||||
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other) const
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -193,7 +193,7 @@ CuVector<T>::dot(const CuVector<T>& other) const
|
||||
}
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::two_norm() const
|
||||
GpuVector<T>::two_norm() const
|
||||
{
|
||||
assertHasElements();
|
||||
T result = T(0);
|
||||
@ -203,14 +203,14 @@ CuVector<T>::two_norm() const
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
|
||||
{
|
||||
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
GpuVector<T>::two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
|
||||
{
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
@ -218,23 +218,23 @@ CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
GpuVector<T> buffer(indexSet.dim());
|
||||
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::two_norm(const GpuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
GpuVector<T> buffer(indexSet.dim());
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
}
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator+=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator+=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -243,8 +243,8 @@ CuVector<T>::operator+=(const CuVector<T>& other)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator-=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator-=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -255,7 +255,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
{
|
||||
if (numberOfElements > dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
@ -263,45 +263,45 @@ CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
dim(),
|
||||
numberOfElements));
|
||||
}
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
{
|
||||
assertSameSize(detail::to_int(numberOfElements));
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const std::vector<T>& data)
|
||||
GpuVector<T>::copyFromHost(const std::vector<T>& data)
|
||||
{
|
||||
copyFromHost(data.data(), data.size());
|
||||
}
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(std::vector<T>& data) const
|
||||
GpuVector<T>::copyToHost(std::vector<T>& data) const
|
||||
{
|
||||
copyToHost(data.data(), data.size());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template class CuVector<double>;
|
||||
template class CuVector<float>;
|
||||
template class CuVector<int>;
|
||||
template class GpuVector<double>;
|
||||
template class GpuVector<float>;
|
||||
template class GpuVector<int>;
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
@ -16,24 +16,24 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUVECTOR_HEADER_HPP
|
||||
#define OPM_CUVECTOR_HEADER_HPP
|
||||
#ifndef OPM_GPUVECTOR_HEADER_HPP
|
||||
#define OPM_GPUVECTOR_HEADER_HPP
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <exception>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
|
||||
* @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
|
||||
*
|
||||
* @note we currently only support simple raw primitives for T (double, float and int)
|
||||
*
|
||||
@ -45,12 +45,12 @@ namespace Opm::cuistl
|
||||
* Example usage:
|
||||
*
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
*
|
||||
* auto dataOnGPU = CuVector<double>(someDataOnCPU);
|
||||
* auto dataOnGPU = GpuVector<double>(someDataOnCPU);
|
||||
*
|
||||
* // Multiply by 4.0:
|
||||
* dataOnGPU *= 4.0;
|
||||
@ -62,7 +62,7 @@ namespace Opm::cuistl
|
||||
* @tparam T the type to store. Can be either float, double or int.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuVector
|
||||
class GpuVector
|
||||
{
|
||||
public:
|
||||
using field_type = T;
|
||||
@ -70,17 +70,17 @@ public:
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
|
||||
* @brief GpuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector(const CuVector<T>& other);
|
||||
GpuVector(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* @brief GpuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does CPU to GPU transfer.
|
||||
@ -90,7 +90,7 @@ public:
|
||||
*
|
||||
* @param data the vector to copy from
|
||||
*/
|
||||
explicit CuVector(const std::vector<T>& data);
|
||||
explicit GpuVector(const std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief operator= copies the content of the data vector to the memory of this vector.
|
||||
@ -100,7 +100,7 @@ public:
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector& operator=(const CuVector<T>& other);
|
||||
GpuVector& operator=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator= sets the whole vector equal to the scalar value.
|
||||
@ -109,20 +109,20 @@ public:
|
||||
*
|
||||
* @param scalar the value all elements will be set to.
|
||||
*/
|
||||
CuVector& operator=(T scalar);
|
||||
GpuVector& operator=(T scalar);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
*/
|
||||
explicit CuVector(const size_t numberOfElements);
|
||||
explicit GpuVector(const size_t numberOfElements);
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* data
|
||||
*
|
||||
* @note This assumes the data is on the CPU.
|
||||
@ -132,12 +132,12 @@ public:
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*/
|
||||
CuVector(const T* dataOnHost, const size_t numberOfElements);
|
||||
GpuVector(const T* dataOnHost, const size_t numberOfElements);
|
||||
|
||||
/**
|
||||
* @brief ~CuVector calls cudaFree
|
||||
* @brief ~GpuVector calls cudaFree
|
||||
*/
|
||||
virtual ~CuVector();
|
||||
virtual ~GpuVector();
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
@ -162,7 +162,7 @@ public:
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {}, \n"
|
||||
fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
|
||||
"however, BlockVector has N() = {}, and dim = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -185,7 +185,7 @@ public:
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
|
||||
fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
|
||||
"has has N() = {}, and dim() = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -231,8 +231,8 @@ public:
|
||||
*/
|
||||
void copyToHost(std::vector<T>& data) const;
|
||||
|
||||
void prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
|
||||
void syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
|
||||
void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
|
||||
void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
|
||||
|
||||
/**
|
||||
* @brief operator *= multiplies every element by scalar
|
||||
@ -242,7 +242,7 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator*=(const T& scalar);
|
||||
GpuVector<T>& operator*=(const T& scalar);
|
||||
|
||||
/**
|
||||
* @brief axpy sets this vector equal to this + alha * y
|
||||
@ -252,7 +252,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& axpy(T alpha, const CuVector<T>& y);
|
||||
GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
|
||||
|
||||
/**
|
||||
* @brief operator+= adds the other vector to this vector
|
||||
@ -260,7 +260,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator+=(const CuVector<T>& other);
|
||||
GpuVector<T>& operator+=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator-= subtracts the other vector from this vector
|
||||
@ -268,7 +268,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator-=(const CuVector<T>& other);
|
||||
GpuVector<T>& operator-=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief dot computes the dot product (standard inner product) against the other vector
|
||||
@ -278,7 +278,7 @@ public:
|
||||
*
|
||||
* @return the result on the inner product
|
||||
*/
|
||||
T dot(const CuVector<T>& other) const;
|
||||
T dot(const GpuVector<T>& other) const;
|
||||
|
||||
/**
|
||||
* @brief returns the l2 norm of the vector
|
||||
@ -294,14 +294,14 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -309,14 +309,14 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;
|
||||
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet) const;
|
||||
T two_norm(const GpuVector<int>& indexSet) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -363,9 +363,9 @@ public:
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
void setZeroAtIndexSet(const CuVector<int>& indexSet);
|
||||
void setZeroAtIndexSet(const GpuVector<int>& indexSet);
|
||||
|
||||
// Slow method that creates a string representation of a CuVector for debug purposes
|
||||
// Slow method that creates a string representation of a GpuVector for debug purposes
|
||||
std::string toDebugString()
|
||||
{
|
||||
std::vector<T> v = asStdVector();
|
||||
@ -385,11 +385,11 @@ private:
|
||||
const int m_numberOfElements;
|
||||
detail::CuBlasHandle& m_cuBlasHandle;
|
||||
|
||||
void assertSameSize(const CuVector<T>& other) const;
|
||||
void assertSameSize(const GpuVector<T>& other) const;
|
||||
void assertSameSize(int size) const;
|
||||
|
||||
void assertHasElements() const;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -20,21 +20,21 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <algorithm>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuView.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class T>
|
||||
CuView<T>::CuView(std::vector<T>& data)
|
||||
: CuView(data.data(), data.size())
|
||||
GpuView<T>::GpuView(std::vector<T>& data)
|
||||
: GpuView(data.data(), data.size())
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>
|
||||
CuView<T>::asStdVector() const
|
||||
GpuView<T>::asStdVector() const
|
||||
{
|
||||
std::vector<T> temporary(m_numberOfElements);
|
||||
copyToHost(temporary);
|
||||
@ -43,7 +43,7 @@ CuView<T>::asStdVector() const
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
GpuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
{
|
||||
if (numberOfElements > size()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
@ -51,32 +51,32 @@ CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
size(),
|
||||
numberOfElements));
|
||||
}
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
GpuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
{
|
||||
assertSameSize(numberOfElements);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuView<T>::copyFromHost(const std::vector<T>& data)
|
||||
GpuView<T>::copyFromHost(const std::vector<T>& data)
|
||||
{
|
||||
copyFromHost(data.data(), data.size());
|
||||
}
|
||||
template <class T>
|
||||
void
|
||||
CuView<T>::copyToHost(std::vector<T>& data) const
|
||||
GpuView<T>::copyToHost(std::vector<T>& data) const
|
||||
{
|
||||
copyToHost(data.data(), data.size());
|
||||
}
|
||||
|
||||
template class CuView<double>;
|
||||
template class CuView<float>;
|
||||
template class CuView<int>;
|
||||
template class GpuView<double>;
|
||||
template class GpuView<float>;
|
||||
template class GpuView<int>;
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
@ -16,14 +16,14 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUVIEW_HEADER_HPP
|
||||
#define OPM_CUVIEW_HEADER_HPP
|
||||
#ifndef OPM_GPUVIEW_HEADER_HPP
|
||||
#define OPM_GPUVIEW_HEADER_HPP
|
||||
|
||||
#include <dune/common/fvector.hh>
|
||||
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
@ -37,37 +37,37 @@
|
||||
#define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
|
||||
#endif
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuView class is provides a view of some data allocated on the GPU
|
||||
* @brief The GpuView class is provides a view of some data allocated on the GPU
|
||||
* Essenstially is only stores a pointer and a size.
|
||||
*
|
||||
* This class supports being used from inside a CUDA/HIP Kernel.
|
||||
* Implementations are placed in this headerfile for functions that may be called
|
||||
* inside a kernel to avoid expensive RDC (relocatable device code)
|
||||
*
|
||||
* The view will typically provide a view into a CuBuffer and be able to
|
||||
* The view will typically provide a view into a GpuBuffer and be able to
|
||||
* manipulate the data within it
|
||||
*
|
||||
* @param T Type of the data we store, typically int/float/double w/o const specifier
|
||||
*
|
||||
**/
|
||||
template <typename T>
|
||||
class CuView
|
||||
class GpuView
|
||||
{
|
||||
public:
|
||||
using value_type = T;
|
||||
/**
|
||||
* @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
|
||||
*/
|
||||
explicit CuView() = default;
|
||||
explicit GpuView() = default;
|
||||
|
||||
//TODO: we probably dont need anything like this or is it useful to have views also be able to handle things on CPU?
|
||||
/// @brief constructor based on std::vectors, this will make a view on the CPU
|
||||
/// @param data std vector to pr
|
||||
CuView(std::vector<T>& data);
|
||||
GpuView(std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief operator[] to retrieve a reference to an item in the buffer
|
||||
@ -95,7 +95,7 @@ public:
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* @brief GpuView allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* data
|
||||
*
|
||||
* @note This assumes the data is on the CPU.
|
||||
@ -103,15 +103,15 @@ public:
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
* @param dataOnHost data on host/CPU
|
||||
*/
|
||||
__host__ __device__ CuView(T* dataOnHost, size_t numberOfElements)
|
||||
__host__ __device__ GpuView(T* dataOnHost, size_t numberOfElements)
|
||||
: m_dataPtr(dataOnHost), m_numberOfElements(numberOfElements)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief ~CuView calls cudaFree
|
||||
* @brief ~GpuView calls cudaFree
|
||||
*/
|
||||
~CuView() = default;
|
||||
~GpuView() = default;
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
@ -128,7 +128,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the first element in a CuView
|
||||
* @return fetch the first element in a GpuView
|
||||
*/
|
||||
__host__ __device__ T& front()
|
||||
{
|
||||
@ -139,7 +139,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the last element in a CuView
|
||||
* @return fetch the last element in a GpuView
|
||||
*/
|
||||
__host__ __device__ T& back()
|
||||
{
|
||||
@ -150,7 +150,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the first element in a CuView
|
||||
* @return fetch the first element in a GpuView
|
||||
*/
|
||||
__host__ __device__ T front() const
|
||||
{
|
||||
@ -161,7 +161,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* @return fetch the last element in a CuView
|
||||
* @return fetch the last element in a GpuView
|
||||
*/
|
||||
__host__ __device__ T back() const
|
||||
{
|
||||
@ -220,7 +220,7 @@ public:
|
||||
* @return an std::vector containing the elements copied from the GPU.
|
||||
*/
|
||||
std::vector<T> asStdVector() const;
|
||||
/// @brief Iterator class to make CuViews more similar to std containers
|
||||
/// @brief Iterator class to make GpuViews more similar to std containers
|
||||
class iterator {
|
||||
public:
|
||||
// Iterator typedefs
|
||||
@ -363,7 +363,7 @@ private:
|
||||
|
||||
/// @brief Helper function to assert if another view has the same size
|
||||
/// @param other view
|
||||
__host__ __device__ void assertSameSize(const CuView<T>& other) const
|
||||
__host__ __device__ void assertSameSize(const GpuView<T>& other) const
|
||||
{
|
||||
assertSameSize(other.m_numberOfElements);
|
||||
}
|
||||
@ -410,6 +410,6 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -26,18 +26,18 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/common/TimingMacros.hpp>
|
||||
#include <opm/simulators/linalg/GraphColoring.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
@ -46,7 +46,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
|
||||
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
|
||||
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
|
||||
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
|
||||
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
|
||||
, m_gpuMatrixReorderedLower(nullptr)
|
||||
, m_gpuMatrixReorderedUpper(nullptr)
|
||||
, m_gpuNaturalToReorder(m_naturalToReordered)
|
||||
@ -72,12 +72,12 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
m_gpuMatrix.nonzeroes(),
|
||||
A.nonzeroes()));
|
||||
if (m_splitMatrix) {
|
||||
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
|
||||
m_gpuMatrixReorderedDiag.emplace(GpuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
|
||||
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
m_reorderedToNatural);
|
||||
} else {
|
||||
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
|
||||
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
|
||||
m_cpuMatrix, m_reorderedToNatural);
|
||||
}
|
||||
LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
|
||||
@ -272,8 +272,8 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
|
||||
|
||||
// tune the thread-block size of the apply
|
||||
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
tmpD = 1;
|
||||
|
||||
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) {
|
||||
@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>; \
|
||||
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>
|
||||
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUDILU_DUNE(double, 1);
|
||||
INSTANTIATE_CUDILU_DUNE(double, 2);
|
@ -22,14 +22,14 @@
|
||||
#include <memory>
|
||||
#include <opm/grid/utility/SparseTable.hpp>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief ILU0 preconditioner on the GPU.
|
||||
//!
|
||||
@ -39,7 +39,7 @@ namespace Opm::cuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
@ -54,7 +54,7 @@ public:
|
||||
//! \brief The field type of the preconditioner.
|
||||
using field_type = typename X::field_type;
|
||||
//! \brief The GPU matrix type
|
||||
using CuMat = CuSparseMatrix<field_type>;
|
||||
using CuMat = GpuSparseMatrix<field_type>;
|
||||
|
||||
//! \brief Constructor.
|
||||
//!
|
||||
@ -126,13 +126,13 @@ private:
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
||||
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
||||
std::optional<CuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
//! row conversion from natural to reordered matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuNaturalToReorder;
|
||||
GpuVector<int> m_gpuNaturalToReorder;
|
||||
//! row conversion from reordered to natural matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuReorderToNatural;
|
||||
GpuVector<int> m_gpuReorderToNatural;
|
||||
//! \brief Stores the inverted diagonal that we use in ILU0
|
||||
CuVector<field_type> m_gpuDInv;
|
||||
GpuVector<field_type> m_gpuDInv;
|
||||
//! \brief Bool storing whether or not we should store matrices in a split format
|
||||
bool m_splitMatrix;
|
||||
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
||||
@ -144,6 +144,6 @@ private:
|
||||
int m_moveThreadBlockSize = -1;
|
||||
int m_ILU0FactorizationThreadBlockSize = -1;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -21,12 +21,12 @@
|
||||
#include <cusparse.h>
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//!\brief Makes a CUDA preconditioner available to a CPU simulator.
|
||||
//!
|
||||
@ -35,11 +35,11 @@ namespace Opm::cuistl
|
||||
//!
|
||||
//! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector
|
||||
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
|
||||
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
|
||||
//! \tparam CudaPreconditionerType the preconditioner taking GpuVector<real_type> as arguments to apply
|
||||
template <class X, class Y, class CudaPreconditionerType>
|
||||
class PreconditionerAdapter
|
||||
: public Dune::PreconditionerWithUpdate<X, Y>,
|
||||
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
|
||||
public PreconditionerHolder<GpuVector<typename X::field_type>, GpuVector<typename Y::field_type>>
|
||||
{
|
||||
public:
|
||||
//! \brief The domain type of the preconditioner.
|
||||
@ -77,8 +77,8 @@ public:
|
||||
virtual void apply(X& v, const Y& d) override
|
||||
{
|
||||
if (!m_inputBuffer) {
|
||||
m_inputBuffer.reset(new CuVector<field_type>(v.dim()));
|
||||
m_outputBuffer.reset(new CuVector<field_type>(v.dim()));
|
||||
m_inputBuffer.reset(new GpuVector<field_type>(v.dim()));
|
||||
m_outputBuffer.reset(new GpuVector<field_type>(v.dim()));
|
||||
}
|
||||
m_inputBuffer->copyFromHost(d);
|
||||
m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
|
||||
@ -117,7 +117,7 @@ public:
|
||||
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
|
||||
}
|
||||
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector<field_type>, GpuVector<field_type>>>
|
||||
getUnderlyingPreconditioner() override
|
||||
{
|
||||
return m_underlyingPreconditioner;
|
||||
@ -131,9 +131,9 @@ private:
|
||||
//! \brief the underlying preconditioner to use
|
||||
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
|
||||
|
||||
std::unique_ptr<CuVector<field_type>> m_inputBuffer;
|
||||
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
|
||||
std::unique_ptr<GpuVector<field_type>> m_inputBuffer;
|
||||
std::unique_ptr<GpuVector<field_type>> m_outputBuffer;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -22,16 +22,16 @@
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
|
||||
//!
|
||||
@ -50,7 +50,7 @@ namespace Opm::cuistl
|
||||
//!
|
||||
//! To use this, use something like the following code:
|
||||
//! \code{.cpp}
|
||||
//! #include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
//! #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
//! #include <opm/simulators/linalg/ParallelOverlappingILU0.hpp>
|
||||
//!
|
||||
//! using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
|
||||
@ -64,7 +64,7 @@ namespace Opm::cuistl
|
||||
//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
|
||||
//!
|
||||
//! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
|
||||
//! using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
|
||||
//! using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
|
||||
//! XDouble, XDouble>;
|
||||
//!
|
||||
//! // Note that we do not need to make a new instance for every invocation, this
|
||||
@ -239,6 +239,6 @@ private:
|
||||
//! \brief the underlying preconditioner to use
|
||||
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -21,7 +21,7 @@
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! \brief Common interface for adapters that hold preconditioners.
|
||||
//!
|
||||
@ -38,6 +38,6 @@ public:
|
||||
*/
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
} // end namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -26,12 +26,12 @@
|
||||
#include <dune/istl/schwarz.hh>
|
||||
#include <dune/istl/solver.hh>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
|
||||
|
||||
#ifdef OPEN_MPI
|
||||
#if OPEN_MPI
|
||||
@ -39,7 +39,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! @brief Wraps a CUDA solver to work with CPU data.
|
||||
//!
|
||||
@ -56,7 +56,7 @@ public:
|
||||
using typename Dune::IterativeSolver<X, X>::real_type;
|
||||
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
|
||||
static constexpr auto block_size = domain_type::block_type::dimension;
|
||||
using XGPU = Opm::cuistl::CuVector<real_type>;
|
||||
using XGPU = Opm::gpuistl::GpuVector<real_type>;
|
||||
|
||||
// TODO: Use a std::forward
|
||||
SolverAdapter(Operator& op,
|
||||
@ -67,7 +67,7 @@ public:
|
||||
int verbose)
|
||||
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
|
||||
, m_opOnCPUWithMatrix(op)
|
||||
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
|
||||
, m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
|
||||
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
|
||||
{
|
||||
}
|
||||
@ -116,7 +116,7 @@ public:
|
||||
|
||||
private:
|
||||
Operator& m_opOnCPUWithMatrix;
|
||||
CuSparseMatrix<real_type> m_matrix;
|
||||
GpuSparseMatrix<real_type> m_matrix;
|
||||
|
||||
UnderlyingSolver<XGPU> m_underlyingSolver;
|
||||
|
||||
@ -148,8 +148,8 @@ private:
|
||||
if (!precAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||
}
|
||||
|
||||
@ -159,8 +159,8 @@ private:
|
||||
if (!preconditionerAdapterAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||
}
|
||||
// We need to get the underlying preconditioner:
|
||||
@ -183,20 +183,20 @@ private:
|
||||
|
||||
|
||||
// TODO add typename Operator communication type as a named type with using
|
||||
std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
|
||||
std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
|
||||
if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
|
||||
gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
|
||||
gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
|
||||
}
|
||||
else{
|
||||
gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
|
||||
gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
|
||||
}
|
||||
|
||||
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
|
||||
using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
|
||||
using SchwarzOperator
|
||||
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
|
||||
= Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
|
||||
auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
|
||||
|
||||
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
|
||||
auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
|
||||
preconditionerReallyOnGPU, cudaCommunication);
|
||||
|
||||
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
|
||||
@ -206,8 +206,8 @@ private:
|
||||
// NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
|
||||
// remember
|
||||
// this, we add this check. So remember that we hold one count in this scope, and one in the
|
||||
// CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
|
||||
// CuBlockPreconditioner, hence we do not test for != 2
|
||||
// GpuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
|
||||
// GpuBlockPreconditioner, hence we do not test for != 2
|
||||
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
|
||||
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
|
||||
|
||||
@ -222,12 +222,12 @@ private:
|
||||
if (!precAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
|
||||
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
|
||||
"Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
|
||||
}
|
||||
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
|
||||
|
||||
auto matrixOperator
|
||||
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
|
||||
= std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
|
||||
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
|
||||
return UnderlyingSolver<XGPU>(
|
||||
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
|
||||
@ -237,6 +237,6 @@ private:
|
||||
std::unique_ptr<XGPU> m_inputBuffer;
|
||||
std::unique_ptr<XGPU> m_outputBuffer;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
|
||||
#endif
|
@ -17,9 +17,9 @@
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <cublas_v2.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
namespace Opm::cuistl::detail
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
|
||||
@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
|
||||
return instance;
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
@ -21,7 +21,7 @@
|
||||
#include <cublas_v2.h>
|
||||
#include <memory>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
|
||||
*
|
||||
* Example use:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
|
||||
* void someFunction() {
|
||||
* auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
|
||||
* auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
|
||||
* int cuBlasVersion = -1;
|
||||
* OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
|
||||
* }
|
||||
@ -64,5 +64,5 @@ private:
|
||||
CuBlasHandle();
|
||||
cublasHandle_t m_handle;
|
||||
};
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif // OPM_CUBLASHANDLE_HPP
|
@ -18,30 +18,30 @@
|
||||
*/
|
||||
#ifndef CU_MATRIX_DESCRIPTION_HPP
|
||||
#define CU_MATRIX_DESCRIPTION_HPP
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
* GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
*/
|
||||
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
|
||||
using GpuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
|
||||
|
||||
/**
|
||||
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
* Pointer to GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
*/
|
||||
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
|
||||
using GpuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
|
||||
|
||||
/**
|
||||
* @brief createMatrixDescription creates a default matrix description
|
||||
* @return a matrix description to a general sparse matrix with zero based indexing.
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
inline GpuSparseMatrixDescriptionPtr
|
||||
createMatrixDescription()
|
||||
{
|
||||
auto description = std::make_shared<CuSparseMatrixDescription>();
|
||||
auto description = std::make_shared<GpuSparseMatrixDescription>();
|
||||
|
||||
// Note: We always want to use zero base indexing.
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
|
||||
@ -52,11 +52,11 @@ createMatrixDescription()
|
||||
|
||||
/**
|
||||
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
|
||||
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
|
||||
* @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
|
||||
*
|
||||
* @note This will assume it has a unit diagonal
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
inline GpuSparseMatrixDescriptionPtr
|
||||
createLowerDiagonalDescription()
|
||||
{
|
||||
auto description = createMatrixDescription();
|
||||
@ -67,11 +67,11 @@ createLowerDiagonalDescription()
|
||||
|
||||
/**
|
||||
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
|
||||
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
|
||||
* @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
|
||||
*
|
||||
* @note This will assume it has a non-unit diagonal.
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
inline GpuSparseMatrixDescriptionPtr
|
||||
createUpperDiagonalDescription()
|
||||
{
|
||||
auto description = createMatrixDescription();
|
||||
@ -81,6 +81,6 @@ createUpperDiagonalDescription()
|
||||
return description;
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
#endif // CU_MATRIX_DESCRIPTION_HPP
|
@ -16,9 +16,9 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
namespace Opm::cuistl::detail
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
|
||||
@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
|
||||
return instance;
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
@ -21,7 +21,7 @@
|
||||
#include <cusparse.h>
|
||||
#include <memory>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -29,9 +29,9 @@ namespace Opm::cuistl::detail
|
||||
*
|
||||
* Example use:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
* void someFunction() {
|
||||
* auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
|
||||
* auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
|
||||
* int cuSparseVersion = -1;
|
||||
* OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
|
||||
* }
|
||||
@ -63,5 +63,5 @@ private:
|
||||
CuSparseHandle();
|
||||
cusparseHandle_t m_handle;
|
||||
};
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif // OPM_CUSPARSEHANDLE_HPP
|
@ -23,7 +23,7 @@
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -43,7 +43,7 @@ namespace Opm::cuistl::detail
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulator/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
* #include <opm/simulator/linalg/gpuistl/detail/CuSparseResource.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto resource = CuSparseResource<cuSparseMatDescr_t>();
|
||||
@ -94,6 +94,6 @@ private:
|
||||
DeleterType m_deleter;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl::impl
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
|
||||
} // namespace Opm::gpuistl::impl
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseResource_impl.hpp>
|
||||
#endif // CUSPARSERESOURCE_HPP
|
@ -18,9 +18,9 @@
|
||||
*/
|
||||
#include <exception>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
namespace
|
||||
@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
|
||||
// proper name of the function being called.
|
||||
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
@ -21,11 +21,11 @@
|
||||
#include <limits>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
|
||||
@ -47,7 +47,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
|
||||
|
||||
// create the events
|
||||
for (int i = 0; i < runs + 1; ++i) {
|
||||
OPM_CUDA_SAFE_CALL(cudaEventCreate(&events[i]));
|
||||
OPM_GPU_SAFE_CALL(cudaEventCreate(&events[i]));
|
||||
}
|
||||
|
||||
// Initialize helper variables
|
||||
@ -59,21 +59,21 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
|
||||
for (int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) {
|
||||
|
||||
// record a first event, and then an event after each kernel
|
||||
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[0]));
|
||||
OPM_GPU_SAFE_CALL(cudaEventRecord(events[0]));
|
||||
for (int i = 0; i < runs; ++i) {
|
||||
f(thrBlockSize); // runs an arbitrary function with the provided arguments
|
||||
OPM_CUDA_SAFE_CALL(cudaEventRecord(events[i + 1]));
|
||||
OPM_GPU_SAFE_CALL(cudaEventRecord(events[i + 1]));
|
||||
}
|
||||
|
||||
// make suret he runs are over
|
||||
OPM_CUDA_SAFE_CALL(cudaEventSynchronize(events[runs]));
|
||||
OPM_GPU_SAFE_CALL(cudaEventSynchronize(events[runs]));
|
||||
|
||||
// kernel launch was valid
|
||||
if (cudaSuccess == cudaGetLastError()) {
|
||||
// check if we beat the record for the fastest kernel
|
||||
for (int i = 0; i < runs; ++i) {
|
||||
float candidateBlockSizeTime;
|
||||
OPM_CUDA_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
|
||||
OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
|
||||
if (candidateBlockSizeTime < bestTime) { // checks if this configuration beat the current best
|
||||
bestTime = candidateBlockSizeTime;
|
||||
bestBlockSize = thrBlockSize;
|
||||
@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
|
||||
return bestBlockSize;
|
||||
}
|
||||
|
||||
} // end namespace Opm::cuistl::detail
|
||||
} // end namespace Opm::gpuistl::detail
|
||||
|
||||
#endif
|
@ -23,24 +23,24 @@
|
||||
#include <memory>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/grid/utility/SparseTable.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
/*
|
||||
This file contains a collection of utility functions used in the GPU implementation of ILU and DILU
|
||||
The functions deal with creating the mappings between reordered and natural indices, as well as
|
||||
extracting sparsity structures from dune matrices and creating cusparsematrix indices
|
||||
extracting sparsity structures from dune matrices and creating gpusparsematrix indices
|
||||
*/
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
inline std::vector<int>
|
||||
createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
|
||||
{
|
||||
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
|
||||
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
|
||||
int globCnt = 0;
|
||||
for (auto row : levelSets) {
|
||||
for (auto col : row) {
|
||||
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
|
||||
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
|
||||
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
|
||||
res[globCnt++] = static_cast<int>(col);
|
||||
}
|
||||
@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
|
||||
inline std::vector<int>
|
||||
createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
|
||||
{
|
||||
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
|
||||
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
|
||||
int globCnt = 0;
|
||||
for (auto row : levelSets) {
|
||||
for (auto col : row) {
|
||||
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
|
||||
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
|
||||
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
|
||||
res[col] = globCnt++;
|
||||
}
|
||||
@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
|
||||
return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
|
||||
std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
#endif
|
@ -28,7 +28,7 @@
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
#define CHECK_CUBLAS_ERROR_TYPE(code, x) \
|
||||
@ -108,7 +108,7 @@ getCublasErrorMessage(cublasStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <cublas_v2.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -147,7 +147,7 @@ cublasSafeCall(cublasStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <cublas_v2.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,
|
||||
|
||||
return error;
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
/**
|
||||
* @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
|
||||
@ -183,7 +183,7 @@ cublasWarnIfError(cublasStatus_t error,
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <cublas_v2.h>
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* cublasHandle_t cublasHandle;
|
||||
@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
|
||||
* @note This should be used for any call to cuBlas unless you have a good reason not to.
|
||||
*/
|
||||
#define OPM_CUBLAS_SAFE_CALL(expression) \
|
||||
::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
|
||||
/**
|
||||
* @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
|
||||
@ -203,7 +203,7 @@ cublasWarnIfError(cublasStatus_t error,
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <cublas_v2.h>
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* cublasHandle_t cublasHandle;
|
||||
@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
|
||||
* @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
|
||||
*/
|
||||
#define OPM_CUBLAS_WARN_IF_ERROR(expression) \
|
||||
::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
|
||||
#endif // OPM_CUBLAS_SAFE_CALL_HPP
|
@ -29,7 +29,7 @@
|
||||
#include <cublas_v2.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
inline cublasStatus_t
|
||||
@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
|
||||
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -20,7 +20,7 @@
|
||||
#define OPM_CUDA_CHECK_LAST_ERROR_HPP
|
||||
#include <cuda_runtime.h>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
/**
|
||||
* @brief OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE checks the return type of cudaDeviceSynchronize(),
|
||||
@ -28,7 +28,7 @@
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE;
|
||||
@ -38,7 +38,7 @@
|
||||
* @note This can be used to debug the code, or simply make sure that no error has occured.
|
||||
* @note This is a rather heavy operation, so prefer to use only in Debug mode (see OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG)
|
||||
*/
|
||||
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize())
|
||||
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_GPU_SAFE_CALL(cudaDeviceSynchronize())
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG
|
||||
@ -50,7 +50,7 @@
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG;
|
||||
@ -69,7 +69,7 @@
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* OPM_CUDA_CHECK_LAST_ERROR;
|
||||
@ -78,7 +78,7 @@
|
||||
*
|
||||
* @note This can be used to debug the code, or simply make sure that no error has occured.
|
||||
*/
|
||||
#define OPM_CUDA_CHECK_LAST_ERROR OPM_CUDA_SAFE_CALL(cudaGetLastError())
|
||||
#define OPM_CUDA_CHECK_LAST_ERROR OPM_GPU_SAFE_CALL(cudaGetLastError())
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG
|
||||
@ -90,7 +90,7 @@
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
|
||||
*
|
||||
* void some_function() {
|
||||
* OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG;
|
@ -19,7 +19,7 @@
|
||||
#ifndef CUSPARSE_CONSTANTS_HPP
|
||||
#define CUSPARSE_CONSTANTS_HPP
|
||||
#include <cusparse.h>
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
|
||||
}
|
@ -24,7 +24,7 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
#define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
|
||||
@ -93,7 +93,7 @@ getCusparseErrorMessage(cusparseStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <cublas_v2.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -133,7 +133,7 @@ cusparseSafeCall(cusparseStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <cublas_v2.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,
|
||||
|
||||
return error;
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
|
||||
|
||||
@ -171,7 +171,7 @@ cusparseWarnIfError(cusparseStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <cusparse.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
|
||||
* @note This should be used for any call to cuSparse unless you have a good reason not to.
|
||||
*/
|
||||
#define OPM_CUSPARSE_SAFE_CALL(expression) \
|
||||
::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
|
||||
/**
|
||||
* @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
|
||||
@ -191,7 +191,7 @@ cusparseWarnIfError(cusparseStatus_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
* #include <cusparse.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
|
||||
* exception.
|
||||
*/
|
||||
#define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
|
||||
::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
#endif // OPM_CUSPARSE_SAFE_CALL_HPP
|
@ -27,7 +27,7 @@
|
||||
#include <type_traits>
|
||||
#ifndef OPM_CUSPARSE_WRAPPER_HPP
|
||||
#define OPM_CUSPARSE_WRAPPER_HPP
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
inline cusparseStatus_t
|
||||
@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
|
||||
beta,
|
||||
y);
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -22,7 +22,7 @@
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
|
||||
|
||||
return newMatrix;
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
#endif
|
@ -21,12 +21,12 @@
|
||||
#include <cstddef>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
/*
|
||||
This file provides some logic for handling how to choose the correct thread-block size
|
||||
*/
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
constexpr inline size_t
|
||||
getThreads([[maybe_unused]] size_t numberOfRows)
|
||||
@ -51,7 +51,7 @@ getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
|
||||
}
|
||||
int blockSize;
|
||||
int tmpGridSize;
|
||||
OPM_CUDA_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
|
||||
OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
|
||||
return blockSize;
|
||||
}
|
||||
|
||||
@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
|
||||
return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
#endif
|
@ -16,15 +16,15 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUDA_SAFE_CALL_HPP
|
||||
#define OPM_CUDA_SAFE_CALL_HPP
|
||||
#ifndef OPM_GPU_SAFE_CALL_HPP
|
||||
#define OPM_GPU_SAFE_CALL_HPP
|
||||
#include <cuda_runtime.h>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <string_view>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
/**
|
||||
* @brief getCudaErrorMessage generates the error message to display for a given error.
|
||||
@ -48,9 +48,9 @@ getCudaErrorMessage(cudaError_t error,
|
||||
const std::string_view& functionName,
|
||||
size_t lineNumber)
|
||||
{
|
||||
return fmt::format("CUDA expression did not execute correctly. Expression was: \n"
|
||||
return fmt::format("GPU expression did not execute correctly. Expression was: \n"
|
||||
" {}\n"
|
||||
"CUDA error was {}\n"
|
||||
"GPU error was {}\n"
|
||||
"in function {}, in {}, at line {}\n",
|
||||
expression,
|
||||
cudaGetErrorString(error),
|
||||
@ -60,12 +60,12 @@ getCudaErrorMessage(cudaError_t error,
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief cudaSafeCall checks the return type of the CUDA expression (function call) and throws an exception if it
|
||||
* @brief cudaSafeCall checks the return type of the GPU expression (function call) and throws an exception if it
|
||||
* does not equal cudaSuccess.
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
* #include <cuda_runtime.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -74,7 +74,7 @@ getCudaErrorMessage(cudaError_t error,
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @note It is probably easier to use the macro OPM_CUDA_SAFE_CALL
|
||||
* @note It is probably easier to use the macro OPM_GPU_SAFE_CALL
|
||||
*
|
||||
* @todo Refactor to use std::source_location once we shift to C++20
|
||||
*/
|
||||
@ -91,7 +91,7 @@ cudaSafeCall(cudaError_t error,
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief cudaWarnIfError checks the return type of the CUDA expression (function call) and issues a warning if it
|
||||
* @brief cudaWarnIfError checks the return type of the GPU expression (function call) and issues a warning if it
|
||||
* does not equal cudaSuccess.
|
||||
*
|
||||
* @param error the error code from cublas
|
||||
@ -102,7 +102,7 @@ cudaSafeCall(cudaError_t error,
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
* #include <cuda_runtime.h>
|
||||
*
|
||||
* void some_function() {
|
||||
@ -111,9 +111,9 @@ cudaSafeCall(cudaError_t error,
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @note It is probably easier to use the macro OPM_CUDA_WARN_IF_ERROR
|
||||
* @note It is probably easier to use the macro OPM_GPU_WARN_IF_ERROR
|
||||
*
|
||||
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
|
||||
* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
|
||||
*
|
||||
* @todo Refactor to use std::source_location once we shift to C++20
|
||||
*/
|
||||
@ -128,47 +128,47 @@ cudaWarnIfError(cudaError_t error,
|
||||
OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
|
||||
}
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
/**
|
||||
* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
|
||||
* @brief OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exception if it
|
||||
* does not equal cudaSuccess.
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
* #include <cuda_runtime.h>
|
||||
*
|
||||
* void some_function() {
|
||||
* void* somePointer;
|
||||
* OPM_CUDA_SAFE_CALL(cudaMalloc(&somePointer, 1));
|
||||
* OPM_GPU_SAFE_CALL(cudaMalloc(&somePointer, 1));
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
|
||||
* @note This should be used for any call to the GPU runtime API unless you have a good reason not to.
|
||||
*/
|
||||
#define OPM_CUDA_SAFE_CALL(expression) \
|
||||
::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
#define OPM_GPU_SAFE_CALL(expression) \
|
||||
::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
|
||||
|
||||
/**
|
||||
* @brief OPM_CUDA_WARN_IF_ERROR checks the return type of the CUDA expression (function call) and issues a warning if
|
||||
* @brief OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warning if
|
||||
* it does not equal cudaSuccess.
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
* #include <cuda_runtime.h>
|
||||
*
|
||||
* void some_function() {
|
||||
* void* somePointer;
|
||||
* OPM_CUDA_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
|
||||
* OPM_GPU_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
|
||||
* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
|
||||
*/
|
||||
#define OPM_CUDA_WARN_IF_ERROR(expression) \
|
||||
::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
#define OPM_GPU_WARN_IF_ERROR(expression) \
|
||||
::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
|
||||
|
||||
#endif
|
@ -18,12 +18,12 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
|
||||
}
|
||||
@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
|
||||
size_t numberOfRows,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
|
||||
srcRowIndices,
|
||||
srcColumnIndices,
|
||||
@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
@ -16,11 +16,11 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#ifndef OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#define OPM_GPUISTL_GPUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
/**
|
||||
* @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
|
||||
@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
|
||||
size_t numberOfRows,
|
||||
int threadBlockSize);
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -29,7 +29,7 @@
|
||||
* TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ).
|
||||
* With C++20 this file can be removed.
|
||||
*/
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -121,5 +121,5 @@ public:
|
||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -18,12 +18,12 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace Opm::cuistl::detail::DILU
|
||||
namespace Opm::gpuistl::detail::DILU
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
|
||||
}
|
||||
@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
|
||||
}
|
||||
@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
if (blocksize <= 3) {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
|
||||
rowIndices,
|
||||
colIndices,
|
||||
@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
if (blocksize <= 3) {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
|
||||
lowerRowIndices,
|
||||
lowerColIndices,
|
||||
@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
|
||||
} // namespace Opm::cuistl::detail::DILU
|
||||
} // namespace Opm::gpuistl::detail::DILU
|
@ -24,7 +24,7 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl::detail::DILU
|
||||
namespace Opm::gpuistl::detail::DILU
|
||||
{
|
||||
|
||||
/**
|
||||
@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
|
||||
T* dInv,
|
||||
int threadBlockSize);
|
||||
|
||||
} // namespace Opm::cuistl::detail::DILU
|
||||
} // namespace Opm::gpuistl::detail::DILU
|
||||
#endif
|
@ -18,16 +18,16 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/ILU0Kernels.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
/*
|
||||
The LU factorization and apply step is written based on the Dune implementations
|
||||
*/
|
||||
|
||||
namespace Opm::cuistl::detail::ILU0
|
||||
namespace Opm::gpuistl::detail::ILU0
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
|
||||
}
|
||||
@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
|
||||
}
|
||||
@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
|
||||
}
|
||||
@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
|
||||
}
|
||||
@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
|
||||
lowerRowIndices,
|
||||
lowerColIndices,
|
||||
@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
|
||||
} // namespace Opm::cuistl::detail::ILU0
|
||||
} // namespace Opm::gpuistl::detail::ILU0
|
@ -20,7 +20,7 @@
|
||||
#define OPM_ILU0_KERNELS_HPP
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
namespace Opm::cuistl::detail::ILU0
|
||||
namespace Opm::gpuistl::detail::ILU0
|
||||
{
|
||||
|
||||
/**
|
||||
@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
|
||||
int rowsInLevelSet,
|
||||
int threadBlockSize);
|
||||
|
||||
} // namespace Opm::cuistl::detail::ILU0
|
||||
} // namespace Opm::gpuistl::detail::ILU0
|
||||
#endif
|
@ -18,12 +18,12 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/deviceBlockOperations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace Opm::cuistl::detail::JAC
|
||||
namespace Opm::gpuistl::detail::JAC
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
|
||||
{
|
||||
if (blocksize <= 3) {
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
|
||||
cuInvertDiagonalAndFlatten<T, blocksize>
|
||||
<<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
|
||||
} else {
|
||||
@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
|
||||
|
||||
} // namespace Opm::cuistl::detail::JAC
|
||||
} // namespace Opm::gpuistl::detail::JAC
|
@ -20,7 +20,7 @@
|
||||
#define OPM_JAC_KERNELS_HPP
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
namespace Opm::cuistl::detail::JAC
|
||||
namespace Opm::gpuistl::detail::JAC
|
||||
{
|
||||
|
||||
/**
|
||||
@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
|
||||
template <class T, int blocksize>
|
||||
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
|
||||
|
||||
} // namespace Opm::cuistl::detail::JAC
|
||||
} // namespace Opm::gpuistl::detail::JAC
|
||||
#endif
|
@ -20,9 +20,9 @@
|
||||
#ifndef OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
|
||||
#define OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
|
||||
|
||||
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
|
||||
@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -37,7 +37,7 @@
|
||||
* while Dune::BlockVector (and relatives) use unsigned size_t.
|
||||
*/
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -106,6 +106,6 @@ to_size_t(int i)
|
||||
|
||||
return std::size_t(i);
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
|
||||
#endif
|
@ -18,14 +18,14 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
|
||||
#include <stdexcept>
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
namespace
|
||||
@ -108,8 +108,8 @@ template <class T>
|
||||
void
|
||||
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
|
||||
}
|
||||
|
||||
@ -121,8 +121,8 @@ template <class T>
|
||||
void
|
||||
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
|
||||
}
|
||||
template void setZeroAtIndexSet(double*, size_t, const int*);
|
||||
@ -138,12 +138,12 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
|
||||
size_t numberOfElements,
|
||||
const int* indices)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
|
||||
|
||||
// TODO: [perf] Get rid of the allocation here.
|
||||
CuVector<T> oneVector(numberOfElements);
|
||||
GpuVector<T> oneVector(numberOfElements);
|
||||
oneVector = 1.0;
|
||||
T result = 0.0;
|
||||
OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result));
|
||||
@ -158,10 +158,10 @@ template <class T>
|
||||
void
|
||||
prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
|
||||
OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
|
||||
OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
|
||||
}
|
||||
template void prepareSendBuf(const double* deviceA, double* buffer, size_t numberOfElements, const int* indices);
|
||||
template void prepareSendBuf(const float* deviceA, float* buffer, size_t numberOfElements, const int* indices);
|
||||
@ -171,8 +171,8 @@ template <class T>
|
||||
void
|
||||
syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
|
||||
// cudaDeviceSynchronize(); // Not needed, I guess...
|
||||
}
|
||||
@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
|
||||
{
|
||||
switch (blocksize) {
|
||||
case 1: {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
weightedDiagMV<T, 1>
|
||||
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
} break;
|
||||
case 2: {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
weightedDiagMV<T, 2>
|
||||
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
} break;
|
||||
case 3: {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
|
||||
weightedDiagMV<T, 3>
|
||||
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
} break;
|
||||
@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
|
||||
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
|
||||
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
@ -20,7 +20,7 @@
|
||||
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
|
||||
#include <cstddef>
|
||||
#include <cublas_v2.h>
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@ -65,11 +65,11 @@ void syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int*
|
||||
/**
|
||||
* @brief Compue the weighted matrix vector product where the matrix is diagonal, the diagonal is a vector, meaning we
|
||||
* compute the Hadamard product.
|
||||
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks
|
||||
* @param squareBlockVector A GpuVector whose elements are NxN matrix blocks
|
||||
* @param numberOfRows The number of rows in the vector
|
||||
* @param blocksize The sidelength of the square block elements in the vector
|
||||
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with
|
||||
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in
|
||||
* @param src_vec A pointer to the data of the GpuVector we multiply the blockvector with
|
||||
* @param[out] dst_vec A pointer to the data of the GpuVector we store the result in
|
||||
*
|
||||
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
|
||||
* diagonal of the matrix and use this product.
|
||||
@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
|
||||
T relaxationFactor,
|
||||
const T* srcVec,
|
||||
T* dstVec);
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
@ -19,9 +19,9 @@
|
||||
#include <config.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/set_device.hpp>
|
||||
namespace Opm::cuistl
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
void
|
||||
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
|
||||
@ -41,9 +41,9 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
|
||||
// Now do a round robin kind of assignment
|
||||
// TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device.
|
||||
const auto deviceId = mpiRank % deviceCount;
|
||||
OPM_CUDA_SAFE_CALL(cudaDeviceReset());
|
||||
OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
|
||||
OPM_GPU_SAFE_CALL(cudaDeviceReset());
|
||||
OPM_GPU_SAFE_CALL(cudaSetDevice(deviceId));
|
||||
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
|
||||
+ " devices).");
|
||||
}
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
@ -20,7 +20,7 @@
|
||||
#ifndef OPM_CUISTL_SET_DEVICE_HEADER
|
||||
#define OPM_CUISTL_SET_DEVICE_HEADER
|
||||
|
||||
namespace Opm::cuistl
|
||||
namespace Opm::gpuistl
|
||||
{
|
||||
//! @brief Sets the correct CUDA device in the setting of MPI
|
||||
//!
|
||||
@ -32,5 +32,5 @@ namespace Opm::cuistl
|
||||
//!
|
||||
//! @note If no CUDA device is present, this does nothing.
|
||||
void setDevice(int mpiRank, int numberOfMpiRanks);
|
||||
} // namespace Opm::cuistl
|
||||
} // namespace Opm::gpuistl
|
||||
#endif
|
@ -18,14 +18,14 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuBuffer
|
||||
#define BOOST_TEST_MODULE TestGpuBuffer
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuView.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
|
||||
{
|
||||
// test that we can create buffers and make views of the buffers using the pointer constructor
|
||||
auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
|
||||
const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
|
||||
auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
|
||||
bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
|
||||
const auto gpubuf = ::Opm::gpuistl::GpuBuffer<int>(buf);
|
||||
auto gpuview = ::Opm::gpuistl::GpuView<int>(buf.data(), buf.size());
|
||||
bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::GpuView<int>, decltype(gpuview)>::value;
|
||||
|
||||
BOOST_CHECK(gpuBufCreatedView);
|
||||
|
||||
// test that we can make views of buffers by using the cubuffer constructor
|
||||
auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
|
||||
bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
|
||||
// test that we can make views of buffers by using the GpuBuffer constructor
|
||||
auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
|
||||
bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::GpuView<const int>, decltype(gpuview2)>::value;
|
||||
|
||||
BOOST_CHECK(gpuBufCreatedView2);
|
||||
|
@ -18,18 +18,18 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuDiluHelpers
|
||||
#define BOOST_TEST_MODULE TestGpuDILU
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <dune/common/fmatrix.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/DILU.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
|
||||
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
|
||||
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
|
||||
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
|
||||
using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
|
||||
using CuIntVec = Opm::cuistl::CuVector<int>;
|
||||
using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
|
||||
using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
||||
using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
||||
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
|
||||
using CuIntVec = Opm::gpuistl::GpuVector<int>;
|
||||
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
|
||||
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
||||
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
||||
|
||||
Sp1x1BlockMatrix
|
||||
get1x1BlockTestMatrix()
|
||||
@ -211,7 +211,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
|
||||
|
||||
// Initialize preconditioner objects
|
||||
Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
|
||||
auto gpudilu = CuDilu1x1(matA, true, true);
|
||||
auto gpudilu = GpuDilu1x1(matA, true, true);
|
||||
|
||||
// Use the apply
|
||||
gpudilu.apply(d_output, d_input);
|
||||
@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
|
||||
}
|
||||
auto cudilures = d_output.asStdVector();
|
||||
|
||||
// check that CuDilu results matches that of CPU dilu
|
||||
// check that GpuDilu results matches that of CPU dilu
|
||||
for (size_t i = 0; i < cudilures.size(); ++i) {
|
||||
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
|
||||
}
|
||||
@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)
|
||||
|
||||
// init matrix with 2x2 blocks
|
||||
Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
|
||||
auto gpudilu = CuDilu2x2(matA, true, true);
|
||||
auto gpudilu = GpuDilu2x2(matA, true, true);
|
||||
Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);
|
||||
|
||||
// create input/output buffers for the apply
|
||||
@ -275,7 +275,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
|
||||
{
|
||||
// create gpu dilu preconditioner
|
||||
Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
|
||||
auto gpudilu = CuDilu1x1(matA, true, true);
|
||||
auto gpudilu = GpuDilu1x1(matA, true, true);
|
||||
|
||||
matA[0][0][0][0] = 11.0;
|
||||
matA[0][1][0][0] = 12.0;
|
||||
@ -325,7 +325,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
|
||||
}
|
||||
auto cudilures = d_output.asStdVector();
|
||||
|
||||
// check that CuDilu results matches that of CPU dilu
|
||||
// check that GpuDilu results matches that of CPU dilu
|
||||
for (size_t i = 0; i < cudilures.size(); ++i) {
|
||||
BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
|
||||
}
|
@ -18,22 +18,22 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuJac
|
||||
#define BOOST_TEST_MODULE TestGpuJac
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Test data to validate jacobi preconditioner, expected result is x_1, and relaxation factor is 0.5
|
||||
@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
|
||||
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
|
||||
B[1][1][0][0] = -1.0;
|
||||
B[1][1][1][1] = -1.0;
|
||||
|
||||
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
|
||||
auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
|
||||
|
||||
Vector vVector(2);
|
||||
Vector dVector(2);
|
||||
@ -81,14 +81,14 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
|
||||
|
||||
const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}};
|
||||
|
||||
cujac.apply(vVector, dVector);
|
||||
gpujac.apply(vVector, dVector);
|
||||
BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Test data to validate jacobi preconditioner, expected result is x_1, relaxation factor is 0.5
|
||||
@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
|
||||
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
|
||||
B[2][2][0][0] = -1.0;
|
||||
B[3][3][0][0] = -1.0;
|
||||
|
||||
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
|
||||
auto gpujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuJac>(std::make_shared<GpuJac>(B, 0.5));
|
||||
|
||||
Vector vVector(4);
|
||||
Vector dVector(4);
|
||||
@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
|
||||
|
||||
const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0};
|
||||
|
||||
cujac.apply(vVector, dVector);
|
||||
gpujac.apply(vVector, dVector);
|
||||
BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7);
|
@ -18,7 +18,7 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy
|
||||
#define BOOST_TEST_MODULE TestGpuOwnerOverlapCopy
|
||||
#define BOOST_TEST_NO_MAIN
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
@ -26,10 +26,10 @@
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/set_device.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/set_device.hpp>
|
||||
#include <random>
|
||||
#include <mpi.h>
|
||||
|
||||
@ -46,7 +46,7 @@ main(int argc, char** argv)
|
||||
int rank, totalRanks;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
|
||||
Opm::cuistl::setDevice(rank, totalRanks);
|
||||
Opm::gpuistl::setDevice(rank, totalRanks);
|
||||
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
|
||||
}
|
||||
|
||||
@ -62,14 +62,14 @@ BOOST_AUTO_TEST_CASE(TestProject)
|
||||
|
||||
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
|
||||
|
||||
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
|
||||
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
|
||||
|
||||
auto cuOwnerOverlapCopy
|
||||
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
|
||||
auto GpuOwnerOverlapCopy
|
||||
= Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
|
||||
|
||||
cuOwnerOverlapCopy.project(xGPU);
|
||||
GpuOwnerOverlapCopy.project(xGPU);
|
||||
|
||||
auto resultOfProject = xGPU.asStdVector();
|
||||
|
||||
@ -94,19 +94,19 @@ BOOST_AUTO_TEST_CASE(TestDot)
|
||||
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
|
||||
|
||||
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
|
||||
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
|
||||
|
||||
auto cuOwnerOverlapCopy
|
||||
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
|
||||
auto GpuOwnerOverlapCopy
|
||||
= Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
|
||||
|
||||
double outputDune = -1.0;
|
||||
auto xDune = xGPU.asDuneBlockVector<1>();
|
||||
ownerOverlapCopy.dot(xDune, xDune, outputDune);
|
||||
|
||||
double output = -1.0;
|
||||
cuOwnerOverlapCopy.dot(xGPU, xGPU, output);
|
||||
GpuOwnerOverlapCopy.dot(xGPU, xGPU, output);
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL(outputDune, output);
|
@ -18,7 +18,7 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuSeqILU0
|
||||
#define BOOST_TEST_MODULE TestGpuSeqILU0
|
||||
#define BOOST_TEST_NO_MAIN
|
||||
|
||||
|
||||
@ -27,10 +27,10 @@
|
||||
#include <dune/common/parallel/mpihelper.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/preconditioners.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
|
||||
using M = Dune::FieldMatrix<T, 1, 1>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
|
||||
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
|
||||
|
||||
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
|
||||
|
||||
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
|
||||
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
|
||||
|
||||
// check for the standard basis {e_i}
|
||||
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
|
||||
@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
|
||||
Vector outputVectorDune(N);
|
||||
Vector outputVectorCuistl(N);
|
||||
duneILU.apply(outputVectorDune, inputVector);
|
||||
cuILU.apply(outputVectorCuistl, inputVector);
|
||||
gpuILU.apply(outputVectorCuistl, inputVector);
|
||||
|
||||
for (int component = 0; component < N; ++component) {
|
||||
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
|
||||
@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
|
||||
// Now we check that we can update the matrix. We basically just negate B
|
||||
B *= -1.0;
|
||||
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
|
||||
cuILU.update();
|
||||
gpuILU.update();
|
||||
// check for the standard basis {e_i}
|
||||
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
|
||||
for (int i = 0; i < N; ++i) {
|
||||
@ -122,7 +122,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
|
||||
Vector outputVectorDune(N);
|
||||
Vector outputVectorCuistl(N);
|
||||
duneILUNew.apply(outputVectorDune, inputVector);
|
||||
cuILU.apply(outputVectorCuistl, inputVector);
|
||||
gpuILU.apply(outputVectorCuistl, inputVector);
|
||||
|
||||
for (int component = 0; component < N; ++component) {
|
||||
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
|
||||
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
|
||||
using M = Dune::FieldMatrix<T, 2, 2>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
|
||||
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
|
||||
|
||||
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
|
||||
|
||||
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
|
||||
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
|
||||
|
||||
// check for the standard basis {e_i}
|
||||
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
|
||||
@ -191,7 +191,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
|
||||
Vector outputVectorDune(N);
|
||||
Vector outputVectorCuistl(N);
|
||||
duneILU.apply(outputVectorDune, inputVector);
|
||||
cuILU.apply(outputVectorCuistl, inputVector);
|
||||
gpuILU.apply(outputVectorCuistl, inputVector);
|
||||
|
||||
for (int component = 0; component < N; ++component) {
|
||||
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
|
||||
@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
|
||||
// Now we check that we can update the matrix. We basically just negate B
|
||||
B *= -1.0;
|
||||
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
|
||||
cuILU.update();
|
||||
gpuILU.update();
|
||||
// check for the standard basis {e_i}
|
||||
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
|
||||
for (int i = 0; i < N; ++i) {
|
||||
@ -212,7 +212,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
|
||||
Vector outputVectorDune(N);
|
||||
Vector outputVectorCuistl(N);
|
||||
duneILUNew.apply(outputVectorDune, inputVector);
|
||||
cuILU.apply(outputVectorCuistl, inputVector);
|
||||
gpuILU.apply(outputVectorCuistl, inputVector);
|
||||
|
||||
for (int component = 0; component < N; ++component) {
|
||||
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
|
@ -18,14 +18,14 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuSparseMatrix
|
||||
#define BOOST_TEST_MODULE TestGpuSparseMatrix
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <random>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestConstruction1D)
|
||||
@ -76,17 +76,17 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
|
||||
}
|
||||
}
|
||||
|
||||
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
|
||||
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
|
||||
|
||||
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
|
||||
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
|
||||
const auto& nonZeroValuesCuda = gpuSparseMatrix.getNonZeroValues();
|
||||
std::vector<double> buffer(gpuSparseMatrix.nonzeroes(), 0.0);
|
||||
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
|
||||
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
|
||||
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
|
||||
BOOST_CHECK_EQUAL(N * 3 - 2, gpuSparseMatrix.nonzeroes());
|
||||
|
||||
std::vector<int> rowIndicesFromCUDA(N + 1);
|
||||
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
|
||||
gpuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
|
||||
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
|
||||
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
|
||||
for (int i = 2; i < N; ++i) {
|
||||
@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
|
||||
|
||||
|
||||
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
|
||||
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
|
||||
gpuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
|
||||
|
||||
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
|
||||
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
|
||||
@ -143,19 +143,19 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
|
||||
}
|
||||
}
|
||||
|
||||
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
|
||||
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
|
||||
// check each column
|
||||
for (size_t component = 0; component < N; ++component) {
|
||||
std::vector<double> inputDataX(N * dim, 0.0);
|
||||
inputDataX[component] = 1.0;
|
||||
std::vector<double> inputDataY(N * dim, .25);
|
||||
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
|
||||
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
|
||||
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
|
||||
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
|
||||
Vector xHost(N), yHost(N);
|
||||
yHost = inputDataY[0];
|
||||
inputVectorX.copyToHost(xHost);
|
||||
const double alpha = 1.42;
|
||||
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
|
||||
gpuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
|
||||
|
||||
inputVectorY.copyToHost(inputDataY);
|
||||
|
||||
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
|
||||
}
|
||||
inputVectorX.copyToHost(xHost);
|
||||
|
||||
cuSparseMatrix.mv(inputVectorX, inputVectorY);
|
||||
gpuSparseMatrix.mv(inputVectorX, inputVectorY);
|
||||
|
||||
inputVectorY.copyToHost(inputDataY);
|
||||
|
@ -18,21 +18,21 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuVector
|
||||
#define BOOST_TEST_MODULE TestGpuVector
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <random>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
|
||||
{
|
||||
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
|
||||
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
|
||||
auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);
|
||||
|
||||
// Multiply by 4.0:
|
||||
dataOnGPU *= 4.0;
|
||||
@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
|
||||
BOOST_AUTO_TEST_CASE(TestConstructionSize)
|
||||
{
|
||||
const int numberOfElements = 1234;
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
|
||||
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
vectorOnGPU.copyFromHost(data.data(), data.size());
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
|
||||
{
|
||||
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
|
||||
vectorOnGPU.copyFromHost(blockVector);
|
||||
std::vector<double> buffer(vectorOnGPU.dim());
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
|
||||
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU.copyToHost(blockVector);
|
||||
|
||||
|
||||
@ -103,17 +103,17 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
|
||||
BOOST_AUTO_TEST_CASE(TestDataPointer)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
|
||||
OPM_GPU_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
const double scalar = 42.25;
|
||||
vectorOnGPU *= scalar;
|
||||
@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
|
||||
BOOST_AUTO_TEST_CASE(TestTwoNorm)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
auto twoNorm = vectorOnGPU.two_norm();
|
||||
|
||||
double correctAnswer = 0.0;
|
||||
@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
|
||||
{
|
||||
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
|
||||
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
|
||||
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
|
||||
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
|
||||
auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
|
||||
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
|
||||
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
|
||||
|
||||
double correctAnswer = 0.0;
|
||||
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
|
||||
BOOST_AUTO_TEST_CASE(Assigment)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU = 10.0;
|
||||
vectorOnGPU.copyToHost(data.data(), data.size());
|
||||
|
||||
@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
|
||||
BOOST_AUTO_TEST_CASE(CopyAssignment)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU.copyToHost(data.data(), data.size());
|
||||
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
|
||||
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
|
||||
vectorOnGPUB = 4.0;
|
||||
vectorOnGPUB = vectorOnGPU;
|
||||
|
||||
@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
|
||||
BOOST_AUTO_TEST_CASE(RandomVectors)
|
||||
{
|
||||
|
||||
using GVector = Opm::cuistl::CuVector<double>;
|
||||
using GVector = Opm::gpuistl::GpuVector<double>;
|
||||
std::srand(0);
|
||||
std::mt19937 generator;
|
||||
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
|
||||
@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
|
||||
indexSet.push_back(i);
|
||||
}
|
||||
}
|
||||
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
|
||||
auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);
|
||||
|
||||
aGPU.setZeroAtIndexSet(indexSetGPU);
|
||||
auto projectedA = aGPU.asStdVector();
|
@ -18,24 +18,24 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuView
|
||||
#define BOOST_TEST_MODULE TestGpuView
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuView.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
#include <random>
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
using CuViewDouble = ::Opm::cuistl::CuView<double>;
|
||||
using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
|
||||
using GpuViewDouble = ::Opm::gpuistl::GpuView<double>;
|
||||
using GpuBufferDouble = ::Opm::gpuistl::GpuBuffer<double>;
|
||||
|
||||
__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
|
||||
__global__ void useGpuViewOnGPU(GpuViewDouble a, GpuViewDouble b){
|
||||
b[0] = a.front();
|
||||
b[1] = a.back();
|
||||
b[2] = *a.begin();
|
||||
@ -48,24 +48,24 @@ BOOST_AUTO_TEST_CASE(TestCreationAndIndexing)
|
||||
{
|
||||
// A simple test to check that we can move data to and from the GPU
|
||||
auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
auto cubuffer = CuBufferDouble(cpubuffer);
|
||||
auto cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
|
||||
const auto const_cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
|
||||
auto cubuffer = GpuBufferDouble(cpubuffer);
|
||||
auto gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
|
||||
const auto const_gpuview = GpuViewDouble(cubuffer.data(), cubuffer.size());
|
||||
|
||||
auto stdVecOfCuView = cuview.asStdVector();
|
||||
auto const_stdVecOfCuView = cuview.asStdVector();
|
||||
auto stdVecOfGpuView = gpuview.asStdVector();
|
||||
auto const_stdVecOfGpuView = gpuview.asStdVector();
|
||||
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(
|
||||
stdVecOfCuView.begin(), stdVecOfCuView.end(), cpubuffer.begin(), cpubuffer.end());
|
||||
stdVecOfGpuView.begin(), stdVecOfGpuView.end(), cpubuffer.begin(), cpubuffer.end());
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(
|
||||
stdVecOfCuView.begin(), stdVecOfCuView.end(), const_stdVecOfCuView.begin(), const_stdVecOfCuView.end());
|
||||
stdVecOfGpuView.begin(), stdVecOfGpuView.end(), const_stdVecOfGpuView.begin(), const_stdVecOfGpuView.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
|
||||
BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUTypes)
|
||||
{
|
||||
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
auto cpuview = CuViewDouble(buf.data(), buf.size());
|
||||
const auto const_cpuview = CuViewDouble(buf.data(), buf.size());
|
||||
auto cpuview = GpuViewDouble(buf.data(), buf.size());
|
||||
const auto const_cpuview = GpuViewDouble(buf.data(), buf.size());
|
||||
|
||||
// check that indexing a mutable view gives references when indexing it
|
||||
bool correct_type_of_cpu_front = std::is_same_v<double&, decltype(cpuview.front())>;
|
||||
@ -83,26 +83,26 @@ BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
|
||||
BOOST_CHECK(cpuview.back() == buf.back());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCuViewOnCPUWithSTLIteratorAlgorithm)
|
||||
BOOST_AUTO_TEST_CASE(TestGpuViewOnCPUWithSTLIteratorAlgorithm)
|
||||
{
|
||||
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
auto cpuview = CuViewDouble(buf.data(), buf.size());
|
||||
auto cpuview = GpuViewDouble(buf.data(), buf.size());
|
||||
std::sort(buf.begin(), buf.end());
|
||||
BOOST_CHECK(42.0 == cpuview[3]);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCuViewOnGPU)
|
||||
BOOST_AUTO_TEST_CASE(TestGpuViewOnGPU)
|
||||
{
|
||||
auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
auto cubufA = CuBufferDouble(buf);
|
||||
auto cuviewA = CuViewDouble(cubufA.data(), cubufA.size());
|
||||
auto cubufB = CuBufferDouble(4);
|
||||
auto cuviewB = CuViewDouble(cubufB.data(), cubufB.size());
|
||||
auto cubufA = GpuBufferDouble(buf);
|
||||
auto gpuviewA = GpuViewDouble(cubufA.data(), cubufA.size());
|
||||
auto cubufB = GpuBufferDouble(4);
|
||||
auto gpuviewB = GpuViewDouble(cubufB.data(), cubufB.size());
|
||||
|
||||
useCuViewOnGPU<<<1,1>>>(cuviewA, cuviewB);
|
||||
useGpuViewOnGPU<<<1,1>>>(gpuviewA, gpuviewB);
|
||||
|
||||
auto vecA = cuviewA.asStdVector();
|
||||
auto vecB = cuviewB.asStdVector();
|
||||
auto vecA = gpuviewA.asStdVector();
|
||||
auto vecB = gpuviewB.asStdVector();
|
||||
|
||||
// checks that front/back/begin/end works
|
||||
BOOST_CHECK(vecB[0] == buf[0]);
|
@ -29,7 +29,7 @@
|
||||
#include <dune/istl/preconditioners.hh>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
|
||||
|
||||
using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
|
||||
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
|
||||
expectedOutputVector[i][1] = 43.0;
|
||||
inputVector[i][0] = 1.0;
|
||||
auto converter
|
||||
= Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
|
||||
= Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
|
||||
B);
|
||||
auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
|
||||
converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);
|
@ -18,17 +18,17 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations
|
||||
#define BOOST_TEST_MODULE TestGpuSparseMatrixOperations
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
|
||||
B[1][1][1][1] = -1.0;
|
||||
B[1][1][2][2] = -1.0;
|
||||
|
||||
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
|
||||
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
|
||||
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
|
||||
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
|
||||
|
||||
std::vector<T> expectedInvDiag {-1.0 / 4.0,
|
||||
@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
|
||||
B[1][1][0][0] = -1.0;
|
||||
B[1][1][1][1] = -1.0;
|
||||
|
||||
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
|
||||
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
|
||||
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
|
||||
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
|
||||
|
||||
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};
|
@ -18,16 +18,16 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuVectorOperations
|
||||
#define BOOST_TEST_MODULE TestGpuVectorOperations
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
|
||||
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
|
||||
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
|
||||
std::vector<T> hostDstVector({0, 0, 0});
|
||||
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
|
||||
|
||||
Opm::cuistl::detail::weightedDiagMV(
|
||||
Opm::gpuistl::detail::weightedDiagMV(
|
||||
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
|
||||
|
||||
std::vector<T> expectedVec {10.0, 22.0, 10.0};
|
||||
@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
|
||||
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
|
||||
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
|
||||
std::vector<T> hostDstVector({0, 0, 0, 0});
|
||||
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
|
||||
|
||||
Opm::cuistl::detail::weightedDiagMV(
|
||||
Opm::gpuistl::detail::weightedDiagMV(
|
||||
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
|
||||
|
||||
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
|
||||
@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
|
||||
for (size_t i = 0; i < expectedVec.size(); i++) {
|
||||
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
|
||||
}
|
||||
}
|
||||
}
|
@ -16,14 +16,14 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp"
|
||||
#include "opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp"
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCublasHandle
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
|
||||
{
|
||||
@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
|
||||
// that checks the version of blas programatically. Let the test pass for now.
|
||||
BOOST_CHECK(true);
|
||||
#else
|
||||
auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
|
||||
auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
|
||||
int cuBlasVersion = -1;
|
||||
OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
|
||||
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cublas_v2.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCreateHandle)
|
||||
{
|
@ -21,7 +21,7 @@
|
||||
#define BOOST_TEST_MODULE TestCudaCheckLastError
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestNoThrowLastError)
|
@ -21,12 +21,12 @@
|
||||
#define BOOST_TEST_MODULE TestSparseHandle
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
|
||||
{
|
||||
auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
|
||||
auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
|
||||
int cuSparseVersion = -1;
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
|
||||
BOOST_CHECK_LT(0, cuSparseVersion);
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cusparse.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCreateHandle)
|
||||
{
|
@ -18,15 +18,15 @@
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCudaSafeCall
|
||||
#define BOOST_TEST_MODULE TestGpuSafeCall
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCudaMalloc)
|
||||
BOOST_AUTO_TEST_CASE(TestGpuMalloc)
|
||||
{
|
||||
void* pointer;
|
||||
BOOST_CHECK_NO_THROW(OPM_CUDA_SAFE_CALL(cudaMalloc(&pointer, 1)););
|
||||
BOOST_CHECK_NO_THROW(OPM_GPU_SAFE_CALL(cudaMalloc(&pointer, 1)););
|
||||
}
|
||||
|
||||
|
||||
@ -41,6 +41,6 @@ BOOST_AUTO_TEST_CASE(TestThrows)
|
||||
errorCodes = {{cudaErrorAddressOfConstant, cudaErrorAlreadyAcquired}};
|
||||
#endif
|
||||
for (auto code : errorCodes) {
|
||||
BOOST_CHECK_THROW(OPM_CUDA_SAFE_CALL(code), std::exception);
|
||||
BOOST_CHECK_THROW(OPM_GPU_SAFE_CALL(code), std::exception);
|
||||
}
|
||||
}
|
@ -21,11 +21,11 @@
|
||||
#define BOOST_TEST_MODULE TestSafeConversion
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
|
||||
{
|
||||
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
|
||||
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
|
||||
, std::invalid_argument);
|
||||
}
|
||||
|
||||
@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
|
||||
{
|
||||
// This might seem slow, but it is really fast:
|
||||
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
|
||||
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
|
||||
BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
|
||||
}
|
||||
|
||||
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
|
||||
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
|
||||
Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
|
||||
{
|
||||
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
|
||||
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
|
||||
{
|
||||
// This might seem slow, but it is really fast:
|
||||
for (int i = 0; i <= 1024 * 1024; ++i) {
|
||||
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
|
||||
BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
|
||||
}
|
||||
|
||||
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
|
||||
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
|
||||
Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
|
||||
}
|
@ -24,14 +24,14 @@
|
||||
#include <dune/istl/solvers.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
|
||||
#include <opm/simulators/linalg/PropertyTree.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||
#include <opm/simulators/linalg/gpuistl/SolverAdapter.hpp>
|
||||
|
||||
static const constexpr int dim = 3;
|
||||
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
|
||||
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
|
||||
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
|
||||
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
|
||||
using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
|
||||
|
||||
namespace
|
||||
{
|
Loading…
Reference in New Issue
Block a user