From db440df07280a760de4836dbcea71f5763f295ee Mon Sep 17 00:00:00 2001
From: Kjetil Olsen Lye
Date: Tue, 4 Feb 2025 10:26:48 +0100
Subject: [PATCH] Added convenience functions for smart pointers on the GPU and
 a non-owning view of pointers.

---
 CMakeLists.txt                                |   1 +
 CMakeLists_files.cmake                        |   3 +
 .../linalg/gpuistl/gpu_smart_pointer.hpp      | 179 ++++++++++++++++++
 tests/gpuistl/test_smart_pointers.cu          | 107 +++++++++++
 4 files changed, 290 insertions(+)
 create mode 100644 opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
 create mode 100644 tests/gpuistl/test_smart_pointers.cu

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1fda2d2f5..6ade0665b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -728,6 +728,7 @@ if(CUDA_FOUND)
       gpu_ad
       gpu_linear_two_phase_material
       gpuPvt
+      smart_pointers
       PROPERTIES LABELS ${gpu_label})
 endif()

diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index 37b6995f2..c5c041a95 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -312,6 +312,7 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
+  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg gpu_smart_pointer.hpp)
 endif()

 if(USE_GPU_BRIDGE)
@@ -467,6 +468,8 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ad.cu)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_linear_two_phase_material.cu)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpuPvt.cu)
+  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_smart_pointers.cu)
+
   # for loop providing the flag --expt-relaxed-constexpr to fix some cuda issues with constexpr
   if(NOT CONVERT_CUDA_TO_HIP)

diff --git a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
new file mode 100644
index 000000000..ee67e97cf
--- /dev/null
+++ b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
@@ -0,0 +1,179 @@
+/*
+  Copyright 2025 Equinor ASA
+  This file is part of the Open Porous Media project (OPM).
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+  You should have received a copy of the GNU General Public License
+  along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
+#define OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
+
+#include <opm/common/utility/gpuDecorators.hpp>
+
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
+
+#include <cuda_runtime.h>
+#include <memory>
+
+/**
+ * @file gpu_smart_pointer.hpp defines convenience classes and functions for using std::shared_ptr and std::unique_ptr
+ * with GPU allocated memory.
+ */
+
+namespace Opm::gpuistl
+{
+
+
+/**
+ * @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a \c std::shared_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @return A std::shared_ptr to the GPU-allocated memory.
+ */
+template <class T>
+std::shared_ptr<T>
+make_gpu_shared_ptr()
+{
+    T* ptr = nullptr;
+    OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
+    auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
+    return std::shared_ptr<T>(ptr, deleter);
+}
+
+/**
+ * @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::shared_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @param value The value to copy to the GPU-allocated memory.
+ * @return A std::shared_ptr to the GPU-allocated memory.
+ */
+template <class T>
+std::shared_ptr<T>
+make_gpu_shared_ptr(const T& value)
+{
+    auto ptr = make_gpu_shared_ptr<T>();
+    OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
+    return ptr;
+}
+
+
+/**
+ * @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::unique_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @return A std::unique_ptr to the GPU-allocated memory.
+ */
+template <class T>
+auto
+make_gpu_unique_ptr()
+{
+    T* ptr = nullptr;
+    OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
+
+    auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
+    return std::unique_ptr<T, decltype(deleter)>(ptr, deleter);
+}
+
+/**
+ * @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::unique_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @param value The value to copy to the GPU-allocated memory.
+ * @return A std::unique_ptr to the GPU-allocated memory.
+ */
+template <class T>
+auto
+make_gpu_unique_ptr(const T& value)
+{
+    auto ptr = make_gpu_unique_ptr<T>();
+    OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
+    return ptr;
+}
+
+/**
+ * @brief A non-owning view of a smart pointer to GPU-allocated memory.
+ *
+ * This emulates a smart pointer to GPU-allocated memory, but without ownership semantics
+ * and in a form that can be passed to GPU kernels. This is useful when we want to pass
+ * a smart pointer to a GPU kernel without transferring ownership of the memory.
+ */
+template <class T>
+class PointerView
+{
+public:
+    PointerView(const PointerView& other) = default;
+
+    PointerView(const std::shared_ptr<T>& ptr)
+        : ptr_(ptr.get())
+    {
+    }
+
+    template <class Deleter>
+    PointerView(const std::unique_ptr<T, Deleter>& ptr)
+        : ptr_(ptr.get())
+    {
+    }
+
+    PointerView(T* ptr)
+        : ptr_(ptr)
+    {
+    }
+
+    OPM_HOST_DEVICE T* get() const
+    {
+        return ptr_;
+    }
+
+    OPM_HOST_DEVICE T& operator*() const
+    {
+        return *ptr_;
+    }
+
+    OPM_HOST_DEVICE T* operator->() const
+    {
+        return ptr_;
+    }
+
+private:
+    T* ptr_;
+};
+
+template <class T>
+PointerView<T>
+make_view(const std::shared_ptr<T>& ptr)
+{
+    return PointerView<T>(ptr);
+}
+
+template <class T, class Deleter>
+PointerView<T>
+make_view(const std::unique_ptr<T, Deleter>& ptr)
+{
+    return PointerView<T>(ptr);
+}
+} // namespace Opm::gpuistl
+#endif
diff --git a/tests/gpuistl/test_smart_pointers.cu b/tests/gpuistl/test_smart_pointers.cu
new file mode 100644
index 000000000..232e372f9
--- /dev/null
+++ b/tests/gpuistl/test_smart_pointers.cu
@@ -0,0 +1,107 @@
+/*
+  Copyright 2025 Equinor ASA
+
+  This file is part of the Open Porous Media project (OPM).
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+  You should have received a copy of the GNU General Public License
+  along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestSmartPointers
+
+#include <boost/test/unit_test.hpp>
+#include <opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp>
+
+namespace
+{
+
+struct SomeStruct {
+    __device__ void someFunction()
+    {
+        this->isCalled = true;
+    }
+
+    bool isCalled = false;
+};
+
+template <class T>
+__global__ void
+setValue(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
+{
+    *ptrOut = *ptrIn;
+}
+
+template <class T>
+__global__ void
+setValueGet(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
+{
+    *ptrOut.get() = *ptrIn.get();
+}
+
+template <class T>
+__global__ void
+callFunction(Opm::gpuistl::PointerView<T> ptrIn)
+{
+    ptrIn->someFunction();
+}
+
+
+} // namespace
+
+
+BOOST_AUTO_TEST_CASE(TestSharedPointer)
+{
+    auto sharedPtr = Opm::gpuistl::make_gpu_shared_ptr(1);
+
+    int valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, sharedPtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 1);
+}
+
+BOOST_AUTO_TEST_CASE(TestUniquePointer)
+{
+    auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr(1);
+
+    int valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, uniquePtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 1);
+}
+
+BOOST_AUTO_TEST_CASE(TestPointerView)
+{
+    auto pointerDestination = Opm::gpuistl::make_gpu_shared_ptr<double>(92);
+    auto pointerSource = Opm::gpuistl::make_gpu_shared_ptr(128.5);
+
+    setValue<<<1, 1>>>(Opm::gpuistl::make_view(pointerSource), Opm::gpuistl::make_view(pointerDestination));
+
+    double valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 128.5);
+
+    auto newSource = Opm::gpuistl::make_gpu_shared_ptr(-1.0);
+    setValueGet<<<1, 1>>>(Opm::gpuistl::make_view(newSource), Opm::gpuistl::make_view(pointerDestination));
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, -1.0);
+
+    auto structPtr = Opm::gpuistl::make_gpu_shared_ptr<SomeStruct>();
+    callFunction<<<1, 1>>>(Opm::gpuistl::make_view(structPtr));
+    bool isCalled = false;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&isCalled, structPtr.get(), sizeof(bool), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(isCalled, true);
+
+
+    auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr(1.0);
+    auto uniqueView = Opm::gpuistl::make_view(uniquePtr);
+
+    double valueFromDeviceUnique = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDeviceUnique, uniqueView.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDeviceUnique, 1.0);
+}
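
Usage sketch: the snippet below shows how the new helpers from this patch are intended to be
combined in calling code compiled as CUDA. A device-side value is allocated and initialized with
make_gpu_shared_ptr, a non-owning PointerView is handed to a kernel via make_view, and the result
is copied back to the host. The kernel scaleValue, the factor argument and the main() driver are
made-up examples for illustration, not part of the patch; the helper functions, make_view and the
OPM_GPU_SAFE_CALL macro are the ones introduced or used above.

#include <opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp>

#include <iostream>

using namespace Opm::gpuistl;

// Hypothetical kernel: scales the pointed-to value in place.
// PointerView holds only a raw pointer, so it can be passed to a kernel by value.
__global__ void scaleValue(PointerView<double> value, double factor)
{
    *value *= factor;
}

int main()
{
    // Owning device allocation, initialized from the host value 2.5.
    // cudaFree is called by the custom deleter when the last shared_ptr is destroyed.
    auto devValue = make_gpu_shared_ptr<double>(2.5);

    // The shared_ptr itself cannot cross the kernel boundary; a PointerView can.
    scaleValue<<<1, 1>>>(make_view(devValue), 4.0);
    OPM_GPU_SAFE_CALL(cudaDeviceSynchronize());

    // Copy the result back to the host; expected output: 10.
    double result = 0.0;
    OPM_GPU_SAFE_CALL(cudaMemcpy(&result, devValue.get(), sizeof(double), cudaMemcpyDeviceToHost));
    std::cout << result << "\n";
}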