Added convenience functions for creating smart pointers to GPU-allocated memory, and a non-owning view type for passing such pointers to GPU kernels.

This commit is contained in:
Kjetil Olsen Lye 2025-02-04 10:26:48 +01:00
parent fca5899b43
commit db440df072
4 changed files with 290 additions and 0 deletions

View File

@ -728,6 +728,7 @@ if(CUDA_FOUND)
gpu_ad
gpu_linear_two_phase_material
gpuPvt
smart_pointers
PROPERTIES LABELS ${gpu_label})
endif()

View File

@ -312,6 +312,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg gpu_smart_pointer.hpp)
endif()
if(USE_GPU_BRIDGE)
@ -467,6 +468,8 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ad.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_linear_two_phase_material.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpuPvt.cu)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_smart_pointers.cu)
# for loop providing the flag --expt-relaxed-constexpr to fix some cuda issues with constexpr
if(NOT CONVERT_CUDA_TO_HIP)

View File

@ -0,0 +1,179 @@
/*
Copyright 2025 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
#define OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
#include <cuda_runtime.h>
#include <memory>
#include <opm/common/utility/gpuDecorators.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
/**
* @file gpu_smart_pointer.hpp defines convenience classes and functions for using std::shared_ptr and std::unique_ptr
* with GPU allocated memory.
*/
namespace Opm::gpuistl
{
/**
* @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
*
* This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
* It returns a \c std::shared_ptr that automatically handles the release of
* GPU memory with cudaFree when no longer in use.
*
* @tparam T The element type to allocate on the GPU.
* @return A std::shared_ptr to the GPU-allocated memory.
*/
template <typename T>
std::shared_ptr<T>
make_gpu_shared_ptr()
{
T* ptr = nullptr;
OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
return std::shared_ptr<T>(ptr, deleter);
}
/**
* @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
*
* This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
* It returns a std::shared_ptr that automatically handles the release of
* GPU memory with cudaFree when no longer in use.
*
* @tparam T The element type to allocate on the GPU.
* @param value The value to copy to the GPU-allocated memory.
* @return A std::shared_ptr to the GPU-allocated memory.
*/
template <typename T>
std::shared_ptr<T>
make_gpu_shared_ptr(const T& value)
{
auto ptr = make_gpu_shared_ptr<T>();
OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
return ptr;
}
/**
* @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
*
* This function allocates memory on the GPU for the type \c T, using \c cudaMalloc .
* It returns a std::unique_ptr that automatically handles the release of
* GPU memory with cudaFree when no longer in use.
*
* @tparam T The element type to allocate on the GPU.
* @return A std::unique_ptr to the GPU-allocated memory.
*/
template <typename T>
auto
make_gpu_unique_ptr()
{
T* ptr = nullptr;
OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
return std::unique_ptr<T, decltype(deleter)>(ptr, deleter);
}
/**
* @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
*
* This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
* It returns a std::unique_ptr that automatically handles the release of
* GPU memory with cudaFree when no longer in use.
*
* @tparam T The element type to allocate on the GPU.
* @param value The value to copy to the GPU-allocated memory.
* @return A std::unique_ptr to the GPU-allocated memory.
*/
template <typename T>
auto
make_gpu_unique_ptr(const T& value)
{
auto ptr = make_gpu_unique_ptr<T>();
OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
return ptr;
}
/**
* @brief A view towards a smart pointer to GPU-allocated memory.
*
* This will emulate a smart pointer to GPU-allocated memory, but without ownership semantics, and
* being compatible with the requirements of the GPU kernels. This is useful when we want to pass
* a smart pointer to a GPU kernel, but we do not want to transfer the ownership of the memory.
*/
template <class T>
class PointerView
{
public:
PointerView(const PointerView& other) = default;
PointerView(const std::shared_ptr<T>& ptr)
: ptr_(ptr.get())
{
}
template <class Deleter>
PointerView(const std::unique_ptr<T, Deleter>& ptr)
: ptr_(ptr.get())
{
}
PointerView(T* ptr)
: ptr_(ptr)
{
}
OPM_HOST_DEVICE T* get() const
{
return ptr_;
}
OPM_HOST_DEVICE T& operator*() const
{
return *ptr_;
}
OPM_HOST_DEVICE T* operator->() const
{
return ptr_;
}
private:
T* ptr_;
};
template <class T>
PointerView<T>
make_view(const std::shared_ptr<T>& ptr)
{
return PointerView<T>(ptr);
}
template <class T, class Deleter>
PointerView<T>
make_view(const std::unique_ptr<T, Deleter>& ptr)
{
return PointerView<T>(ptr);
}
} // namespace Opm::gpuistl
#endif

View File

@ -0,0 +1,107 @@
/*
Copyright 2025 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#define BOOST_TEST_MODULE TestSmartPointers
#include <boost/test/unit_test.hpp>
#include <opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp>
namespace
{
struct SomeStruct {
__device__ void someFunction()
{
this->isCalled = true;
}
bool isCalled = false;
};
template <class T>
__global__ void
setValue(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
{
*ptrOut = *ptrIn;
}
template <class T>
__global__ void
setValueGet(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
{
*ptrOut.get() = *ptrIn.get();
}
template <class T>
__global__ void
callFunction(Opm::gpuistl::PointerView<T> ptrIn)
{
ptrIn->someFunction();
}
} // namespace
BOOST_AUTO_TEST_CASE(TestSharedPointer)
{
auto sharedPtr = Opm::gpuistl::make_gpu_shared_ptr<int>(1);
int valueFromDevice = 0;
OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, sharedPtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(valueFromDevice, 1);
}
BOOST_AUTO_TEST_CASE(TestUniquePointer)
{
auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr<int>(1);
int valueFromDevice = 0;
OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, uniquePtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(valueFromDevice, 1);
}
BOOST_AUTO_TEST_CASE(TestPointerView)
{
auto pointerDestination = Opm::gpuistl::make_gpu_shared_ptr<double>(92);
auto pointerSource = Opm::gpuistl::make_gpu_shared_ptr<double>(128.5);
setValue<<<1, 1>>>(Opm::gpuistl::make_view(pointerSource), Opm::gpuistl::make_view(pointerDestination));
double valueFromDevice = 0;
OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(valueFromDevice, 128.5);
auto newSource = Opm::gpuistl::make_gpu_shared_ptr<double>(-1.0);
setValueGet<<<1, 1>>>(Opm::gpuistl::make_view(newSource), Opm::gpuistl::make_view(pointerDestination));
OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(valueFromDevice, -1.0);
auto structPtr = Opm::gpuistl::make_gpu_shared_ptr<SomeStruct>();
callFunction<<<1, 1>>>(Opm::gpuistl::make_view(structPtr));
bool isCalled = false;
OPM_GPU_SAFE_CALL(cudaMemcpy(&isCalled, structPtr.get(), sizeof(bool), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(isCalled, true);
auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr<double>(1.0);
auto uniqueView = Opm::gpuistl::make_view(uniquePtr);
double valueFromDeviceUnique = 0;
OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDeviceUnique, uniqueView.get(), sizeof(double), cudaMemcpyDeviceToHost));
BOOST_CHECK_EQUAL(valueFromDeviceUnique, 1.0);
}