From db440df07280a760de4836dbcea71f5763f295ee Mon Sep 17 00:00:00 2001
From: Kjetil Olsen Lye
Date: Tue, 4 Feb 2025 10:26:48 +0100
Subject: [PATCH] Added convenience functions for smart pointers on the GPU and
 a non-owning view of pointers.

---
 CMakeLists.txt                                |   1 +
 CMakeLists_files.cmake                        |   3 +
 .../linalg/gpuistl/gpu_smart_pointer.hpp      | 179 ++++++++++++++++++
 tests/gpuistl/test_smart_pointers.cu          | 107 +++++++++++
 4 files changed, 290 insertions(+)
 create mode 100644 opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
 create mode 100644 tests/gpuistl/test_smart_pointers.cu

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1fda2d2f5..6ade0665b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -728,6 +728,7 @@ if(CUDA_FOUND)
       gpu_ad
       gpu_linear_two_phase_material
       gpuPvt
+      smart_pointers
       PROPERTIES LABELS ${gpu_label})
 endif()

diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index 37b6995f2..c5c041a95 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -312,6 +312,7 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBlockPreconditioner.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerHolder.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg set_device.hpp)
+  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg gpu_smart_pointer.hpp)
 endif()

 if(USE_GPU_BRIDGE)
@@ -467,6 +468,8 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_ad.cu)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_linear_two_phase_material.cu)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpuPvt.cu)
+  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_smart_pointers.cu)
+
   # for loop providing the flag --expt-relaxed-constexpr to fix some cuda issues with constexpr
   if(NOT CONVERT_CUDA_TO_HIP)

diff --git a/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
new file mode 100644
index 000000000..ee67e97cf
--- /dev/null
+++ b/opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp
@@ -0,0 +1,179 @@
+/*
+  Copyright 2025 Equinor ASA
+  This file is part of the Open Porous Media project (OPM).
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+  You should have received a copy of the GNU General Public License
+  along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
+#define OPM_SIMULATORS_LINALG_GPUISTL_GPU_SMART_POINTER_HPP
+
+#include <opm/common/utility/gpuDecorators.hpp>
+
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
+
+#include <cuda_runtime.h>
+#include <memory>
+
+/**
+ * @file gpu_smart_pointer.hpp defines convenience classes and functions for using std::shared_ptr and std::unique_ptr
+ * with GPU allocated memory.
+ */
+
+namespace Opm::gpuistl
+{
+
+
+/**
+ * @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a \c std::shared_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @return A std::shared_ptr to the GPU-allocated memory.
+ */
+template <class T>
+std::shared_ptr<T>
+make_gpu_shared_ptr()
+{
+    T* ptr = nullptr;
+    OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
+    auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
+    return std::shared_ptr<T>(ptr, deleter);
+}
+
+/**
+ * @brief Creates a shared pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::shared_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @param value The value to copy to the GPU-allocated memory.
+ * @return A std::shared_ptr to the GPU-allocated memory.
+ */
+template <class T>
+std::shared_ptr<T>
+make_gpu_shared_ptr(const T& value)
+{
+    auto ptr = make_gpu_shared_ptr<T>();
+    OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
+    return ptr;
+}
+
+
+/**
+ * @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::unique_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @return A std::unique_ptr to the GPU-allocated memory.
+ */
+template <class T>
+auto
+make_gpu_unique_ptr()
+{
+    T* ptr = nullptr;
+    OPM_GPU_SAFE_CALL(cudaMalloc(&ptr, sizeof(T)));
+
+    auto deleter = [](T* ptr) { OPM_GPU_WARN_IF_ERROR(cudaFree(ptr)); };
+    return std::unique_ptr<T, decltype(deleter)>(ptr, deleter);
+}
+
+/**
+ * @brief Creates a unique pointer managing GPU-allocated memory of the specified element type.
+ *
+ * This function allocates memory on the GPU for the type \c T, using \c cudaMalloc.
+ * It returns a std::unique_ptr that automatically handles the release of
+ * GPU memory with cudaFree when no longer in use.
+ *
+ * @tparam T The element type to allocate on the GPU.
+ * @param value The value to copy to the GPU-allocated memory.
+ * @return A std::unique_ptr to the GPU-allocated memory.
+ */
+template <class T>
+auto
+make_gpu_unique_ptr(const T& value)
+{
+    auto ptr = make_gpu_unique_ptr<T>();
+    OPM_GPU_SAFE_CALL(cudaMemcpy(ptr.get(), &value, sizeof(T), cudaMemcpyHostToDevice));
+    return ptr;
+}
+
+/**
+ * @brief A non-owning view of a smart pointer to GPU-allocated memory.
+ *
+ * This emulates a smart pointer to GPU-allocated memory, but without ownership semantics
+ * and in a form that can be passed to GPU kernels. This is useful when we want to pass
+ * a smart pointer to a GPU kernel without transferring ownership of the memory.
+ */
+template <class T>
+class PointerView
+{
+public:
+    PointerView(const PointerView& other) = default;
+
+    PointerView(const std::shared_ptr<T>& ptr)
+        : ptr_(ptr.get())
+    {
+    }
+
+    template <class Deleter>
+    PointerView(const std::unique_ptr<T, Deleter>& ptr)
+        : ptr_(ptr.get())
+    {
+    }
+
+    PointerView(T* ptr)
+        : ptr_(ptr)
+    {
+    }
+
+    OPM_HOST_DEVICE T* get() const
+    {
+        return ptr_;
+    }
+
+    OPM_HOST_DEVICE T& operator*() const
+    {
+        return *ptr_;
+    }
+
+    OPM_HOST_DEVICE T* operator->() const
+    {
+        return ptr_;
+    }
+
+private:
+    T* ptr_;
+};
+
+template <class T>
+PointerView<T>
+make_view(const std::shared_ptr<T>& ptr)
+{
+    return PointerView<T>(ptr);
+}
+
+template <class T, class Deleter>
+PointerView<T>
+make_view(const std::unique_ptr<T, Deleter>& ptr)
+{
+    return PointerView<T>(ptr);
+}
+} // namespace Opm::gpuistl
+#endif
diff --git a/tests/gpuistl/test_smart_pointers.cu b/tests/gpuistl/test_smart_pointers.cu
new file mode 100644
index 000000000..232e372f9
--- /dev/null
+++ b/tests/gpuistl/test_smart_pointers.cu
@@ -0,0 +1,107 @@
+/*
+  Copyright 2025 Equinor ASA
+
+  This file is part of the Open Porous Media project (OPM).
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+  You should have received a copy of the GNU General Public License
+  along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestSmartPointers
+
+#include <boost/test/unit_test.hpp>
+#include <opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp>
+
+namespace
+{
+
+struct SomeStruct {
+    __device__ void someFunction()
+    {
+        this->isCalled = true;
+    }
+
+    bool isCalled = false;
+};
+
+template <class T>
+__global__ void
+setValue(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
+{
+    *ptrOut = *ptrIn;
+}
+
+template <class T>
+__global__ void
+setValueGet(Opm::gpuistl::PointerView<T> ptrIn, Opm::gpuistl::PointerView<T> ptrOut)
+{
+    *ptrOut.get() = *ptrIn.get();
+}
+
+template <class T>
+__global__ void
+callFunction(Opm::gpuistl::PointerView<T> ptrIn)
+{
+    ptrIn->someFunction();
+}
+
+
+} // namespace
+
+
+BOOST_AUTO_TEST_CASE(TestSharedPointer)
+{
+    auto sharedPtr = Opm::gpuistl::make_gpu_shared_ptr(1);
+
+    int valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, sharedPtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 1);
+}
+
+BOOST_AUTO_TEST_CASE(TestUniquePointer)
+{
+    auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr(1);
+
+    int valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, uniquePtr.get(), sizeof(int), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 1);
+}
+
+BOOST_AUTO_TEST_CASE(TestPointerView)
+{
+    auto pointerDestination = Opm::gpuistl::make_gpu_shared_ptr<double>(92);
+    auto pointerSource = Opm::gpuistl::make_gpu_shared_ptr(128.5);
+
+    setValue<<<1, 1>>>(Opm::gpuistl::make_view(pointerSource), Opm::gpuistl::make_view(pointerDestination));
+
+    double valueFromDevice = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, 128.5);
+
+    auto newSource = Opm::gpuistl::make_gpu_shared_ptr(-1.0);
+    setValueGet<<<1, 1>>>(Opm::gpuistl::make_view(newSource), Opm::gpuistl::make_view(pointerDestination));
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDevice, pointerDestination.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDevice, -1.0);
+
+    auto structPtr = Opm::gpuistl::make_gpu_shared_ptr<SomeStruct>();
+    callFunction<<<1, 1>>>(Opm::gpuistl::make_view(structPtr));
+    bool isCalled = false;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&isCalled, structPtr.get(), sizeof(bool), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(isCalled, true);
+
+
+    auto uniquePtr = Opm::gpuistl::make_gpu_unique_ptr(1.0);
+    auto uniqueView = Opm::gpuistl::make_view(uniquePtr);
+
+    double valueFromDeviceUnique = 0;
+    OPM_GPU_SAFE_CALL(cudaMemcpy(&valueFromDeviceUnique, uniqueView.get(), sizeof(double), cudaMemcpyDeviceToHost));
+    BOOST_CHECK_EQUAL(valueFromDeviceUnique, 1.0);
+}
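
Usage sketch: the snippet below shows how the new helpers from this patch are intended to be
combined in calling code compiled as CUDA. A device-side value is allocated and initialized with
make_gpu_shared_ptr, a non-owning PointerView is handed to a kernel via make_view, and the result
is copied back to the host. The kernel scaleValue, the factor argument and the main() driver are
made-up examples for illustration, not part of the patch; the helper functions, make_view and the
OPM_GPU_SAFE_CALL macro are the ones introduced or used above.

#include <opm/simulators/linalg/gpuistl/gpu_smart_pointer.hpp>

#include <iostream>

using namespace Opm::gpuistl;

// Hypothetical kernel: scales the pointed-to value in place.
// PointerView holds only a raw pointer, so it can be passed to a kernel by value.
__global__ void scaleValue(PointerView<double> value, double factor)
{
    *value *= factor;
}

int main()
{
    // Owning device allocation, initialized from the host value 2.5.
    // cudaFree is called by the custom deleter when the last shared_ptr is destroyed.
    auto devValue = make_gpu_shared_ptr<double>(2.5);

    // The shared_ptr itself cannot cross the kernel boundary; a PointerView can.
    scaleValue<<<1, 1>>>(make_view(devValue), 4.0);
    OPM_GPU_SAFE_CALL(cudaDeviceSynchronize());

    // Copy the result back to the host; expected output: 10.
    double result = 0.0;
    OPM_GPU_SAFE_CALL(cudaMemcpy(&result, devValue.get(), sizeof(double), cudaMemcpyDeviceToHost));
    std::cout << result << "\n";
}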