refactor CuBuffer

2025-02-25 18:55:30 -06:00 · 2024-08-22 14:12:30 +02:00
parent 67bc9e8f34
commit 69897753e8
7 changed files with 72 additions and 72 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -558,7 +558,7 @@ if(CUDA_FOUND)
                       cuseqilu0
                       cuowneroverlapcopy
                       solver_adapter
-                       cubuffer
+                       GpuBuffer
                       cuview
                       PROPERTIES LABELS ${gpu_label})
 endif()
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -209,7 +209,7 @@ if (HAVE_CUDA)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/cusparse_matrix_operations.cu)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp)
-  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuBuffer.cpp)
+  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuBuffer.cpp)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
@@ -233,7 +233,7 @@ if (HAVE_CUDA)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp)
-  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBuffer.hpp)
+  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuBuffer.hpp)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp)
  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp)
@@ -389,7 +389,7 @@ if (HAVE_CUDA)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp)
-  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cubuffer.cu)
+  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuBuffer.cu)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuview.cu)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp)
--- a/opm/simulators/linalg/cuistl/CuView.hpp
+++ b/opm/simulators/linalg/cuistl/CuView.hpp
@@ -48,7 +48,7 @@ namespace Opm::gpuistl
 *  Implementations are placed in this headerfile for functions that may be called
 *  inside a kernel to avoid expensive RDC (relocatable device code)
 *
- * The view will typically provide a view into a CuBuffer and be able to
+ * The view will typically provide a view into a GpuBuffer and be able to
 * manipulate the data within it
 *
 * @param T Type of the data we store, typically int/float/double w/o const specifier
--- a/opm/simulators/linalg/cuistl/GpuBuffer.cpp
+++ b/opm/simulators/linalg/cuistl/GpuBuffer.cpp
@@ -20,7 +20,7 @@
 #include <cuda_runtime.h>
 #include <algorithm>
 #include <fmt/core.h>
-#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/GpuBuffer.hpp>
 #include <opm/simulators/linalg/cuistl/CuView.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

@@ -28,24 +28,24 @@ namespace Opm::gpuistl
 {

 template <class T>
-CuBuffer<T>::CuBuffer(const std::vector<T>& data)
-    : CuBuffer(data.data(), data.size())
+GpuBuffer<T>::GpuBuffer(const std::vector<T>& data)
+    : GpuBuffer(data.data(), data.size())
 {
 }

 template <class T>
-CuBuffer<T>::CuBuffer(const size_t numberOfElements)
+GpuBuffer<T>::GpuBuffer(const size_t numberOfElements)
    : m_numberOfElements(numberOfElements)
 {
    if (numberOfElements < 1) {
-        OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
+        OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
    }
    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
 }

 template <class T>
-CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements)
-    : CuBuffer(numberOfElements)
+GpuBuffer<T>::GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
+    : GpuBuffer(numberOfElements)
 {

    OPM_CUDA_SAFE_CALL(cudaMemcpy(
@@ -53,8 +53,8 @@ CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements)
 }

 template <class T>
-CuBuffer<T>::CuBuffer(const CuBuffer<T>& other)
-    : CuBuffer(other.m_numberOfElements)
+GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
+    : GpuBuffer(other.m_numberOfElements)
 {
    assertHasElements();
    assertSameSize(other);
@@ -65,24 +65,24 @@ CuBuffer<T>::CuBuffer(const CuBuffer<T>& other)
 }

 template <class T>
-CuBuffer<T>::~CuBuffer()
+GpuBuffer<T>::~GpuBuffer()
 {
    OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
 }

 template <typename T>
-typename CuBuffer<T>::size_type
-CuBuffer<T>::size() const
+typename GpuBuffer<T>::size_type
+GpuBuffer<T>::size() const
 {
    return m_numberOfElements;
 }

 template <typename T>
 void
-CuBuffer<T>::resize(size_t newSize)
+GpuBuffer<T>::resize(size_t newSize)
 {
    if (newSize < 1) {
-        OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
+        OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
    }
    // Allocate memory for the new buffer
    T* tmpBuffer = nullptr;
@@ -107,7 +107,7 @@ CuBuffer<T>::resize(size_t newSize)

 template <typename T>
 std::vector<T>
-CuBuffer<T>::asStdVector() const
+GpuBuffer<T>::asStdVector() const
 {
    std::vector<T> temporary(m_numberOfElements);
    copyToHost(temporary);
@@ -116,14 +116,14 @@ CuBuffer<T>::asStdVector() const

 template <typename T>
 void
-CuBuffer<T>::assertSameSize(const CuBuffer<T>& x) const
+GpuBuffer<T>::assertSameSize(const GpuBuffer<T>& x) const
 {
    assertSameSize(x.m_numberOfElements);
 }

 template <typename T>
 void
-CuBuffer<T>::assertSameSize(size_t size) const
+GpuBuffer<T>::assertSameSize(size_t size) const
 {
    if (size != m_numberOfElements) {
        OPM_THROW(std::invalid_argument,
@@ -133,7 +133,7 @@ CuBuffer<T>::assertSameSize(size_t size) const

 template <typename T>
 void
-CuBuffer<T>::assertHasElements() const
+GpuBuffer<T>::assertHasElements() const
 {
    if (m_numberOfElements <= 0) {
        OPM_THROW(std::invalid_argument, "We have 0 elements");
@@ -142,21 +142,21 @@ CuBuffer<T>::assertHasElements() const

 template <typename T>
 T*
-CuBuffer<T>::data()
+GpuBuffer<T>::data()
 {
    return m_dataOnDevice;
 }

 template <typename T>
 const T*
-CuBuffer<T>::data() const
+GpuBuffer<T>::data() const
 {
    return m_dataOnDevice;
 }

 template <class T>
 void
-CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
+GpuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
 {
    if (numberOfElements > size()) {
        OPM_THROW(std::runtime_error,
@@ -169,7 +169,7 @@ CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)

 template <class T>
 void
-CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
+GpuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
 {
    assertSameSize(numberOfElements);
    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
@@ -177,28 +177,28 @@ CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const

 template <class T>
 void
-CuBuffer<T>::copyFromHost(const std::vector<T>& data)
+GpuBuffer<T>::copyFromHost(const std::vector<T>& data)
 {
    copyFromHost(data.data(), data.size());
 }
 template <class T>
 void
-CuBuffer<T>::copyToHost(std::vector<T>& data) const
+GpuBuffer<T>::copyToHost(std::vector<T>& data) const
 {
    copyToHost(data.data(), data.size());
 }

-template class CuBuffer<double>;
-template class CuBuffer<float>;
-template class CuBuffer<int>;
+template class GpuBuffer<double>;
+template class GpuBuffer<float>;
+template class GpuBuffer<int>;

 template <class T>
-CuView<const T> make_view(const CuBuffer<T>& buf) {
+CuView<const T> make_view(const GpuBuffer<T>& buf) {
    return CuView<const T>(buf.data(), buf.size());
 }

-template CuView<const double> make_view<double>(const CuBuffer<double>&);
-template CuView<const float> make_view<float>(const CuBuffer<float>&);
-template CuView<const int> make_view<int>(const CuBuffer<int>&);
+template CuView<const double> make_view<double>(const GpuBuffer<double>&);
+template CuView<const float> make_view<float>(const GpuBuffer<float>&);
+template CuView<const int> make_view<int>(const GpuBuffer<int>&);

 } // namespace Opm::gpuistl
--- a/opm/simulators/linalg/cuistl/GpuBuffer.hpp
+++ b/opm/simulators/linalg/cuistl/GpuBuffer.hpp
@@ -16,8 +16,8 @@
  You should have received a copy of the GNU General Public License
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */
-#ifndef OPM_CUBUFFER_HEADER_HPP
-#define OPM_CUBUFFER_HEADER_HPP
+#ifndef OPM_GPUBUFFER_HEADER_HPP
+#define OPM_GPUBUFFER_HEADER_HPP
 #include <dune/common/fvector.hh>
 #include <dune/istl/bvector.hh>
 #include <exception>
@@ -33,18 +33,18 @@ namespace Opm::gpuistl
 {

 /**
- * @brief The CuBuffer class is a simple container class for the GPU.
+ * @brief The GpuBuffer class is a simple container class for the GPU.
 *
 *
 * Example usage:
 *
 * @code{.cpp}
- * #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+ * #include <opm/simulators/linalg/cuistl/GpuBuffer.hpp>
 *
 * void someFunction() {
 *     auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
 *
- *     auto dataOnGPU = CuBuffer<double>(someDataOnCPU);
+ *     auto dataOnGPU = GpuBuffer<double>(someDataOnCPU);
 *
 *     auto stdVectorOnCPU = dataOnGPU.asStdVector();
 * }
@@ -52,7 +52,7 @@ namespace Opm::gpuistl
 * @tparam T the type to store. Can be either float, double or int.
 */
 template <typename T>
-class CuBuffer
+class GpuBuffer
 {
 public:
    using field_type = T;
@@ -60,17 +60,17 @@ public:
    using value_type = T;

    /**
-     * @brief CuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
+     * @brief GpuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to
     * this newly allocated memory.
     *
     * @note This does synchronous transfer.
     *
     * @param other the buffer to copy from
     */
-    CuBuffer(const CuBuffer<T>& other);
+    GpuBuffer(const GpuBuffer<T>& other);

    /**
-     * @brief CuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
+     * @brief GpuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
     * this newly allocated memory.
     *
     * @note This does CPU to GPU transfer.
@@ -78,23 +78,23 @@ public:
     *
     * @param data the vector to copy from
     */
-    explicit CuBuffer(const std::vector<T>& data);
+    explicit GpuBuffer(const std::vector<T>& data);

    /**
     * @brief Default constructor that will initialize cublas and allocate 0 bytes of memory
     */
-    explicit CuBuffer();
+    explicit GpuBuffer();

    /**
-     * @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
+     * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
     *
     * @param numberOfElements number of T elements to allocate
     */
-    explicit CuBuffer(const size_t numberOfElements);
+    explicit GpuBuffer(const size_t numberOfElements);


    /**
-     * @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
+     * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
     * data
     *
     * @note This assumes the data is on the CPU.
@@ -102,12 +102,12 @@ public:
     * @param numberOfElements number of T elements to allocate
     * @param dataOnHost data on host/CPU
     */
-    CuBuffer(const T* dataOnHost, const size_t numberOfElements);
+    GpuBuffer(const T* dataOnHost, const size_t numberOfElements);

    /**
-     * @brief ~CuBuffer calls cudaFree
+     * @brief ~GpuBuffer calls cudaFree
     */
-    virtual ~CuBuffer();
+    virtual ~GpuBuffer();

    /**
     * @return the raw pointer to the GPU data
@@ -120,7 +120,7 @@ public:
    const T* data() const;

    /**
-     * @return fetch the first element in a CuBuffer
+     * @return fetch the first element in a GpuBuffer
     */
    __host__ __device__ T& front()
    {
@@ -131,7 +131,7 @@ public:
    }

    /**
-     * @return fetch the last element in a CuBuffer
+     * @return fetch the last element in a GpuBuffer
     */
    __host__ __device__ T& back()
    {
@@ -142,7 +142,7 @@ public:
    }

    /**
-     * @return fetch the first element in a CuBuffer
+     * @return fetch the first element in a GpuBuffer
     */
    __host__ __device__ T front() const
    {
@@ -153,7 +153,7 @@ public:
    }

    /**
-     * @return fetch the last element in a CuBuffer
+     * @return fetch the last element in a GpuBuffer
     */
    __host__ __device__ T back() const
    {
@@ -176,7 +176,7 @@ public:
        // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
        if (m_numberOfElements != bvector.size()) {
            OPM_THROW(std::runtime_error,
-                      fmt::format("Given incompatible vector size. CuBuffer has size {}, \n"
+                      fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
                                  "however, BlockVector has N() = {}, and size = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
@@ -199,7 +199,7 @@ public:
        // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
        if (m_numberOfElements != bvector.size()) {
            OPM_THROW(std::runtime_error,
-                      fmt::format("Given incompatible vector size. CuBuffer has size {},\n however, the BlockVector "
+                      fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
                                  "has has N() = {}, and size() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
@@ -267,14 +267,14 @@ private:
    T* m_dataOnDevice = nullptr;
    size_t m_numberOfElements;

-    void assertSameSize(const CuBuffer<T>& other) const;
+    void assertSameSize(const GpuBuffer<T>& other) const;
    void assertSameSize(size_t size) const;

    void assertHasElements() const;
 };

 template <class T>
-CuView<const T> make_view(const CuBuffer<T>&);
+CuView<const T> make_view(const GpuBuffer<T>&);

 } // namespace Opm::gpuistl
 #endif
--- a/tests/cuistl/test_GpuBuffer.cu
+++ b/tests/cuistl/test_GpuBuffer.cu
@@ -18,12 +18,12 @@
 */
 #include <config.h>

-#define BOOST_TEST_MODULE TestCuBuffer
+#define BOOST_TEST_MODULE TestGpuBuffer

 #include <boost/test/unit_test.hpp>
 #include <cuda_runtime.h>

-#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/GpuBuffer.hpp>
 #include <opm/simulators/linalg/cuistl/CuView.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

@@ -35,13 +35,13 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
 {
    // test that we can create buffers and make views of the buffers using the pointer constructor
    auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
-    const auto gpubuf = ::Opm::gpuistl::CuBuffer<int>(buf);
+    const auto gpubuf = ::Opm::gpuistl::GpuBuffer<int>(buf);
    auto gpuview = ::Opm::gpuistl::CuView<int>(buf.data(), buf.size());
    bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::CuView<int>, decltype(gpuview)>::value;

    BOOST_CHECK(gpuBufCreatedView);

-    // test that we can make views of buffers by using the cubuffer constructor
+    // test that we can make views of buffers by using the GpuBuffer constructor
    auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
    bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::CuView<const int>, decltype(gpuview2)>::value;

--- a/tests/cuistl/test_cuview.cu
+++ b/tests/cuistl/test_cuview.cu
@@ -25,7 +25,7 @@
 #include <dune/common/fvector.hh>
 #include <dune/istl/bvector.hh>
 #include <opm/simulators/linalg/cuistl/CuView.hpp>
-#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/GpuBuffer.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <random>
 #include <array>
@@ -33,7 +33,7 @@
 #include <type_traits>

 using CuViewDouble = ::Opm::gpuistl::CuView<double>;
-using CuBufferDouble = ::Opm::gpuistl::CuBuffer<double>;
+using GpuBufferDouble = ::Opm::gpuistl::GpuBuffer<double>;

 __global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
    b[0] = a.front();
@@ -48,9 +48,9 @@ BOOST_AUTO_TEST_CASE(TestCreationAndIndexing)
 {
    // A simple test to check that we can move data to and from the GPU
    auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
-    auto cubuffer = CuBufferDouble(cpubuffer);
-    auto cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
-    const auto const_cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
+    auto GpuBuffer = GpuBufferDouble(cpubuffer);
+    auto cuview = CuViewDouble(GpuBuffer.data(), GpuBuffer.size());
+    const auto const_cuview = CuViewDouble(GpuBuffer.data(), GpuBuffer.size());

    auto stdVecOfCuView = cuview.asStdVector();
    auto const_stdVecOfCuView = cuview.asStdVector();
@@ -94,9 +94,9 @@ BOOST_AUTO_TEST_CASE(TestCuViewOnCPUWithSTLIteratorAlgorithm)
 BOOST_AUTO_TEST_CASE(TestCuViewOnGPU)
 {
    auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
-    auto cubufA = CuBufferDouble(buf);
+    auto cubufA = GpuBufferDouble(buf);
    auto cuviewA = CuViewDouble(cubufA.data(), cubufA.size());
-    auto cubufB = CuBufferDouble(4);
+    auto cubufB = GpuBufferDouble(4);
    auto cuviewB = CuViewDouble(cubufB.data(), cubufB.size());

    useCuViewOnGPU<<<1,1>>>(cuviewA, cuviewB);