Used safe conversion in CuVector

2025-02-25 18:55:30 -06:00 · 2023-03-29 13:43:43 +02:00
parent 5fee5cece8
commit 3f1cbbe1b8
2 changed files with 42 additions and 21 deletions
--- a/opm/simulators/linalg/cuistl/CuVector.cpp
+++ b/opm/simulators/linalg/cuistl/CuVector.cpp
@@ -41,24 +41,25 @@ namespace Opm::cuistl

 template <class T>
 CuVector<T>::CuVector(const std::vector<T>& data)
-    : CuVector(data.data(), data.size())
+    : CuVector(data.data(), detail::to_int(data.size()))
 {
 }

 template <class T>
-CuVector<T>::CuVector(const int numberOfElements)
-    : m_numberOfElements(numberOfElements)
+CuVector<T>::CuVector(const size_t numberOfElements)
+    : m_numberOfElements(detail::to_int(numberOfElements))
    , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
 {
-    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
+    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
 }

 template <class T>
-CuVector<T>::CuVector(const T* dataOnHost, const int numberOfElements)
+CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
    : CuVector(numberOfElements)
 {

-    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(
+        m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
 }

 template <class T>
@@ -66,7 +67,7 @@ CuVector<T>&
 CuVector<T>::operator=(T scalar)
 {
    CHECKPOSITIVESIZE
-    detail::setVectorValue(data(), m_numberOfElements, scalar);
+    detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
    return *this;
 }

@@ -79,8 +80,10 @@ CuVector<T>::operator=(const CuVector<T>& other)
    if (other.m_numberOfElements != m_numberOfElements) {
        OPM_THROW(std::invalid_argument, "Can only copy from vector of same size.");
    }
-    OPM_CUDA_SAFE_CALL(
-        cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice));
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+                                  other.m_dataOnDevice,
+                                  detail::to_size_t(m_numberOfElements) * sizeof(T),
+                                  cudaMemcpyDeviceToDevice));
    return *this;
 }

@@ -90,8 +93,10 @@ CuVector<T>::CuVector(const CuVector<T>& other)
 {
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
-    OPM_CUDA_SAFE_CALL(
-        cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice));
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+                                  other.m_dataOnDevice,
+                                  detail::to_size_t(m_numberOfElements) * sizeof(T),
+                                  cudaMemcpyDeviceToDevice));
 }

 template <class T>
@@ -111,14 +116,19 @@ template <typename T>
 typename CuVector<T>::size_type
 CuVector<T>::dim() const
 {
-    return m_numberOfElements;
+    // Note that there is no way for m_numberOfElements to be non-positive,
+    // but for sanity we still use the safe conversion function here.
+    //
+    // We also doubt that this will lead to any performance penality, but should this prove
+    // to be false, this can be replaced by a simple cast to size_t
+    return detail::to_size_t(m_numberOfElements);
 }

 template <typename T>
 std::vector<T>
 CuVector<T>::asStdVector() const
 {
-    std::vector<T> temporary(m_numberOfElements);
+    std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
    copyToHost(temporary);
    return temporary;
 }
@@ -231,7 +241,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)

 template <class T>
 void
-CuVector<T>::copyFromHost(const T* dataPointer, int numberOfElements)
+CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
 {
    if (numberOfElements > dim()) {
        OPM_THROW(std::runtime_error,
@@ -244,7 +254,7 @@ CuVector<T>::copyFromHost(const T* dataPointer, int numberOfElements)

 template <class T>
 void
-CuVector<T>::copyToHost(T* dataPointer, int numberOfElements) const
+CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
 {
    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
 }
--- a/opm/simulators/linalg/cuistl/CuVector.hpp
+++ b/opm/simulators/linalg/cuistl/CuVector.hpp
@@ -24,8 +24,10 @@
 #include <fmt/core.h>
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
+#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
 #include <vector>

+
 namespace Opm::cuistl
 {

@@ -80,6 +82,8 @@ public:
     * @note This does CPU to GPU transfer.
     * @note This does synchronous transfer.
     *
+     * @note For now data.size() needs to be within the limits of int due to restrctions of CuBlas.
+     *
     * @param data the vector to copy from
     */
    explicit CuVector(const std::vector<T>& data);
@@ -106,9 +110,11 @@ public:
    /**
     * @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
     *
+     * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
+     *
     * @param numberOfElements number of T elements to allocate
     */
-    explicit CuVector(const int numberOfElements);
+    explicit CuVector(const size_t numberOfElements);


    /**
@@ -119,8 +125,10 @@ public:
     *
     * @param numberOfElements number of T elements to allocate
     * @param dataOnHost data on host/CPU
+     *
+     * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
     */
-    CuVector(const T* dataOnHost, const int numberOfElements);
+    CuVector(const T* dataOnHost, const size_t numberOfElements);

    /**
     * @brief ~CuVector calls cudaFree
@@ -147,7 +155,7 @@ public:
    template <int BlockDimension>
    void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& vector)
    {
-        if (m_numberOfElements != vector.dim()) {
+        if (detail::to_size_t(m_numberOfElements) != vector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. CuVector has size {}, \n"
                                  "however, BlockVector has N() = {}, and dim = {}.",
@@ -169,7 +177,7 @@ public:
    template <int BlockDimension>
    void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& vector) const
    {
-        if (m_numberOfElements != vector.dim()) {
+        if (detail::to_size_t(m_numberOfElements) != vector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
                                  "has has N() = {}, and dim() = {}.",
@@ -188,7 +196,7 @@ public:
     * @note This does synchronous transfer.
     * @note assumes that this vector has numberOfElements elements
     */
-    void copyFromHost(const T* dataPointer, int numberOfElements);
+    void copyFromHost(const T* dataPointer, size_t numberOfElements);

    /**
     * @brief copyFromHost copies numberOfElements to the CPU memory dataPointer
@@ -197,7 +205,7 @@ public:
     * @note This does synchronous transfer.
     * @note assumes that this vector has numberOfElements elements
     */
-    void copyToHost(T* dataPointer, int numberOfElements) const;
+    void copyToHost(T* dataPointer, size_t numberOfElements) const;

    /**
     * @brief copyToHost copies data from an std::vector
@@ -328,6 +336,9 @@ public:

 private:
    T* m_dataOnDevice = nullptr;
+
+    // Note that we store this as int to make sure we are always cublas compatible.
+    // This gives the added benifit that a size_t to int conversion error occurs during construction.
    const int m_numberOfElements;
    detail::CuBlasHandle& m_cuBlasHandle;
 };