diff --git a/opm/simulators/linalg/cuistl/CuVector.cpp b/opm/simulators/linalg/cuistl/CuVector.cpp index 609ab2f38..f5d48cbf5 100644 --- a/opm/simulators/linalg/cuistl/CuVector.cpp +++ b/opm/simulators/linalg/cuistl/CuVector.cpp @@ -41,24 +41,25 @@ namespace Opm::cuistl template CuVector::CuVector(const std::vector& data) - : CuVector(data.data(), data.size()) + : CuVector(data.data(), detail::to_int(data.size())) { } template -CuVector::CuVector(const int numberOfElements) - : m_numberOfElements(numberOfElements) +CuVector::CuVector(const size_t numberOfElements) + : m_numberOfElements(detail::to_int(numberOfElements)) , m_cuBlasHandle(detail::CuBlasHandle::getInstance()) { - OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements)); + OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements))); } template -CuVector::CuVector(const T* dataOnHost, const int numberOfElements) +CuVector::CuVector(const T* dataOnHost, const size_t numberOfElements) : CuVector(numberOfElements) { - OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); + OPM_CUDA_SAFE_CALL(cudaMemcpy( + m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice)); } template @@ -66,7 +67,7 @@ CuVector& CuVector::operator=(T scalar) { CHECKPOSITIVESIZE - detail::setVectorValue(data(), m_numberOfElements, scalar); + detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar); return *this; } @@ -79,8 +80,10 @@ CuVector::operator=(const CuVector& other) if (other.m_numberOfElements != m_numberOfElements) { OPM_THROW(std::invalid_argument, "Can only copy from vector of same size."); } - OPM_CUDA_SAFE_CALL( - cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice)); + OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, + other.m_dataOnDevice, + detail::to_size_t(m_numberOfElements) * sizeof(T), + cudaMemcpyDeviceToDevice)); return *this; } @@ -90,8 +93,10 @@ CuVector::CuVector(const CuVector& other) { CHECKPOSITIVESIZE CHECKSIZE(other) - OPM_CUDA_SAFE_CALL( - cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice)); + OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, + other.m_dataOnDevice, + detail::to_size_t(m_numberOfElements) * sizeof(T), + cudaMemcpyDeviceToDevice)); } template @@ -111,14 +116,19 @@ template typename CuVector::size_type CuVector::dim() const { - return m_numberOfElements; + // Note that there is no way for m_numberOfElements to be non-positive, + // but for sanity we still use the safe conversion function here. + // + // We also doubt that this will lead to any performance penality, but should this prove + // to be false, this can be replaced by a simple cast to size_t + return detail::to_size_t(m_numberOfElements); } template std::vector CuVector::asStdVector() const { - std::vector temporary(m_numberOfElements); + std::vector temporary(detail::to_size_t(m_numberOfElements)); copyToHost(temporary); return temporary; } @@ -231,7 +241,7 @@ CuVector::operator-=(const CuVector& other) template void -CuVector::copyFromHost(const T* dataPointer, int numberOfElements) +CuVector::copyFromHost(const T* dataPointer, size_t numberOfElements) { if (numberOfElements > dim()) { OPM_THROW(std::runtime_error, @@ -244,7 +254,7 @@ CuVector::copyFromHost(const T* dataPointer, int numberOfElements) template void -CuVector::copyToHost(T* dataPointer, int numberOfElements) const +CuVector::copyToHost(T* dataPointer, size_t numberOfElements) const { OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); } diff --git a/opm/simulators/linalg/cuistl/CuVector.hpp b/opm/simulators/linalg/cuistl/CuVector.hpp index d7a3e7a60..3aa1f6d07 100644 --- a/opm/simulators/linalg/cuistl/CuVector.hpp +++ b/opm/simulators/linalg/cuistl/CuVector.hpp @@ -24,8 +24,10 @@ #include #include #include +#include #include + namespace Opm::cuistl { @@ -80,6 +82,8 @@ public: * @note This does CPU to GPU transfer. * @note This does synchronous transfer. * + * @note For now data.size() needs to be within the limits of int due to restrctions of CuBlas. + * * @param data the vector to copy from */ explicit CuVector(const std::vector& data); @@ -106,9 +110,11 @@ public: /** * @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) * + * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas + * * @param numberOfElements number of T elements to allocate */ - explicit CuVector(const int numberOfElements); + explicit CuVector(const size_t numberOfElements); /** @@ -119,8 +125,10 @@ public: * * @param numberOfElements number of T elements to allocate * @param dataOnHost data on host/CPU + * + * @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas */ - CuVector(const T* dataOnHost, const int numberOfElements); + CuVector(const T* dataOnHost, const size_t numberOfElements); /** * @brief ~CuVector calls cudaFree @@ -147,7 +155,7 @@ public: template void copyFromHost(const Dune::BlockVector>& vector) { - if (m_numberOfElements != vector.dim()) { + if (detail::to_size_t(m_numberOfElements) != vector.dim()) { OPM_THROW(std::runtime_error, fmt::format("Given incompatible vector size. CuVector has size {}, \n" "however, BlockVector has N() = {}, and dim = {}.", @@ -169,7 +177,7 @@ public: template void copyToHost(Dune::BlockVector>& vector) const { - if (m_numberOfElements != vector.dim()) { + if (detail::to_size_t(m_numberOfElements) != vector.dim()) { OPM_THROW(std::runtime_error, fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector " "has has N() = {}, and dim() = {}.", @@ -188,7 +196,7 @@ public: * @note This does synchronous transfer. * @note assumes that this vector has numberOfElements elements */ - void copyFromHost(const T* dataPointer, int numberOfElements); + void copyFromHost(const T* dataPointer, size_t numberOfElements); /** * @brief copyFromHost copies numberOfElements to the CPU memory dataPointer @@ -197,7 +205,7 @@ public: * @note This does synchronous transfer. * @note assumes that this vector has numberOfElements elements */ - void copyToHost(T* dataPointer, int numberOfElements) const; + void copyToHost(T* dataPointer, size_t numberOfElements) const; /** * @brief copyToHost copies data from an std::vector @@ -328,6 +336,9 @@ public: private: T* m_dataOnDevice = nullptr; + + // Note that we store this as int to make sure we are always cublas compatible. + // This gives the added benifit that a size_t to int conversion error occurs during construction. const int m_numberOfElements; detail::CuBlasHandle& m_cuBlasHandle; };