/* Copyright 2024 SINTEF AS This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #ifndef OPM_GPUBUFFER_HEADER_HPP #define OPM_GPUBUFFER_HEADER_HPP #include #include #include #include #include #include #include #include #include namespace Opm::gpuistl { /** * @brief The GpuBuffer class is a simple container class for the GPU. * * * Example usage: * * @code{.cpp} * #include * * void someFunction() { * auto someDataOnCPU = std::vector({1.0, 2.0, 42.0, 59.9451743, 10.7132692}); * * auto dataOnGPU = GpuBuffer(someDataOnCPU); * * auto stdVectorOnCPU = dataOnGPU.asStdVector(); * } * * @tparam T the type to store. Can be either float, double or int. */ template class GpuBuffer { public: using field_type = T; using size_type = size_t; using value_type = T; /** * @brief GpuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer to * this newly allocated memory. * * @note This does synchronous transfer. * * @param other the buffer to copy from */ GpuBuffer(const GpuBuffer& other); /** * @brief GpuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to * this newly allocated memory. * * @note This does CPU to GPU transfer. * @note This does synchronous transfer. * * @param data the vector to copy from */ explicit GpuBuffer(const std::vector& data); /** * @brief Default constructor that will initialize cublas and allocate 0 bytes of memory */ explicit GpuBuffer(); /** * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) * * @param numberOfElements number of T elements to allocate */ explicit GpuBuffer(const size_t numberOfElements); /** * @brief GpuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from * data * * @note This assumes the data is on the CPU. * * @param numberOfElements number of T elements to allocate * @param dataOnHost data on host/CPU */ GpuBuffer(const T* dataOnHost, const size_t numberOfElements); /** * @brief ~GpuBuffer calls cudaFree */ virtual ~GpuBuffer(); /** * @return the raw pointer to the GPU data */ T* data(); /** * @return the raw pointer to the GPU data */ const T* data() const; /** * @return fetch the first element in a GpuBuffer */ __host__ __device__ T& front() { #ifndef NDEBUG assertHasElements(); #endif return m_dataOnDevice[0]; } /** * @return fetch the last element in a GpuBuffer */ __host__ __device__ T& back() { #ifndef NDEBUG assertHasElements(); #endif return m_dataOnDevice[m_numberOfElements-1]; } /** * @return fetch the first element in a GpuBuffer */ __host__ __device__ T front() const { #ifndef NDEBUG assertHasElements(); #endif return m_dataOnDevice[0]; } /** * @return fetch the last element in a GpuBuffer */ __host__ __device__ T back() const { #ifndef NDEBUG assertHasElements(); #endif return m_dataOnDevice[m_numberOfElements-1]; } /** * @brief copyFromHost copies data from a Dune::BlockVector * @param bvector the vector to copy from * * @note This does synchronous transfer. * @note This assumes that the size of this vector is equal to the size of the input vector. */ template void copyFromHost(const Dune::BlockVector>& bvector) { // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension if (m_numberOfElements != bvector.size()) { OPM_THROW(std::runtime_error, fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n" "however, BlockVector has N() = {}, and size = {}.", m_numberOfElements, bvector.N(), bvector.size())); } const auto dataPointer = static_cast(&(bvector[0][0])); copyFromHost(dataPointer, m_numberOfElements); } /** * @brief copyToHost copies data to a Dune::BlockVector * @param bvector the vector to copy to * * @note This does synchronous transfer. * @note This assumes that the size of this vector is equal to the size of the input vector. */ template void copyToHost(Dune::BlockVector>& bvector) const { // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension if (m_numberOfElements != bvector.size()) { OPM_THROW(std::runtime_error, fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector " "has has N() = {}, and size() = {}.", m_numberOfElements, bvector.N(), bvector.size())); } const auto dataPointer = static_cast(&(bvector[0][0])); copyToHost(dataPointer, m_numberOfElements); } /** * @brief copyFromHost copies numberOfElements from the CPU memory dataPointer * @param dataPointer raw pointer to CPU memory * @param numberOfElements number of elements to copy * @note This does synchronous transfer. * @note assumes that this buffer has numberOfElements elements */ void copyFromHost(const T* dataPointer, size_t numberOfElements); /** * @brief copyFromHost copies numberOfElements to the CPU memory dataPointer * @param dataPointer raw pointer to CPU memory * @param numberOfElements number of elements to copy * @note This does synchronous transfer. * @note assumes that this buffer has numberOfElements elements */ void copyToHost(T* dataPointer, size_t numberOfElements) const; /** * @brief copyToHost copies data from an std::vector * @param data the vector to copy from * * @note This does synchronous transfer. * @note This assumes that the size of this buffer is equal to the size of the input vector. */ void copyFromHost(const std::vector& data); /** * @brief copyToHost copies data to an std::vector * @param data the vector to copy to * * @note This does synchronous transfer. * @note This assumes that the size of this buffer is equal to the size of the input vector. */ void copyToHost(std::vector& data) const; /** * @brief size returns the size (number of T elements) in the buffer * @return number of elements */ size_type size() const; /** * @brief resize the number of elements that fit in the buffer, shrinking it causes truncation * @param number of elements in the new buffer */ void resize(size_t); /** * @brief creates an std::vector of the same size and copies the GPU data to this std::vector * @return an std::vector containing the elements copied from the GPU. */ std::vector asStdVector() const; private: T* m_dataOnDevice = nullptr; size_t m_numberOfElements; void assertSameSize(const GpuBuffer& other) const; void assertSameSize(size_t size) const; void assertHasElements() const; }; template GpuView make_view(const GpuBuffer&); } // namespace Opm::gpuistl #endif