diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7224e4c2c..759cb1d40 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -734,6 +734,8 @@ if(CUDA_FOUND)
                 cuseqilu0
                 cuowneroverlapcopy
                 solver_adapter
+                cubuffer
+                cuview
                 PROPERTIES LABELS ${gpu_label})
 endif()
diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index b369710e9..b841478e7 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -204,7 +204,9 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuBlasHandle.cpp)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/cusparse_matrix_operations.cu)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/CuSparseHandle.cpp)
+  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuBuffer.cpp)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
+  ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp)
   ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuDILU.cpp)
@@ -220,9 +222,11 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_check_last_error.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuBlasHandle.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseHandle.hpp)
+  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuBuffer.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuDILU.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuJac.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
+  ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
   ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
@@ -367,6 +371,8 @@ if (HAVE_CUDA)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_converttofloatadapter.cpp)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_handle.cpp)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cublas_safe_call.cpp)
+  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cubuffer.cu)
+  ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuview.cu)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp)
   ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp)
diff --git a/opm/simulators/linalg/cuistl/CuBuffer.cpp b/opm/simulators/linalg/cuistl/CuBuffer.cpp
new file mode 100644
index 000000000..02d7f07f1
--- /dev/null
+++ b/opm/simulators/linalg/cuistl/CuBuffer.cpp
@@ -0,0 +1,204 @@
+/*
+  Copyright 2024 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <algorithm>
+#include <fmt/core.h>
+#include <opm/common/ErrorMacros.hpp>
+#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/CuView.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
+#include <stdexcept>
+#include <vector>
+
+namespace Opm::cuistl
+{
+
+template <class T>
+CuBuffer<T>::CuBuffer(const std::vector<T>& data)
+    : CuBuffer(data.data(), data.size())
+{
+}
+
+template <class T>
+CuBuffer<T>::CuBuffer(const size_t numberOfElements)
+    : m_numberOfElements(numberOfElements)
+{
+    if (numberOfElements < 1) {
+        OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
+    }
+    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
+}
+
+template <class T>
+CuBuffer<T>::CuBuffer(const T* dataOnHost, const size_t numberOfElements)
+    : CuBuffer(numberOfElements)
+{
+    OPM_CUDA_SAFE_CALL(
+        cudaMemcpy(m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+}
+
+template <class T>
+CuBuffer<T>::CuBuffer(const CuBuffer<T>& other)
+    : CuBuffer(other.m_numberOfElements)
+{
+    assertHasElements();
+    assertSameSize(other);
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+                                  other.m_dataOnDevice,
+                                  m_numberOfElements * sizeof(T),
+                                  cudaMemcpyDeviceToDevice));
+}
+
+template <class T>
+CuBuffer<T>::~CuBuffer()
+{
+    OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
+}
+
+template <class T>
+typename CuBuffer<T>::size_type
+CuBuffer<T>::size() const
+{
+    return m_numberOfElements;
+}
+
+template <class T>
+void
+CuBuffer<T>::resize(size_t newSize)
+{
+    if (newSize < 1) {
+        OPM_THROW(std::invalid_argument, "Setting a CuBuffer size to a non-positive number is not allowed");
+    }
+    // Allocate memory for the new buffer
+    T* tmpBuffer = nullptr;
+    OPM_CUDA_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
+
+    // Move the data from the old to the new buffer, truncating on shrink
+    size_t sizeOfMove = std::min(m_numberOfElements, newSize);
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(tmpBuffer,
+                                  m_dataOnDevice,
+                                  sizeOfMove * sizeof(T),
+                                  cudaMemcpyDeviceToDevice));
+
+    // Free the old buffer
+    OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice));
+
+    // Swap in the new buffer and update the size
+    m_dataOnDevice = tmpBuffer;
+    m_numberOfElements = newSize;
+}
+
+template <class T>
+std::vector<T>
+CuBuffer<T>::asStdVector() const
+{
+    std::vector<T> temporary(m_numberOfElements);
+    copyToHost(temporary);
+    return temporary;
+}
+
+template <class T>
+void
+CuBuffer<T>::assertSameSize(const CuBuffer<T>& x) const
+{
+    assertSameSize(x.m_numberOfElements);
+}
+
+template <class T>
+void
+CuBuffer<T>::assertSameSize(size_t size) const
+{
+    if (size != m_numberOfElements) {
+        OPM_THROW(std::invalid_argument,
+                  fmt::format("Given buffer has {} elements, while we have {}.", size, m_numberOfElements));
+    }
+}
+
+template <class T>
+void
+CuBuffer<T>::assertHasElements() const
+{
+    if (m_numberOfElements <= 0) {
+        OPM_THROW(std::invalid_argument, "We have 0 elements");
+    }
+}
+
+template <class T>
+T*
+CuBuffer<T>::data()
+{
+    return m_dataOnDevice;
+}
+
+template <class T>
+const T*
+CuBuffer<T>::data() const
+{
+    return m_dataOnDevice;
+}
+
+template <class T>
+void
+CuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
+{
+    if (numberOfElements > size()) {
+        OPM_THROW(std::runtime_error,
+                  fmt::format("Requesting to copy too many elements. Buffer has {} elements, while {} were requested.",
+                              size(),
+                              numberOfElements));
+    }
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+}
+
+template <class T>
+void
+CuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
+{
+    assertSameSize(numberOfElements);
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
+}
+
+template <class T>
+void
+CuBuffer<T>::copyFromHost(const std::vector<T>& data)
+{
+    copyFromHost(data.data(), data.size());
+}
+
+template <class T>
+void
+CuBuffer<T>::copyToHost(std::vector<T>& data) const
+{
+    copyToHost(data.data(), data.size());
+}
+
+template class CuBuffer<double>;
+template class CuBuffer<float>;
+template class CuBuffer<int>;
+
+template <class T>
+CuView<const T>
+make_view(const CuBuffer<T>& buf)
+{
+    return CuView<const T>(buf.data(), buf.size());
+}
+
+template CuView<const double> make_view(const CuBuffer<double>&);
+template CuView<const float> make_view(const CuBuffer<float>&);
+template CuView<const int> make_view(const CuBuffer<int>&);
+
+} // namespace Opm::cuistl
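Taken together, the implementation above gives CuBuffer value semantics around a raw cudaMalloc allocation. A minimal usage sketch, not part of the patch, assuming a CUDA-enabled build with the cuistl headers on the include path (roundTrip is an illustrative name):

    #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
    #include <opm/simulators/linalg/cuistl/CuView.hpp>
    #include <vector>

    void roundTrip()
    {
        // Host data is copied into freshly cudaMalloc'ed device memory.
        auto hostData = std::vector<double>({1.0, 2.0, 3.0, 4.0});
        auto deviceBuffer = Opm::cuistl::CuBuffer<double>(hostData);

        // resize() allocates a new buffer and truncates on shrink:
        // only the first two elements survive this call.
        deviceBuffer.resize(2);

        // Non-owning, read-only view into the buffer.
        auto view = Opm::cuistl::make_view(deviceBuffer);

        // Synchronous copy back to the host; the result is {1.0, 2.0}.
        auto back = deviceBuffer.asStdVector();
        static_cast<void>(view);
        static_cast<void>(back);
    }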
diff --git a/opm/simulators/linalg/cuistl/CuBuffer.hpp b/opm/simulators/linalg/cuistl/CuBuffer.hpp
new file mode 100644
index 000000000..f9758d271
--- /dev/null
+++ b/opm/simulators/linalg/cuistl/CuBuffer.hpp
@@ -0,0 +1,280 @@
+/*
+  Copyright 2024 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_CUBUFFER_HEADER_HPP
+#define OPM_CUBUFFER_HEADER_HPP
+#include <cstddef>
+#include <cuda_runtime.h>
+#include <dune/common/fvector.hh>
+#include <dune/istl/bvector.hh>
+#include <fmt/core.h>
+#include <opm/common/ErrorMacros.hpp>
+#include <opm/simulators/linalg/cuistl/CuView.hpp>
+#include <stdexcept>
+#include <vector>
+
+
+namespace Opm::cuistl
+{
+
+/**
+ * @brief The CuBuffer class is a simple container class for the GPU.
+ *
+ * Example usage:
+ *
+ * @code{.cpp}
+ * #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+ *
+ * void someFunction() {
+ *     auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
+ *
+ *     auto dataOnGPU = CuBuffer<double>(someDataOnCPU);
+ *
+ *     auto stdVectorOnCPU = dataOnGPU.asStdVector();
+ * }
+ * @endcode
+ *
+ * @tparam T the type to store. Can be either float, double or int.
+ */
+template <typename T>
+class CuBuffer
+{
+public:
+    using field_type = T;
+    using size_type = size_t;
+    using value_type = T;
+
+    /**
+     * @brief CuBuffer allocates new GPU memory of the same size as other and copies the content of the other buffer
+     * to this newly allocated memory.
+     *
+     * @note This does synchronous transfer.
+     *
+     * @param other the buffer to copy from
+     */
+    CuBuffer(const CuBuffer<T>& other);
+
+    /**
+     * @brief CuBuffer allocates new GPU memory of the same size as data and copies the content of the data vector to
+     * this newly allocated memory.
+     *
+     * @note This does CPU to GPU transfer.
+     * @note This does synchronous transfer.
+     *
+     * @param data the vector to copy from
+     */
+    explicit CuBuffer(const std::vector<T>& data);
+
+    /**
+     * @brief Default constructor creating an empty buffer; no GPU memory is allocated
+     */
+    explicit CuBuffer()
+        : m_numberOfElements(0)
+    {
+    }
+
+    /**
+     * @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T)
+     *
+     * @param numberOfElements number of T elements to allocate
+     */
+    explicit CuBuffer(const size_t numberOfElements);
+
+
+    /**
+     * @brief CuBuffer allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
+     * data
+     *
+     * @note This assumes the data is on the CPU.
+     *
+     * @param dataOnHost data on host/CPU
+     * @param numberOfElements number of T elements to allocate
+     */
+    CuBuffer(const T* dataOnHost, const size_t numberOfElements);
+
+    /**
+     * @brief ~CuBuffer calls cudaFree on the owned GPU memory
+     */
+    virtual ~CuBuffer();
+
+    /**
+     * @return the raw pointer to the GPU data
+     */
+    T* data();
+
+    /**
+     * @return the raw pointer to the GPU data
+     */
+    const T* data() const;
+
+    /**
+     * @return a reference to the first element in the CuBuffer
+     */
+    __host__ __device__ T& front()
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataOnDevice[0];
+    }
+
+    /**
+     * @return a reference to the last element in the CuBuffer
+     */
+    __host__ __device__ T& back()
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataOnDevice[m_numberOfElements - 1];
+    }
+
+    /**
+     * @return a copy of the first element in the CuBuffer
+     */
+    __host__ __device__ T front() const
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataOnDevice[0];
+    }
+
+    /**
+     * @return a copy of the last element in the CuBuffer
+     */
+    __host__ __device__ T back() const
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataOnDevice[m_numberOfElements - 1];
+    }
+
+    /**
+     * @brief copyFromHost copies data from a Dune::BlockVector
+     * @param bvector the vector to copy from
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this buffer is equal to the size of the input vector.
+     */
+    template <int BlockDimension>
+    void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
+    {
+        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
+        if (m_numberOfElements != bvector.dim()) {
+            OPM_THROW(std::runtime_error,
+                      fmt::format("Given incompatible vector size. CuBuffer has size {},\n"
+                                  "however, the BlockVector has N() = {} and dim() = {}.",
+                                  m_numberOfElements,
+                                  bvector.N(),
+                                  bvector.dim()));
+        }
+        const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
+        copyFromHost(dataPointer, m_numberOfElements);
+    }
+
+    /**
+     * @brief copyToHost copies data to a Dune::BlockVector
+     * @param bvector the vector to copy to
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this buffer is equal to the size of the input vector.
+     */
+    template <int BlockDimension>
+    void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
+    {
+        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
+        if (m_numberOfElements != bvector.dim()) {
+            OPM_THROW(std::runtime_error,
+                      fmt::format("Given incompatible vector size. CuBuffer has size {},\n"
+                                  "however, the BlockVector has N() = {} and dim() = {}.",
+                                  m_numberOfElements,
+                                  bvector.N(),
+                                  bvector.dim()));
+        }
+        auto dataPointer = static_cast<T*>(&(bvector[0][0]));
+        copyToHost(dataPointer, m_numberOfElements);
+    }
+
+    /**
+     * @brief copyFromHost copies numberOfElements from the CPU memory dataPointer
+     * @param dataPointer raw pointer to CPU memory
+     * @param numberOfElements number of elements to copy
+     * @note This does synchronous transfer.
+     * @note This assumes that this buffer has at least numberOfElements elements.
+     */
+    void copyFromHost(const T* dataPointer, size_t numberOfElements);
+
+    /**
+     * @brief copyToHost copies numberOfElements to the CPU memory dataPointer
+     * @param dataPointer raw pointer to CPU memory
+     * @param numberOfElements number of elements to copy
+     * @note This does synchronous transfer.
+     * @note This assumes that this buffer has numberOfElements elements.
+     */
+    void copyToHost(T* dataPointer, size_t numberOfElements) const;
+
+    /**
+     * @brief copyFromHost copies data from an std::vector
+     * @param data the vector to copy from
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this buffer is equal to the size of the input vector.
+     */
+    void copyFromHost(const std::vector<T>& data);
+
+    /**
+     * @brief copyToHost copies data to an std::vector
+     * @param data the vector to copy to
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this buffer is equal to the size of the input vector.
+     */
+    void copyToHost(std::vector<T>& data) const;
+
+    /**
+     * @brief size returns the size (number of T elements) in the buffer
+     * @return number of elements
+     */
+    size_type size() const;
+
+    /**
+     * @brief resize the number of elements that fit in the buffer; shrinking it causes truncation
+     * @param newSize the number of elements the resized buffer should hold
+     */
+    void resize(size_t newSize);
+
+    /**
+     * @brief creates an std::vector of the same size and copies the GPU data to this std::vector
+     * @return an std::vector containing the elements copied from the GPU.
+     */
+    std::vector<T> asStdVector() const;
+
+private:
+    T* m_dataOnDevice = nullptr;
+    size_t m_numberOfElements;
+
+    void assertSameSize(const CuBuffer<T>& other) const;
+    void assertSameSize(size_t size) const;
+
+    void assertHasElements() const;
+};
+
+template <class T>
+CuView<const T> make_view(const CuBuffer<T>&);
+
+} // namespace Opm::cuistl
+#endif
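The Dune::BlockVector overloads above flatten a blocked host vector into the contiguous device buffer. A small sketch of the intended round trip (illustrative only; syncWithGPU is not a name from the patch):

    #include <dune/common/fvector.hh>
    #include <dune/istl/bvector.hh>
    #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>

    void syncWithGPU()
    {
        // Two blocks of two doubles each, i.e. bvector.dim() == 4 scalars.
        Dune::BlockVector<Dune::FieldVector<double, 2>> bvector(2);
        bvector[0] = {1.0, 2.0};
        bvector[1] = {3.0, 4.0};

        // The buffer must hold exactly bvector.dim() scalars for the copies to succeed.
        auto buffer = Opm::cuistl::CuBuffer<double>(bvector.dim());
        buffer.copyFromHost(bvector); // host -> device
        buffer.copyToHost(bvector);   // device -> host
    }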
+*/
+#include <cuda_runtime.h>
+#include <fmt/core.h>
+#include <opm/common/ErrorMacros.hpp>
+#include <opm/simulators/linalg/cuistl/CuView.hpp>
+#include <stdexcept>
+#include <vector>
+
+namespace Opm::cuistl
+{
+
+template <class T>
+CuView<T>::CuView(std::vector<T>& data)
+    : CuView(data.data(), data.size())
+{
+}
+
+template <class T>
+std::vector<T>
+CuView<T>::asStdVector() const
+{
+    std::vector<T> temporary(m_numberOfElements);
+    copyToHost(temporary);
+    return temporary;
+}
+
+template <class T>
+void
+CuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
+{
+    if (numberOfElements > size()) {
+        OPM_THROW(std::runtime_error,
+                  fmt::format("Requesting to copy too many elements. View has {} elements, while {} were requested.",
+                              size(),
+                              numberOfElements));
+    }
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+}
+
+template <class T>
+void
+CuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
+{
+    assertSameSize(numberOfElements);
+    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
+}
+
+template <class T>
+void
+CuView<T>::copyFromHost(const std::vector<T>& data)
+{
+    copyFromHost(data.data(), data.size());
+}
+
+template <class T>
+void
+CuView<T>::copyToHost(std::vector<T>& data) const
+{
+    copyToHost(data.data(), data.size());
+}
+
+template class CuView<double>;
+template class CuView<float>;
+template class CuView<int>;
+
+} // namespace Opm::cuistl
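Note the deliberate asymmetry in the bounds checks above: copyFromHost accepts any element count up to the view's size, while copyToHost requires an exact match. A short sketch (illustrative only):

    auto buffer = Opm::cuistl::CuBuffer<double>(4);
    auto view = Opm::cuistl::CuView<double>(buffer.data(), buffer.size());

    std::vector<double> two = {1.0, 2.0};
    view.copyFromHost(two.data(), two.size());  // fine: 2 <= 4

    std::vector<double> out(2);
    // view.copyToHost(out.data(), out.size()); // would throw: 2 != 4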
diff --git a/opm/simulators/linalg/cuistl/CuView.hpp b/opm/simulators/linalg/cuistl/CuView.hpp
new file mode 100644
index 000000000..c05edbdc5
--- /dev/null
+++ b/opm/simulators/linalg/cuistl/CuView.hpp
@@ -0,0 +1,390 @@
+/*
+  Copyright 2024 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_CUVIEW_HEADER_HPP
+#define OPM_CUVIEW_HEADER_HPP
+#include <cassert>
+#include <cstddef>
+#include <cuda_runtime.h>
+#include <fmt/core.h>
+#include <iterator>
+#include <opm/common/ErrorMacros.hpp>
+#include <stdexcept>
+#include <vector>
+
+
+namespace Opm::cuistl
+{
+
+/**
+ * @brief The CuView class provides a view of some data allocated on the GPU.
+ * Essentially it only stores a pointer and a size.
+ *
+ * This class supports being used from inside a CUDA/HIP kernel.
+ * Implementations of the functions that may be called inside a kernel are
+ * placed in this header file to avoid expensive RDC (relocatable device code).
+ *
+ * The view will typically provide a view into a CuBuffer and be able to
+ * manipulate the data within it.
+ *
+ * @tparam T Type of the data we store, typically int/float/double w/o const specifier
+ *
+ **/
+template <typename T>
+class CuView
+{
+public:
+    using value_type = T;
+
+    /**
+     * @brief Default constructor creating an empty view that references no data
+     */
+    __host__ __device__ explicit CuView() = default;
+
+    // TODO: we probably don't need anything like this, or is it useful to have views also be able to handle things
+    // on the CPU?
+    /// @brief constructor based on std::vectors; this will make a view on the CPU
+    /// @param data std vector to make a view of
+    CuView(std::vector<T>& data);
+
+    /**
+     * @brief operator[] to retrieve a reference to an item in the view
+     *
+     * @param idx The index of the element
+     */
+    __host__ __device__ T& operator[](size_t idx)
+    {
+#ifndef NDEBUG
+        assertInRange(idx);
+#endif
+        return m_dataPtr[idx];
+    }
+
+    /**
+     * @brief operator[] to retrieve a copy of an item in the view
+     *
+     * @param idx The index of the element
+     */
+    __host__ __device__ T operator[](size_t idx) const
+    {
+#ifndef NDEBUG
+        assertInRange(idx);
+#endif
+        return m_dataPtr[idx];
+    }
+
+
+    /**
+     * @brief CuView creates a view of numberOfElements elements starting at dataOnHost
+     *
+     * @note No memory is allocated or copied; the view only wraps the provided pointer,
+     * which may point to host or device memory.
+     *
+     * @param dataOnHost pointer to the start of the data
+     * @param numberOfElements number of T elements in the view
+     */
+    __host__ __device__ CuView(T* dataOnHost, size_t numberOfElements)
+        : m_dataPtr(dataOnHost)
+        , m_numberOfElements(numberOfElements)
+    {
+    }
+
+    /**
+     * @brief ~CuView is defaulted; a view does not own the data and frees nothing
+     */
+    __host__ __device__ ~CuView() = default;
+
+    /**
+     * @return the raw pointer to the data
+     */
+    __host__ __device__ T* data()
+    {
+        return m_dataPtr;
+    }
+
+    /**
+     * @return the raw pointer to the data
+     */
+    __host__ __device__ const T* data() const
+    {
+        return m_dataPtr;
+    }
+
+    /**
+     * @return a reference to the first element in the CuView
+     */
+    __host__ __device__ T& front()
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataPtr[0];
+    }
+
+    /**
+     * @return a reference to the last element in the CuView
+     */
+    __host__ __device__ T& back()
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataPtr[m_numberOfElements - 1];
+    }
+
+    /**
+     * @return a copy of the first element in the CuView
+     */
+    __host__ __device__ T front() const
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataPtr[0];
+    }
+
+    /**
+     * @return a copy of the last element in the CuView
+     */
+    __host__ __device__ T back() const
+    {
+#ifndef NDEBUG
+        assertHasElements();
+#endif
+        return m_dataPtr[m_numberOfElements - 1];
+    }
+
+    /**
+     * @brief copyFromHost copies numberOfElements from the CPU memory dataPointer
+     * @param dataPointer raw pointer to CPU memory
+     * @param numberOfElements number of elements to copy
+     * @note This does synchronous transfer.
+     * @note This assumes that this view has at least numberOfElements elements.
+     */
+    void copyFromHost(const T* dataPointer, size_t numberOfElements);
+
+    /**
+     * @brief copyToHost copies numberOfElements to the CPU memory dataPointer
+     * @param dataPointer raw pointer to CPU memory
+     * @param numberOfElements number of elements to copy
+     * @note This does synchronous transfer.
+     * @note This assumes that this view has numberOfElements elements.
+     */
+    void copyToHost(T* dataPointer, size_t numberOfElements) const;
+
+    /**
+     * @brief copyFromHost copies data from an std::vector
+     * @param data the vector to copy from
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this view is equal to the size of the input vector.
+     */
+    void copyFromHost(const std::vector<T>& data);
+
+    /**
+     * @brief copyToHost copies data to an std::vector
+     * @param data the vector to copy to
+     *
+     * @note This does synchronous transfer.
+     * @note This assumes that the size of this view is equal to the size of the input vector.
+     */
+    void copyToHost(std::vector<T>& data) const;
+
+    /**
+     * @brief size returns the size (number of T elements) in the view
+     * @return number of elements
+     */
+    __host__ __device__ size_t size() const
+    {
+        return m_numberOfElements;
+    }
+
+    /**
+     * @brief creates an std::vector of the same size and copies the viewed data to this std::vector
+     * @return an std::vector containing the elements copied from the view.
+     */
+    std::vector<T> asStdVector() const;
+
+    /// @brief Iterator class to make CuViews more similar to std containers
+    class iterator
+    {
+    public:
+        // Iterator typedefs
+        using iterator_category = std::forward_iterator_tag;
+        using difference_type = std::ptrdiff_t;
+        using value_type = T;
+        using pointer = T*;
+        using reference = T&;
+
+        /// @brief Create iterator from a pointer
+        /// @param ptr provided pointer that will become an iterator
+        __host__ __device__ iterator(T* ptr)
+            : m_ptr(ptr)
+        {
+        }
+
+        /// @brief Dereference operator
+        /// @return a reference to what the iterator points at
+        __host__ __device__ reference operator*() const
+        {
+            return *m_ptr;
+        }
+
+        /// @brief Pre-increment operator
+        /// @return the iterator after it is incremented
+        __host__ __device__ iterator& operator++()
+        {
+            ++m_ptr;
+            return *this;
+        }
+
+        /// @brief Post-increment operator; the int is a placeholder distinguishing it from pre-increment
+        /// @return the iterator before it is incremented
+        __host__ __device__ iterator operator++(int)
+        {
+            iterator tmp = *this;
+            ++m_ptr;
+            return tmp;
+        }
+
+        /// @brief Pre-decrement operator
+        /// @return the iterator after it is decremented
+        __host__ __device__ iterator& operator--()
+        {
+            --m_ptr;
+            return *this;
+        }
+
+        /// @brief Post-decrement operator; the int is a placeholder distinguishing it from pre-decrement
+        /// @return the iterator before it is decremented
+        __host__ __device__ iterator operator--(int)
+        {
+            iterator tmp = *this;
+            --m_ptr;
+            return tmp;
+        }
+
+        /// @brief Inequality comparison operator
+        /// @return true if the iterators point to different addresses
+        __host__ __device__ bool operator!=(const iterator& other) const
+        {
+            return !(m_ptr == other.m_ptr);
+        }
+
+        /// @brief Equality comparison operator
+        /// @return true if the iterators point to the same address
+        __host__ __device__ bool operator==(const iterator& other) const
+        {
+            return m_ptr == other.m_ptr;
+        }
+
+        /// @brief Subtraction operator
+        /// @param other iterator to subtract
+        /// @return the distance between the two iterators
+        __host__ __device__ difference_type operator-(const iterator& other) const
+        {
+            return m_ptr - other.m_ptr;
+        }
+
+        /// @brief Subtraction of a given number of elements from the iterator
+        /// @param n the number of elements to step backwards
+        /// @return an iterator pointing to a location n steps behind
+        __host__ __device__ iterator operator-(difference_type n) const
+        {
+            return iterator(m_ptr - n);
+        }
+
+        /// @brief Addition of a given number of elements to the iterator
+        /// @param n the number of elements to step forwards
+        /// @return an iterator pointing to a location n steps ahead
+        __host__ __device__ iterator operator+(difference_type n) const
+        {
+            return iterator(m_ptr + n);
+        }
+
+        /// @brief Less-than comparison
+        /// @param other iterator
+        /// @return true if this iterator is less than the other iterator
+        __host__ __device__ bool operator<(const iterator& other) const
+        {
+            return m_ptr < other.m_ptr;
+        }
+
+        /// @brief Greater-than comparison
+        /// @param other iterator
+        /// @return true if this iterator is greater than the other iterator
+        __host__ __device__ bool operator>(const iterator& other) const
+        {
+            return m_ptr > other.m_ptr;
+        }
+
+    private:
+        // Pointer to the current element
+        T* m_ptr;
+    };
+
+    /**
+     * @brief Get an iterator pointing to the first element of the view
+     * @return iterator to traverse the view
+     */
+    __host__ __device__ iterator begin()
+    {
+        return iterator(m_dataPtr);
+    }
+
+    /**
+     * @brief Get an iterator pointing to the first element of the view
+     * @return iterator to traverse the view
+     */
+    __host__ __device__ iterator begin() const
+    {
+        return iterator(m_dataPtr);
+    }
+
+    /**
+     * @brief Get an iterator pointing to the address after the last element of the view
+     * @return iterator pointing to the first address after the end of the view
+     */
+    __host__ __device__ iterator end()
+    {
+        return iterator(m_dataPtr + m_numberOfElements);
+    }
+
+    /**
+     * @brief Get an iterator pointing to the address after the last element of the view
+     * @return iterator pointing to the first address after the end of the view
+     */
+    __host__ __device__ iterator end() const
+    {
+        return iterator(m_dataPtr + m_numberOfElements);
+    }
+
+private:
+    T* m_dataPtr = nullptr;
+    size_t m_numberOfElements = 0;
+
+    /// @brief Helper function to assert that another view has the same size
+    /// @param other view
+    __host__ __device__ void assertSameSize(const CuView<T>& other) const
+    {
+        assertSameSize(other.m_numberOfElements);
+    }
+
+    /// @brief Helper function to assert that the size of this view equals a given value
+    /// @param size The value to compare with the size of this view
+    __host__ __device__ void assertSameSize(size_t size) const
+    {
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
+        assert(size == m_numberOfElements);
+#else
+        if (size != m_numberOfElements) {
+            OPM_THROW(std::invalid_argument,
+                      fmt::format("Given view has {} elements, while we have {}.", size, m_numberOfElements));
+        }
+#endif
+    }
+
+    /// @brief Helper function to assert that the view has at least one element
+    __host__ __device__ void assertHasElements() const
+    {
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
+        assert(m_numberOfElements > 0);
+#else
+        if (m_numberOfElements <= 0) {
+            OPM_THROW(std::invalid_argument, "We have 0 elements");
+        }
+#endif
+    }
+
+    /// @brief Helper function to determine if an index is within the range of valid indexes in the view
+    __host__ __device__ void assertInRange(size_t idx) const
+    {
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
+        assert(idx < m_numberOfElements);
+#else
+        if (idx >= m_numberOfElements) {
+            OPM_THROW(std::invalid_argument,
+                      fmt::format("The index {} is not in the range [0, {}).", idx, m_numberOfElements));
+        }
+#endif
+    }
+};
+
+} // namespace Opm::cuistl
+#endif
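Since the iterator above models a forward iterator, a CPU-backed CuView composes with single-pass standard algorithms. A minimal sketch (assumes the viewed data lives on the host, as dereferencing a device pointer on the CPU is undefined):

    #include <numeric>
    #include <opm/simulators/linalg/cuistl/CuView.hpp>
    #include <vector>

    double sumOfView()
    {
        auto data = std::vector<double>({1.0, 2.0, 3.0});
        auto view = Opm::cuistl::CuView<double>(data.data(), data.size());

        // std::accumulate only needs operator*, operator++ and operator!=.
        return std::accumulate(view.begin(), view.end(), 0.0); // 6.0
    }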
diff --git a/opm/simulators/linalg/cuistl/detail/safe_conversion.hpp b/opm/simulators/linalg/cuistl/detail/safe_conversion.hpp
index 2aa2db6e8..e71330e60 100644
--- a/opm/simulators/linalg/cuistl/detail/safe_conversion.hpp
+++ b/opm/simulators/linalg/cuistl/detail/safe_conversion.hpp
@@ -26,6 +26,7 @@
 #include <opm/common/ErrorMacros.hpp>
 #include <fmt/format.h>
 #include <limits>
+#include <cassert>
 
 
 /**
@@ -81,7 +82,7 @@ to_int(std::size_t s)
  * @throw std::invalid_argument if i is negative.
  * @todo This can be done for more generic types, but then it is probably wise to wait for C++20's cmp-functions
  */
-inline std::size_t
+__host__ __device__ inline std::size_t
 to_size_t(int i)
 {
     static_assert(
@@ -95,10 +96,13 @@ to_size_t(int i)
         sizeof(int) <= sizeof(std::size_t),
         "Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
 
-
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
+    assert(i >= int(0));
+#else
     if (i < int(0)) {
         OPM_THROW(std::invalid_argument, fmt::format("Trying to convert the negative number {} to size_t.", i));
     }
+#endif
 
     return std::size_t(i);
 }
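The effect of this change is that to_size_t stays usable inside kernels: on the host a negative input still throws, while on the device, where C++ exceptions are unavailable, the check degrades to an assert that is active in debug builds only. A hedged host-side sketch:

    #include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>

    void hostSide()
    {
        auto ok = Opm::cuistl::detail::to_size_t(42); // == std::size_t{42}
        // Opm::cuistl::detail::to_size_t(-1);        // on the host: throws std::invalid_argument
        // In device code the same call would assert in debug builds
        // and be unchecked in release builds.
        static_cast<void>(ok);
    }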
diff --git a/tests/cuistl/test_cubuffer.cu b/tests/cuistl/test_cubuffer.cu
new file mode 100644
index 000000000..04e72ce45
--- /dev/null
+++ b/tests/cuistl/test_cubuffer.cu
@@ -0,0 +1,53 @@
+/*
+  Copyright 2024 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestCuBuffer
+
+#include <boost/mpl/list.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <cuda_runtime.h>
+#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/CuView.hpp>
+
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+BOOST_AUTO_TEST_CASE(TestMakeView)
+{
+    // test that we can create buffers and make views of the buffers using the pointer constructor
+    auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
+    const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
+
+    auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
+    bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
+
+    BOOST_CHECK(gpuBufCreatedView);
+
+    // test that we can make views of buffers by using the cubuffer constructor
+    auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
+    bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
+
+    BOOST_CHECK(gpuBufCreatedView2);
+
+    // check that we retrieve the same values when pulling the data back to the cpu as a vector
+    auto gpuBufOnCpu = gpubuf.asStdVector();
+    BOOST_CHECK_EQUAL_COLLECTIONS(gpuBufOnCpu.begin(), gpuBufOnCpu.end(), buf.begin(), buf.end());
+}
diff --git a/tests/cuistl/test_cuview.cu b/tests/cuistl/test_cuview.cu
new file mode 100644
index 000000000..e611f4d32
--- /dev/null
+++ b/tests/cuistl/test_cuview.cu
@@ -0,0 +1,115 @@
+/*
+  Copyright 2024 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestCuView
+
+#include <algorithm>
+#include <array>
+#include <boost/mpl/list.hpp>
+#include <boost/test/unit_test.hpp>
+#include <cuda_runtime.h>
+#include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
+#include <opm/simulators/linalg/cuistl/CuView.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
+#include <random>
+#include <type_traits>
+#include <vector>
+
+using CuViewDouble = ::Opm::cuistl::CuView<double>;
+using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
+
+__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b)
+{
+    b[0] = a.front();
+    b[1] = a.back();
+    b[2] = *a.begin();
+    b[3] = *(--a.end());
+
+    a[0] = a[2];
+}
+
+BOOST_AUTO_TEST_CASE(TestCreationAndIndexing)
+{
+    // A simple test to check that we can move data to and from the GPU
+    auto cpubuffer = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
+    auto cubuffer = CuBufferDouble(cpubuffer);
+    auto cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
+    const auto const_cuview = CuViewDouble(cubuffer.data(), cubuffer.size());
+
+    auto stdVecOfCuView = cuview.asStdVector();
+    auto const_stdVecOfCuView = const_cuview.asStdVector();
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        stdVecOfCuView.begin(), stdVecOfCuView.end(), cpubuffer.begin(), cpubuffer.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        stdVecOfCuView.begin(), stdVecOfCuView.end(), const_stdVecOfCuView.begin(), const_stdVecOfCuView.end());
+}
+
+BOOST_AUTO_TEST_CASE(TestCuViewOnCPUTypes)
+{
+    auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
+    auto cpuview = CuViewDouble(buf.data(), buf.size());
+    const auto const_cpuview = CuViewDouble(buf.data(), buf.size());
+
+    // check that front() and back() return references on a mutable view and values on a const view
+    bool correct_type_of_cpu_front = std::is_same_v<double&, decltype(cpuview.front())>;
+    bool correct_type_of_cpu_back = std::is_same_v<double&, decltype(cpuview.back())>;
+    bool correct_type_of_const_cpu_front = std::is_same_v<double, decltype(const_cpuview.front())>;
+    bool correct_type_of_const_cpu_back = std::is_same_v<double, decltype(const_cpuview.back())>;
+
+    BOOST_CHECK(correct_type_of_cpu_front);
+    BOOST_CHECK(correct_type_of_cpu_back);
+    BOOST_CHECK(correct_type_of_const_cpu_front);
+    BOOST_CHECK(correct_type_of_const_cpu_back);
+
+    // check that the values are correct
+    BOOST_CHECK(cpuview.front() == buf.front());
+    BOOST_CHECK(cpuview.back() == buf.back());
+}
+
+BOOST_AUTO_TEST_CASE(TestCuViewOnCPUWithSTLIteratorAlgorithm)
+{
+    auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
+    auto cpuview = CuViewDouble(buf.data(), buf.size());
+    std::sort(buf.begin(), buf.end());
+    BOOST_CHECK(42.0 == cpuview[3]);
+}
+
+BOOST_AUTO_TEST_CASE(TestCuViewOnGPU)
+{
+    auto buf = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
+    auto cubufA = CuBufferDouble(buf);
+    auto cuviewA = CuViewDouble(cubufA.data(), cubufA.size());
+    auto cubufB = CuBufferDouble(4);
+    auto cuviewB = CuViewDouble(cubufB.data(), cubufB.size());
+
+    useCuViewOnGPU<<<1, 1>>>(cuviewA, cuviewB);
+
+    auto vecA = cuviewA.asStdVector();
+    auto vecB = cuviewB.asStdVector();
+
+    // checks that front/back/begin/end work
+    BOOST_CHECK(vecB[0] == buf[0]);
+    BOOST_CHECK(vecB[1] == buf[4]);
+    BOOST_CHECK(vecB[2] == buf[0]);
+    BOOST_CHECK(vecB[3] == buf[4]);
+
+    // checks that view[0] = view[2] works
+    BOOST_CHECK(buf[2] == vecA[0]);
+}