OPM/opm-simulators (https://github.com/OPM/opm-simulators.git)

Added CuVector with tests.

commit 858d8b189b
parent dc3316a103
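For context, a minimal sketch of how the new class is meant to be used, based on the usage example documented in CuVector.hpp and on tests/cuistl/test_cuvector.cpp below (the free function wrapper is hypothetical, and a CUDA-enabled build of the cuistl sources is assumed):

#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <vector>

std::vector<double> scaleOnGpu(const std::vector<double>& hostData)
{
    // Allocates GPU memory and copies the host data over (synchronous transfer).
    auto deviceVector = Opm::cuistl::CuVector<double>(hostData);

    // Scale every element by 4.0; this dispatches to cuBLAS via operator*=.
    deviceVector *= 4.0;

    // Copy the result back into a std::vector on the host.
    return deviceVector.asStdVector();
}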
@ -147,6 +147,9 @@ if(CUDA_FOUND)
 # CUISTL SOURCE
 list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp)
 list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp)
+list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp)
+list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)

 # CUISTL HEADERS
 list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp)
@ -155,6 +158,7 @@ if(CUDA_FOUND)
 list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp)
 list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp)
 list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp)
+list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuVector.hpp)

 endif()
 if(OPENCL_FOUND)
@ -239,8 +243,7 @@ if(CUDA_FOUND)
 list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_check_last_error.cpp)
 list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp)
 list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp)
+list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)

 endif()
 if(OPENCL_FOUND)
 list(APPEND TEST_SOURCE_FILES tests/test_openclSolver.cpp)
opm/simulators/linalg/cuistl/CuVector.cpp (new file, 268 lines)
@@ -0,0 +1,268 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>

#define CHECKSIZE(x) \
    if (x.m_numberOfElements != m_numberOfElements) { \
        OPM_THROW(std::invalid_argument, \
                  fmt::format("Given vector has {}, while we have {}.", x.m_numberOfElements, m_numberOfElements)); \
    }
#define CHECKPOSITIVESIZE \
    if (m_numberOfElements <= 0) { \
        OPM_THROW(std::invalid_argument, "We have 0 elements"); \
    }

namespace Opm::cuistl
{

template <class T>
CuVector<T>::CuVector(const std::vector<T>& data)
    : CuVector(data.data(), data.size())
{
}

template <class T>
CuVector<T>::CuVector(const int numberOfElements)
    : m_numberOfElements(numberOfElements)
    , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{
    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
}

template <class T>
CuVector<T>::CuVector(const T* dataOnHost, const int numberOfElements)
    : CuVector(numberOfElements)
{
    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}

template <class T>
CuVector<T>&
CuVector<T>::operator=(T scalar)
{
    CHECKPOSITIVESIZE
    detail::setVectorValue(data(), m_numberOfElements, scalar);
    return *this;
}

template <class T>
CuVector<T>&
CuVector<T>::operator=(const CuVector<T>& other)
{
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
    if (other.m_numberOfElements != m_numberOfElements) {
        OPM_THROW(std::invalid_argument, "Can only copy from vector of same size.");
    }
    OPM_CUDA_SAFE_CALL(
        cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice));
    return *this;
}

template <class T>
CuVector<T>::CuVector(const CuVector<T>& other)
    : CuVector(other.m_numberOfElements)
{
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
    OPM_CUDA_SAFE_CALL(
        cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, m_numberOfElements * sizeof(T), cudaMemcpyDeviceToDevice));
}

template <class T>
CuVector<T>::~CuVector()
{
    OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice));
}

template <typename T>
const T*
CuVector<T>::data() const
{
    return m_dataOnDevice;
}

template <typename T>
typename CuVector<T>::size_type
CuVector<T>::dim() const
{
    return m_numberOfElements;
}

template <typename T>
std::vector<T>
CuVector<T>::asStdVector() const
{
    std::vector<T> temporary(m_numberOfElements);
    copyToHost(temporary);
    return temporary;
}

template <typename T>
void
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
{
    detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
}

template <typename T>
T*
CuVector<T>::data()
{
    return m_dataOnDevice;
}

template <class T>
CuVector<T>&
CuVector<T>::operator*=(const T& scalar)
{
    CHECKPOSITIVESIZE
    OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
    return *this;
}

template <class T>
CuVector<T>&
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
{
    CHECKPOSITIVESIZE
    CHECKSIZE(y)
    OPM_CUBLAS_SAFE_CALL(detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, y.data(), 1, data(), 1));
    return *this;
}

template <class T>
T
CuVector<T>::dot(const CuVector<T>& other) const
{
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
    T result = T(0);
    OPM_CUBLAS_SAFE_CALL(
        detail::cublasDot(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &result));
    return result;
}

template <class T>
T
CuVector<T>::two_norm() const
{
    CHECKPOSITIVESIZE
    T result = T(0);
    OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &result));
    return result;
}

template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
{
    return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}

template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
{
    // TODO: [perf] Optimize this to a single call
    return std::sqrt(this->dot(*this, indexSet, buffer));
}

template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
{
    CuVector<T> buffer(indexSet.dim());
    return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}

template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
{
    CuVector<T> buffer(indexSet.dim());
    // TODO: [perf] Optimize this to a single call
    return std::sqrt(this->dot(*this, indexSet, buffer));
}

template <class T>
CuVector<T>&
CuVector<T>::operator+=(const CuVector<T>& other)
{
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
    // TODO: [perf] Make a specialized version of this
    return axpy(1.0, other);
}

template <class T>
CuVector<T>&
CuVector<T>::operator-=(const CuVector<T>& other)
{
    CHECKPOSITIVESIZE
    CHECKSIZE(other)
    // TODO: [perf] Make a specialized version of this
    return axpy(-1.0, other);
}

template <class T>
void
CuVector<T>::copyFromHost(const T* dataPointer, int numberOfElements)
{
    if (numberOfElements > dim()) {
        OPM_THROW(std::runtime_error,
                  fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.",
                              dim(),
                              numberOfElements));
    }
    OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}

template <class T>
void
CuVector<T>::copyToHost(T* dataPointer, int numberOfElements) const
{
    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
}

template <class T>
void
CuVector<T>::copyFromHost(const std::vector<T>& data)
{
    copyFromHost(data.data(), data.size());
}

template <class T>
void
CuVector<T>::copyToHost(std::vector<T>& data) const
{
    copyToHost(data.data(), data.size());
}

template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;

} // namespace Opm::cuistl
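Note that the member definitions above are explicitly instantiated only for double, float and int, so any other element type compiles against the header but fails at link time. A minimal illustration (hypothetical snippet, not part of the commit):

#include <opm/simulators/linalg/cuistl/CuVector.hpp>

void instantiationExample()
{
    Opm::cuistl::CuVector<double> fine(10); // covered by "template class CuVector<double>;" above
    // Opm::cuistl::CuVector<long double> broken(10); // would compile here, but fail to link,
    //                                                // since no explicit instantiation exists for long double
}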
opm/simulators/linalg/cuistl/CuVector.hpp (new file, 335 lines)
@@ -0,0 +1,335 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUVECTOR_HEADER_HPP
#define OPM_CUVECTOR_HEADER_HPP
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <exception>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <vector>

namespace Opm::cuistl
{

/**
 * @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
 *
 * @note we currently only support simple raw primitives for T (double and float)
 *
 * @note this vector has no notion of block size. The user is responsible for allocating
 * the correct number of primitives (doubles or floats)
 *
 * Example usage:
 *
 * @code{.cpp}
 * #include <opm/simulators/linalg/cuistl/CuVector.hpp>
 *
 * void someFunction() {
 *     auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
 *
 *     auto dataOnGPU = CuVector<double>(someDataOnCPU);
 *
 *     // Multiply by 4.0:
 *     dataOnGPU *= 4.0;
 *
 *     // Get data back on CPU in another vector:
 *     auto stdVectorOnCPU = dataOnGPU.asStdVector();
 * }
 * @endcode
 *
 * @tparam T the type to store. Can be either float, double or int.
 */
template <typename T>
class CuVector
{
public:
    using field_type = T;
    using size_type = size_t;

    /**
     * @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector
     * to this newly allocated memory.
     *
     * @note This does synchronous transfer.
     *
     * @param other the vector to copy from
     */
    CuVector(const CuVector<T>& other);

    /**
     * @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
     * this newly allocated memory.
     *
     * @note This does CPU to GPU transfer.
     * @note This does synchronous transfer.
     *
     * @param data the vector to copy from
     */
    explicit CuVector(const std::vector<T>& data);

    /**
     * @brief operator= copies the content of the data vector to the memory of this vector.
     *
     * @note This requires the two vectors to be of the same size.
     * @note This does synchronous transfer.
     *
     * @param other the vector to copy from
     */
    CuVector& operator=(const CuVector<T>& other);

    /**
     * @brief operator= sets the whole vector equal to the scalar value.
     *
     * @note This does asynchronous operations
     *
     * @param scalar the value all elements will be set to.
     */
    CuVector& operator=(T scalar);

    /**
     * @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
     *
     * @param numberOfElements number of T elements to allocate
     */
    explicit CuVector(const int numberOfElements);

    /**
     * @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
     * data
     *
     * @note This assumes the data is on the CPU.
     *
     * @param numberOfElements number of T elements to allocate
     * @param dataOnHost data on host/CPU
     */
    CuVector(const T* dataOnHost, const int numberOfElements);

    /**
     * @brief ~CuVector calls cudaFree
     */
    virtual ~CuVector();

    /**
     * @return the raw pointer to the GPU data
     */
    T* data();

    /**
     * @return the raw pointer to the GPU data
     */
    const T* data() const;

    /**
     * @brief copyFromHost copies data from a Dune::BlockVector
     * @param vector the vector to copy from
     *
     * @note This does synchronous transfer.
     * @note This assumes that the size of this vector is equal to the dim of the input vector.
     */
    template <int BlockDimension>
    void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& vector)
    {
        if (m_numberOfElements != vector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. CuVector has size {}, \n"
                                  "however, BlockVector has N() = {}, and dim = {}.",
                                  m_numberOfElements,
                                  vector.N(),
                                  vector.dim()));
        }
        const auto dataPointer = static_cast<const T*>(&(vector[0][0]));
        copyFromHost(dataPointer, m_numberOfElements);
    }

    /**
     * @brief copyToHost copies data to a Dune::BlockVector
     * @param vector the vector to copy to
     *
     * @note This does synchronous transfer.
     * @note This assumes that the size of this vector is equal to the dim of the input vector.
     */
    template <int BlockDimension>
    void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& vector) const
    {
        if (m_numberOfElements != vector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
                                  "has N() = {}, and dim() = {}.",
                                  m_numberOfElements,
                                  vector.N(),
                                  vector.dim()));
        }
        const auto dataPointer = static_cast<T*>(&(vector[0][0]));
        copyToHost(dataPointer, m_numberOfElements);
    }

    /**
     * @brief copyFromHost copies numberOfElements from the CPU memory dataPointer
     * @param dataPointer raw pointer to CPU memory
     * @param numberOfElements number of elements to copy
     * @note This does synchronous transfer.
     * @note assumes that this vector has numberOfElements elements
     */
    void copyFromHost(const T* dataPointer, int numberOfElements);

    /**
     * @brief copyToHost copies numberOfElements to the CPU memory dataPointer
     * @param dataPointer raw pointer to CPU memory
     * @param numberOfElements number of elements to copy
     * @note This does synchronous transfer.
     * @note assumes that this vector has numberOfElements elements
     */
    void copyToHost(T* dataPointer, int numberOfElements) const;

    /**
     * @brief copyFromHost copies data from an std::vector
     * @param data the vector to copy from
     *
     * @note This does synchronous transfer.
     * @note This assumes that the size of this vector is equal to the size of the input vector.
     */
    void copyFromHost(const std::vector<T>& data);

    /**
     * @brief copyToHost copies data to an std::vector
     * @param data the vector to copy to
     *
     * @note This does synchronous transfer.
     * @note This assumes that the size of this vector is equal to the size of the input vector.
     */
    void copyToHost(std::vector<T>& data) const;

    /**
     * @brief operator*= multiplies every element by scalar
     * @param scalar the scalar with which to multiply every element
     *
     * @note This operation is asynchronous.
     */
    CuVector<T>& operator*=(const T& scalar);

    /**
     * @brief axpy sets this vector equal to this + alpha * y
     * @param alpha the scalar with which to multiply y
     * @param y input vector of same size as this
     *
     * @note this will call CuBlas in the background
     * @note int is not supported
     */
    CuVector<T>& axpy(T alpha, const CuVector<T>& y);

    /**
     * @brief operator+= adds the other vector to this vector
     *
     * @note this will call CuBlas in the background
     * @note int is not supported
     */
    CuVector<T>& operator+=(const CuVector<T>& other);

    /**
     * @brief operator-= subtracts the other vector from this vector
     *
     * @note this will call CuBlas in the background
     * @note int is not supported
     */
    CuVector<T>& operator-=(const CuVector<T>& other);

    /**
     * @brief dot computes the dot product (standard inner product) against the other vector
     * @param other vector of same size as this
     * @note this will call CuBlas in the background
     * @note int is not supported
     *
     * @return the result of the inner product
     */
    T dot(const CuVector<T>& other) const;

    /**
     * @brief returns the l2 norm of the vector
     * @note this will call CuBlas in the background
     * @note int is not supported
     *
     * @return the l2 norm
     */
    T two_norm() const;

    //! Computes the dot product sum_i this[indexSet[i]] * other[indexSet[i]]
    T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;

    //! Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
    T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;

    //! Computes the dot product sum_i this[indexSet[i]] * other[indexSet[i]]
    T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;

    //! Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
    T two_norm(const CuVector<int>& indexSet) const;

    /**
     * @brief dim returns the dimension (number of T elements) in the vector
     * @return number of elements
     */
    size_type dim() const;

    /**
     * @brief creates an std::vector of the same size and copies the GPU data to this std::vector
     * @return an std::vector containing the elements copied from the GPU.
     */
    std::vector<T> asStdVector() const;

    /**
     * @brief creates a Dune::BlockVector of the corresponding block size and copies the GPU data to it
     * @return a Dune::BlockVector containing the elements copied from the GPU.
     */
    template <int blockSize>
    Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector() const
    {
        OPM_ERROR_IF(dim() % blockSize != 0,
                     fmt::format("dim() is not a multiple of blockSize. Given blockSize = {}, and dim() = {}",
                                 blockSize,
                                 dim()));

        Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(dim() / blockSize);
        copyToHost(returnValue);
        return returnValue;
    }

    /**
     * @brief setZeroAtIndexSet for each element in indexSet, sets the corresponding entry of this vector to zero
     * @param indexSet the set of indices to set to zero
     *
     * @note Assumes all indices are within range
     *
     * This is supposed to do the same as the following code on the CPU:
     * @code{.cpp}
     * for (int index : indexSet) {
     *     this->data[index] = T(0.0);
     * }
     * @endcode
     */
    void setZeroAtIndexSet(const CuVector<int>& indexSet);

private:
    T* m_dataOnDevice = nullptr;
    const int m_numberOfElements;
    detail::CuBlasHandle& m_cuBlasHandle;
};
} // namespace Opm::cuistl
#endif
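A short sketch of the index-set overloads declared above, assuming the CuVector header is available; the values are made up for illustration, and the buffered variant simply reuses caller-provided scratch space instead of allocating its own:

#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <vector>

double restrictedTwoNorm()
{
    using Opm::cuistl::CuVector;
    auto x = CuVector<double>(std::vector<double>{1.0, 2.0, 3.0, 4.0});
    auto indices = CuVector<int>(std::vector<int>{1, 3}); // only elements 1 and 3 participate

    // Allocating overload: creates an internal buffer of indices.dim() elements.
    const double normA = x.two_norm(indices);

    // Buffered overload: reuses a caller-provided scratch vector, avoiding the allocation.
    auto buffer = CuVector<double>(indices.dim());
    const double normB = x.two_norm(indices, buffer);

    return normA + normB; // both norms equal sqrt(2.0 * 2.0 + 4.0 * 4.0)
}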
opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp (new file, 168 lines)
@@ -0,0 +1,168 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Contains wrappers to make the CuBLAS library behave as a modern C++ library with function overloading.
 *
 * In simple terms, this allows one to call, say, cublasScal on both double and single precision,
 * instead of calling cublasDscal and cublasSscal respectively.
 */

#ifndef OPM_CUBLASWRAPPER_HEADER_INCLUDED
#define OPM_CUBLASWRAPPER_HEADER_INCLUDED
#include <cublas_v2.h>
#include <opm/common/ErrorMacros.hpp>

namespace Opm::cuistl::detail
{

inline cublasStatus_t
cublasScal(cublasHandle_t handle,
           int n,
           const double* alpha, /* host or device pointer */
           double* x,
           int incx)
{
    return cublasDscal(handle,
                       n,
                       alpha, /* host or device pointer */
                       x,
                       incx);
}

inline cublasStatus_t
cublasScal(cublasHandle_t handle,
           int n,
           const float* alpha, /* host or device pointer */
           float* x,
           int incx)
{
    return cublasSscal(handle,
                       n,
                       alpha, /* host or device pointer */
                       x,
                       incx);
}

inline cublasStatus_t
cublasScal([[maybe_unused]] cublasHandle_t handle,
           [[maybe_unused]] int n,
           [[maybe_unused]] const int* alpha, /* host or device pointer */
           [[maybe_unused]] int* x,
           [[maybe_unused]] int incx)
{
    OPM_THROW(std::runtime_error, "cublasScal multiplication for integer vectors is not implemented yet.");
}

inline cublasStatus_t
cublasAxpy(cublasHandle_t handle,
           int n,
           const double* alpha, /* host or device pointer */
           const double* x,
           int incx,
           double* y,
           int incy)
{
    return cublasDaxpy(handle,
                       n,
                       alpha, /* host or device pointer */
                       x,
                       incx,
                       y,
                       incy);
}

inline cublasStatus_t
cublasAxpy(cublasHandle_t handle,
           int n,
           const float* alpha, /* host or device pointer */
           const float* x,
           int incx,
           float* y,
           int incy)
{
    return cublasSaxpy(handle,
                       n,
                       alpha, /* host or device pointer */
                       x,
                       incx,
                       y,
                       incy);
}

inline cublasStatus_t
cublasAxpy([[maybe_unused]] cublasHandle_t handle,
           [[maybe_unused]] int n,
           [[maybe_unused]] const int* alpha, /* host or device pointer */
           [[maybe_unused]] const int* x,
           [[maybe_unused]] int incx,
           [[maybe_unused]] int* y,
           [[maybe_unused]] int incy)
{
    OPM_THROW(std::runtime_error, "axpy for integer vectors is not implemented yet.");
}

inline cublasStatus_t
cublasDot(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result)
{
    return cublasDdot(handle, n, x, incx, y, incy, result);
}

inline cublasStatus_t
cublasDot(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result)
{
    return cublasSdot(handle, n, x, incx, y, incy, result);
}

inline cublasStatus_t
cublasDot([[maybe_unused]] cublasHandle_t handle,
          [[maybe_unused]] int n,
          [[maybe_unused]] const int* x,
          [[maybe_unused]] int incx,
          [[maybe_unused]] const int* y,
          [[maybe_unused]] int incy,
          [[maybe_unused]] int* result)
{
    OPM_THROW(std::runtime_error, "inner product for integer vectors is not implemented yet.");
}

inline cublasStatus_t
cublasNrm2(cublasHandle_t handle, int n, const double* x, int incx, double* result)
{
    return cublasDnrm2(handle, n, x, incx, result);
}

inline cublasStatus_t
cublasNrm2(cublasHandle_t handle, int n, const float* x, int incx, float* result)
{
    return cublasSnrm2(handle, n, x, incx, result);
}

inline cublasStatus_t
cublasNrm2([[maybe_unused]] cublasHandle_t handle,
           [[maybe_unused]] int n,
           [[maybe_unused]] const int* x,
           [[maybe_unused]] int incx,
           [[maybe_unused]] int* result)
{
    OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
}

} // namespace Opm::cuistl::detail
#endif
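As a quick illustration of how the overload set above is consumed (CuVector.cpp uses the same pattern together with OPM_CUBLAS_SAFE_CALL from cublas_safe_call.hpp; the helper function itself is hypothetical):

#include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>

// Works for T = double and T = float; the overloads dispatch to cublasDscal/cublasSscal
// and cublasDaxpy/cublasSaxpy respectively. x and y are device pointers of length n.
template <class T>
void scaleAndAdd(cublasHandle_t handle, int n, T alpha, const T* x, T* y)
{
    OPM_CUBLAS_SAFE_CALL(Opm::cuistl::detail::cublasScal(handle, n, &alpha, y, 1));      // y *= alpha
    OPM_CUBLAS_SAFE_CALL(Opm::cuistl::detail::cublasAxpy(handle, n, &alpha, x, 1, y, 1)); // y += alpha * x
}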
opm/simulators/linalg/cuistl/detail/vector_operations.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
// TODO: [perf] Get rid of thrust.
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
namespace Opm::cuistl::detail
{

namespace
{
    template <class T>
    __global__ void setZeroAtIndexSetKernel(T* devicePointer, size_t numberOfElements, const int* indices)
    {
        const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;

        if (globalIndex < numberOfElements) {
            devicePointer[indices[globalIndex]] = T(0);
        }
    }

    template <class T>
    __global__ void setVectorValueKernel(T* devicePointer, size_t numberOfElements, T value)
    {
        const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;

        if (globalIndex < numberOfElements) {
            devicePointer[globalIndex] = value;
        }
    }

    template <class T>
    __global__ void
    elementWiseMultiplyKernel(const T* a, const T* b, T* buffer, size_t numberOfElements, const int* indices)
    {
        const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;

        // TODO: [perf] Is it faster to just use a mask? Probably does not matter either way
        // This is hopefully not where we will spend most of our time.
        if (globalIndex < numberOfElements) {
            // Store the products densely: buffer has one entry per index in the index set,
            // so the reduction below can sum buffer[0..numberOfElements).
            buffer[globalIndex] = a[indices[globalIndex]] * b[indices[globalIndex]];
        }
    }

    constexpr inline size_t getThreads([[maybe_unused]] size_t numberOfElements)
    {
        return 1024;
    }

    inline size_t getBlocks(size_t numberOfElements)
    {
        const auto threads = getThreads(numberOfElements);
        // Round up so that every element is covered by a thread.
        return (numberOfElements + threads - 1) / threads;
    }
} // namespace

template <class T>
void
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
{
    setVectorValueKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
        deviceData, numberOfElements, value);
}

template void setVectorValue(double*, size_t, const double&);
template void setVectorValue(float*, size_t, const float&);
template void setVectorValue(int*, size_t, const int&);

template <class T>
void
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
{
    setZeroAtIndexSetKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
        deviceData, numberOfElements, indices);
}
template void setZeroAtIndexSet(double*, size_t, const int*);
template void setZeroAtIndexSet(float*, size_t, const int*);
template void setZeroAtIndexSet(int*, size_t, const int*);

template <class T>
T
innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices)
{
    elementWiseMultiplyKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
        deviceA, deviceB, buffer, numberOfElements, indices);

    // TODO: [perf] Get rid of thrust and use a more direct reduction here.
    auto bufferAsDevicePointer = thrust::device_pointer_cast(buffer);
    return thrust::reduce(bufferAsDevicePointer, bufferAsDevicePointer + numberOfElements);
}

template double innerProductAtIndices(const double*, const double*, double* buffer, size_t, const int*);
template float innerProductAtIndices(const float*, const float*, float* buffer, size_t, const int*);
template int innerProductAtIndices(const int*, const int*, int* buffer, size_t, const int*);

} // namespace Opm::cuistl::detail
opm/simulators/linalg/cuistl/detail/vector_operations.hpp (new file, 58 lines)
@@ -0,0 +1,58 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_VECTOR_OPERATIONS_HPP
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
#include <cstddef>
namespace Opm::cuistl::detail
{

/**
 * @brief setVectorValue sets every element of deviceData to value
 * @param deviceData pointer to GPU memory
 * @param numberOfElements number of elements to set to value
 * @param value the value to use
 */
template <class T>
void setVectorValue(T* deviceData, size_t numberOfElements, const T& value);

/**
 * @brief setZeroAtIndexSet sets deviceData to zero at the indices contained in indices
 * @param deviceData the data to operate on
 * @param numberOfElements number of indices
 * @param indices the indices to use
 */
template <class T>
void setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices);

/**
 * @brief innerProductAtIndices computes the inner product between deviceA[indices] and deviceB[indices]
 * @param deviceA data A
 * @param deviceB data B
 * @param buffer a buffer with number of elements equal to numberOfElements
 * @param numberOfElements number of indices
 * @param indices the indices to compute the inner product over
 * @return the result of the inner product
 *
 * @note This is equivalent to projecting the vectors to the indices contained in indices, then doing the inner product
 * of those projections.
 */
template <class T>
T innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices);
} // namespace Opm::cuistl::detail
#endif
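For reference, the semantics documented for innerProductAtIndices correspond to the following CPU sketch (a hypothetical helper, not part of the commit):

#include <cstddef>
#include <vector>

// CPU reference: sum the element-wise products over the selected indices only.
template <class T>
T innerProductAtIndicesReference(const std::vector<T>& a, const std::vector<T>& b, const std::vector<int>& indices)
{
    T result = T(0);
    for (const int index : indices) {
        result += a[index] * b[index];
    }
    return result;
}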
tests/cuistl/test_cuvector.cpp (new file, 284 lines)
@@ -0,0 +1,284 @@
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>

#define BOOST_TEST_MODULE TestCuVector

#include <algorithm>
#include <boost/test/unit_test.hpp>
#include <cmath>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <random>

BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
    auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});

    auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);

    // Multiply by 4.0:
    dataOnGPU *= 4.0;

    // Get data back on CPU in another vector:
    auto stdVectorOnCPU = dataOnGPU.asStdVector();

    std::vector<double> correctVector(someDataOnCPU.size());

    std::transform(someDataOnCPU.begin(), someDataOnCPU.end(), correctVector.begin(), [](double x) { return 4 * x; });
    BOOST_CHECK_EQUAL_COLLECTIONS(
        stdVectorOnCPU.begin(), stdVectorOnCPU.end(), correctVector.begin(), correctVector.end());
}

BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
    const int numberOfElements = 1234;
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
    BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}

BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
    std::vector<double> buffer(data.size(), 0.0);
    vectorOnGPU.copyToHost(buffer.data(), buffer.size());
    BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), data.begin(), data.end());
}

BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
    BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
    vectorOnGPU.copyFromHost(data.data(), data.size());
    std::vector<double> buffer(data.size(), 0.0);
    vectorOnGPU.copyToHost(buffer.data(), buffer.size());
    BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), data.begin(), data.end());
}

BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
    auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
    vectorOnGPU.copyFromHost(blockVector);
    std::vector<double> buffer(vectorOnGPU.dim());
    vectorOnGPU.copyToHost(buffer.data(), buffer.size());

    BOOST_CHECK_EQUAL_COLLECTIONS(
        buffer.begin(), buffer.end(), &blockVector[0][0], &blockVector[0][0] + blockVector.dim());
}

BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
    auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    vectorOnGPU.copyToHost(blockVector);

    BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), &blockVector[0][0], &blockVector[0][0] + blockVector.dim());
}

BOOST_AUTO_TEST_CASE(TestDataPointer)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());

    std::vector<double> buffer(data.size(), 0.0);
    cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost);
    BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
}

BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
    const double scalar = 42.25;
    vectorOnGPU *= scalar;
    std::vector<double> buffer(data.size(), 0.0);
    vectorOnGPU.copyToHost(buffer.data(), buffer.size());

    for (size_t i = 0; i < buffer.size(); ++i) {
        BOOST_CHECK_EQUAL(buffer[i], scalar * data[i]);
    }
}

BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    auto twoNorm = vectorOnGPU.two_norm();

    double correctAnswer = 0.0;
    for (double d : data) {
        correctAnswer += d * d;
    }
    correctAnswer = std::sqrt(correctAnswer);
    BOOST_CHECK_EQUAL(correctAnswer, twoNorm);
}

BOOST_AUTO_TEST_CASE(TestDot)
{
    std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
    std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
    auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
    auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
    auto dot = vectorOnGPUA.dot(vectorOnGPUB);

    double correctAnswer = 0.0;
    for (size_t i = 0; i < dataA.size(); ++i) {
        correctAnswer += dataA[i] * dataB[i];
    }
    BOOST_CHECK_EQUAL(correctAnswer, dot);
}

BOOST_AUTO_TEST_CASE(Assignment)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    vectorOnGPU = 10.0;
    vectorOnGPU.copyToHost(data.data(), data.size());

    for (double x : data) {
        BOOST_CHECK_EQUAL(10.0, x);
    }
}

BOOST_AUTO_TEST_CASE(CopyConstructor)
{
    std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
    vectorOnGPU.copyToHost(data.data(), data.size());
    auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
    vectorOnGPUB = 4.0;
    vectorOnGPUB = vectorOnGPU;

    std::vector<double> output(data.size());
    vectorOnGPUB.copyToHost(output.data(), output.size());
    BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), data.begin(), data.end());
}

BOOST_AUTO_TEST_CASE(RandomVectors)
{
    using GVector = Opm::cuistl::CuVector<double>;
    std::srand(0);
    std::mt19937 generator;
    std::uniform_real_distribution<double> distribution(-100.0, 100.0);
    std::uniform_real_distribution<double> distribution01(.0, 1.0);

    const size_t N = 1000;

    const size_t retries = 100;

    for (size_t retry = 0; retry < retries; ++retry) {
        std::vector<double> a(N);
        std::vector<double> b(N);

        for (size_t i = 0; i < N; ++i) {
            a[i] = distribution(generator);
            b[i] = distribution(generator);
        }

        auto aGPU = GVector(a);
        auto bGPU = GVector(b);

        aGPU += bGPU;

        auto aOutputPlus = aGPU.asStdVector();

        for (size_t i = 0; i < N; ++i) {
            BOOST_CHECK_EQUAL(aOutputPlus[i], a[i] + b[i]);
        }

        aGPU = GVector(a);
        aGPU -= bGPU;

        auto aOutputMinus = aGPU.asStdVector();

        for (size_t i = 0; i < N; ++i) {
            BOOST_CHECK_EQUAL(aOutputMinus[i], a[i] - b[i]);
        }

        aGPU = GVector(a);
        auto scalar = distribution(generator);
        aGPU *= scalar;
        auto aOutputScalar = aGPU.asStdVector();
        for (size_t i = 0; i < N; ++i) {
            BOOST_CHECK_EQUAL(aOutputScalar[i], scalar * a[i]);
        }

        aGPU = GVector(a);
        aGPU.axpy(scalar, bGPU);
        auto aOutputSaxypy = aGPU.asStdVector();
        for (size_t i = 0; i < N; ++i) {
            BOOST_CHECK_CLOSE(aOutputSaxypy[i], a[i] + scalar * b[i], 1e-7);
        }

        aGPU = GVector(a);
        auto dotted = aGPU.dot(bGPU);
        double correct = 0.0;
        for (size_t i = 0; i < N; ++i) {
            correct += a[i] * b[i];
        }

        BOOST_CHECK_CLOSE(dotted, correct, 1e-7);

        aGPU = GVector(a);
        auto twoNorm = aGPU.two_norm();
        double correctTwoNorm = 0.0;
        for (size_t i = 0; i < N; ++i) {
            correctTwoNorm += a[i] * a[i];
        }
        correctTwoNorm = std::sqrt(correctTwoNorm);

        BOOST_CHECK_CLOSE(twoNorm, correctTwoNorm, 1e-7);

        aGPU = GVector(a);
        std::vector<int> indexSet;
        const double rejectCriteria = 0.2;
        for (size_t i = 0; i < N; ++i) {
            const auto reject = distribution01(generator);
            if (reject < rejectCriteria) {
                indexSet.push_back(i);
            }
        }
        auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);

        aGPU.setZeroAtIndexSet(indexSetGPU);
        auto projectedA = aGPU.asStdVector();
        for (size_t i = 0; i < N; ++i) {
            // Yeah, O(N^2) so sue me
            bool found = std::find(indexSet.begin(), indexSet.end(), i) != indexSet.end();
            if (found) {
                BOOST_CHECK_EQUAL(projectedA[i], 0);
            } else {
                BOOST_CHECK_EQUAL(projectedA[i], a[i]);
            }
        }
    }
}