/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cmath> // std::sqrt in the indexed two_norm overloads
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>

namespace Opm::cuistl
{

template <class T>
CuVector<T>::CuVector(const std::vector<T>& data)
    : CuVector(data.data(), detail::to_int(data.size()))
{
}

template <class T>
CuVector<T>::CuVector(const size_t numberOfElements)
    : m_numberOfElements(detail::to_int(numberOfElements))
    , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{
    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
}

template <class T>
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
    : CuVector(numberOfElements)
{
    OPM_CUDA_SAFE_CALL(cudaMemcpy(
        m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
}

template <class T>
CuVector<T>&
CuVector<T>::operator=(T scalar)
{
    assertHasElements();
    detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
    return *this;
}

template <class T>
CuVector<T>&
CuVector<T>::operator=(const CuVector<T>& other)
{
    assertHasElements();
    assertSameSize(other);

    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
                                  other.m_dataOnDevice,
                                  detail::to_size_t(m_numberOfElements) * sizeof(T),
                                  cudaMemcpyDeviceToDevice));
    return *this;
}

template <class T>
CuVector<T>::CuVector(const CuVector<T>& other)
    : CuVector(other.m_numberOfElements)
{
    assertHasElements();
    assertSameSize(other);
    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
                                  other.m_dataOnDevice,
                                  detail::to_size_t(m_numberOfElements) * sizeof(T),
                                  cudaMemcpyDeviceToDevice));
}

template <class T>
CuVector<T>::~CuVector()
{
    OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
}

template <typename T>
const T*
CuVector<T>::data() const
{
    return m_dataOnDevice;
}

template <typename T>
typename CuVector<T>::size_type
CuVector<T>::dim() const
{
    // Note that there is no way for m_numberOfElements to be negative,
    // but for sanity we still use the safe conversion function here.
    //
    // We also doubt that this will lead to any performance penalty, but should this prove
    // to be false, it can be replaced by a simple cast to size_t.
    return detail::to_size_t(m_numberOfElements);
}

template <typename T>
std::vector<T>
CuVector<T>::asStdVector() const
{
    std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
    copyToHost(temporary);
    return temporary;
}

template <typename T>
void
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
{
    detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
}

template <typename T>
void
CuVector<T>::assertSameSize(const CuVector<T>& x) const
{
    assertSameSize(x.m_numberOfElements);
}

template <typename T>
void
CuVector<T>::assertSameSize(int size) const
{
    if (size != m_numberOfElements) {
        OPM_THROW(std::invalid_argument,
                  fmt::format("Given vector has {} elements, while we have {}.", size, m_numberOfElements));
    }
}

template <typename T>
void
CuVector<T>::assertHasElements() const
{
    if (m_numberOfElements <= 0) {
        OPM_THROW(std::invalid_argument, "We have 0 elements");
    }
}

template <typename T>
T*
CuVector<T>::data()
{
    return m_dataOnDevice;
}

template <class T>
CuVector<T>&
CuVector<T>::operator*=(const T& scalar)
{
    assertHasElements();
    OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
    return *this;
}

template <class T>
CuVector<T>&
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
{
    assertHasElements();
    assertSameSize(y);
    OPM_CUBLAS_SAFE_CALL(detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, y.data(), 1, data(), 1));
    return *this;
}

template <class T>
T
CuVector<T>::dot(const CuVector<T>& other) const
{
    assertHasElements();
    assertSameSize(other);
    T result = T(0);
    OPM_CUBLAS_SAFE_CALL(
        detail::cublasDot(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &result));
    return result;
}

template <class T>
T
CuVector<T>::two_norm() const
{
    assertHasElements();
    T result = T(0);
    OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &result));
    return result;
}

template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
{
    return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}

template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
{
    // TODO: [perf] Optimize this to a single call
    return std::sqrt(this->dot(*this, indexSet, buffer));
}

template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
{
    CuVector<T> buffer(indexSet.dim());
    return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}

template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
{
    CuVector<T> buffer(indexSet.dim());
    // TODO: [perf] Optimize this to a single call
    return std::sqrt(this->dot(*this, indexSet, buffer));
}
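
// The indexed dot/two_norm overloads above come in two flavours: the variants
// taking an explicit `buffer` argument let the caller reuse GPU scratch storage
// across calls, while the buffer-less variants allocate a temporary CuVector<T>
// of size indexSet.dim() on every invocation. A minimal usage sketch
// (illustrative only; hostX, hostY and hostIndices are assumed host-side
// containers, not defined in this file):
//
//     CuVector<double> x(hostX);             // std::vector<double>
//     CuVector<double> y(hostY);             // same size as hostX
//     CuVector<int> indices(hostIndices);    // indices into [0, x.dim())
//     CuVector<double> scratch(indices.dim());
//     const double partialDot  = x.dot(y, indices, scratch);   // reuses scratch
//     const double partialNorm = x.two_norm(indices, scratch); // reuses scratch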

template <class T>
CuVector<T>&
CuVector<T>::operator+=(const CuVector<T>& other)
{
    assertHasElements();
    assertSameSize(other);
    // TODO: [perf] Make a specialized version of this
    return axpy(1.0, other);
}

template <class T>
CuVector<T>&
CuVector<T>::operator-=(const CuVector<T>& other)
{
    assertHasElements();
    assertSameSize(other);
    // TODO: [perf] Make a specialized version of this
    return axpy(-1.0, other);
}

template <class T>
void
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
    if (numberOfElements > dim()) {
        OPM_THROW(std::runtime_error,
                  fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.",
                              dim(),
                              numberOfElements));
    }
    OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
}

template <class T>
void
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{
    assertSameSize(detail::to_int(numberOfElements));
    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
}

template <class T>
void
CuVector<T>::copyFromHost(const std::vector<T>& data)
{
    copyFromHost(data.data(), data.size());
}

template <class T>
void
CuVector<T>::copyToHost(std::vector<T>& data) const
{
    copyToHost(data.data(), data.size());
}
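
// Illustrative round trip through the host/device copy helpers defined above
// (a sketch only, not compiled as part of this translation unit; the host-side
// data below is an assumption made up for the example):
//
//     std::vector<double> hostData {1.0, 2.0, 3.0};
//     CuVector<double> deviceVector(hostData);     // host -> device copy
//     deviceVector *= 2.0;                         // cuBLAS scal on the GPU
//     deviceVector.copyToHost(hostData);           // device -> host, same size required
//     // hostData now holds {2.0, 4.0, 6.0}
//     auto roundTrip = deviceVector.asStdVector(); // allocating alternative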

template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;

} // namespace Opm::cuistl