/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace Opm::cuistl
{
// Constructs a device vector from a host std::vector.
//
// Delegates to the (pointer, element count) constructor, passing data.data()
// and the size converted through detail::to_int.
//
// data: host-side values to copy to the device.
template
CuVector::CuVector(const std::vector& data)
: CuVector(data.data(), detail::to_int(data.size()))
{
}
// Allocates device storage for numberOfElements values of type T.
//
// The element count is stored as an int (converted with detail::to_int) and the
// cuBLAS handle is taken from detail::CuBlasHandle::getInstance(). Nothing is
// written to the new allocation here; the CUDA status is checked through
// OPM_CUDA_SAFE_CALL.
//
// numberOfElements: number of T values to reserve on the device.
template
CuVector::CuVector(const size_t numberOfElements)
    : m_numberOfElements(detail::to_int(numberOfElements))
    , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{
    const size_t bytesToAllocate = sizeof(T) * detail::to_size_t(m_numberOfElements);
    OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, bytesToAllocate));
}
// Constructs a device vector and fills it from a host buffer.
//
// The size-only constructor performs the allocation; afterwards
// numberOfElements values are copied host-to-device with cudaMemcpy.
//
// dataOnHost:       host pointer to at least numberOfElements values of T.
// numberOfElements: number of values to allocate and copy.
template
CuVector::CuVector(const T* dataOnHost, const size_t numberOfElements)
    : CuVector(numberOfElements)
{
    const size_t bytesToCopy = detail::to_size_t(m_numberOfElements) * sizeof(T);
    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, dataOnHost, bytesToCopy, cudaMemcpyHostToDevice));
}
// Assigns the same scalar to every element through detail::setVectorValue.
//
// scalar: value handed to detail::setVectorValue for all elements.
// Returns a reference to this vector.
// Raises std::invalid_argument (via assertHasElements) when the vector is empty.
template
CuVector&
CuVector::operator=(T scalar)
{
    assertHasElements();

    const auto elementCount = detail::to_size_t(m_numberOfElements);
    detail::setVectorValue(data(), elementCount, scalar);

    return *this;
}
// Copies the contents of another device vector of the same size into this one.
//
// No reallocation takes place: both vectors must already hold the same,
// non-zero number of elements.
//
// other: vector to copy from.
// Returns a reference to this vector.
// Raises std::invalid_argument (via assertHasElements / assertSameSize) when
// this vector is empty or the sizes differ.
template
CuVector&
CuVector::operator=(const CuVector& other)
{
    assertHasElements();
    assertSameSize(other);

    // Self-assignment leaves the contents unchanged; returning early avoids a
    // device-to-device copy whose source and destination are the same buffer.
    if (this == &other) {
        return *this;
    }

    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
                                  other.m_dataOnDevice,
                                  detail::to_size_t(m_numberOfElements) * sizeof(T),
                                  cudaMemcpyDeviceToDevice));
    return *this;
}
// Copy constructor: allocates a buffer of the same size as other and copies
// its contents device-to-device.
//
// other: vector to duplicate.
// Raises std::invalid_argument (via assertHasElements) when other is empty,
// since the new vector then has no elements either.
template
CuVector::CuVector(const CuVector& other)
    : CuVector(other.m_numberOfElements)
{
    assertHasElements();
    assertSameSize(other);

    const size_t bytesToCopy = sizeof(T) * detail::to_size_t(m_numberOfElements);
    OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, bytesToCopy, cudaMemcpyDeviceToDevice));
}
// Releases the device buffer with cudaFree.
//
// The status is routed through OPM_CUDA_WARN_IF_ERROR rather than
// OPM_CUDA_SAFE_CALL.
// NOTE(review): presumably this only logs a warning so the destructor never
// throws - confirm against the macro definition.
template
CuVector::~CuVector()
{
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
}
// Read-only access to the underlying buffer.
//
// Returns the pointer stored in m_dataOnDevice (the cudaMalloc allocation made
// by the constructor) as pointer-to-const.
template
const T*
CuVector::data() const
{
return m_dataOnDevice;
}
// Returns the number of elements in the vector as size_type.
template
typename CuVector::size_type
CuVector::dim() const
{
// m_numberOfElements is produced from a size_t via detail::to_int in the
// constructor, so it is not expected to be negative (it may be zero). For
// sanity we still use the safe conversion function here.
//
// We also doubt that this will lead to any performance penalty, but should this prove
// to be false, this can be replaced by a simple cast to size_t
return detail::to_size_t(m_numberOfElements);
}
// Returns a host-side copy of the vector.
//
// A std::vector with one slot per element is created and then filled through
// copyToHost.
template
std::vector
CuVector::asStdVector() const
{
    std::vector hostCopy(detail::to_size_t(m_numberOfElements));
    copyToHost(hostCopy);
    return hostCopy;
}
// Forwards this vector's buffer together with the index set's length and
// buffer to detail::setZeroAtIndexSet.
//
// indexSet: vector holding the indices to act on.
// NOTE(review): going by the helper's name, the entries of this vector at the
// listed positions are set to zero; no bounds or size validation happens here.
template
void
CuVector::setZeroAtIndexSet(const CuVector& indexSet)
{
    const auto indexCount = indexSet.dim();
    const auto* indices = indexSet.data();
    detail::setZeroAtIndexSet(m_dataOnDevice, indexCount, indices);
}
// Checks that x has the same number of elements as this vector.
//
// Forwards x.m_numberOfElements to the integer overload, which raises
// std::invalid_argument on mismatch.
template
void
CuVector::assertSameSize(const CuVector& x) const
{
assertSameSize(x.m_numberOfElements);
}
// Checks that the given element count equals this vector's element count.
//
// size: element count to compare against m_numberOfElements.
// Raises std::invalid_argument (through OPM_THROW) when the counts differ.
template
void
CuVector::assertSameSize(int size) const
{
    if (size == m_numberOfElements) {
        return;
    }

    OPM_THROW(std::invalid_argument,
              fmt::format("Given vector has {}, while we have {}.", size, m_numberOfElements));
}
// Checks that the vector holds at least one element.
//
// Raises std::invalid_argument (through OPM_THROW) when m_numberOfElements is
// zero or negative.
template
void
CuVector::assertHasElements() const
{
    if (m_numberOfElements > 0) {
        return;
    }

    OPM_THROW(std::invalid_argument, "We have 0 elements");
}
// Mutable access to the underlying buffer.
//
// Returns the pointer stored in m_dataOnDevice (the cudaMalloc allocation made
// by the constructor).
template
T*
CuVector::data()
{
return m_dataOnDevice;
}
// Scales the vector in place by passing it to detail::cublasScal with unit
// stride.
//
// scalar: scaling factor, passed by address to the cuBLAS wrapper.
// Returns a reference to this vector.
// Raises std::invalid_argument (via assertHasElements) when the vector is empty.
template
CuVector&
CuVector::operator*=(const T& scalar)
{
    assertHasElements();

    T* const deviceValues = data();
    OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, deviceValues, 1));

    return *this;
}
// In-place axpy update through detail::cublasAxpy with unit strides.
//
// y is passed as the input operand and this vector as the in/out operand.
// NOTE(review): under the cuBLAS axpy convention that is
// `*this += alpha * y`, assuming the wrapper forwards its arguments unchanged.
//
// alpha: scaling factor applied to y.
// y:     vector of the same size as this one.
// Returns a reference to this vector.
// Raises std::invalid_argument when this vector is empty or the sizes differ.
template
CuVector&
CuVector::axpy(T alpha, const CuVector& y)
{
    assertHasElements();
    assertSameSize(y);

    const T* const source = y.data();
    T* const destination = data();
    OPM_CUBLAS_SAFE_CALL(
        detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, source, 1, destination, 1));

    return *this;
}
// Inner product of this vector with other, computed by detail::cublasDot with
// unit strides.
//
// other: vector of the same size as this one.
// Returns the value written by the cuBLAS wrapper into a local of type T.
// Raises std::invalid_argument when this vector is empty or the sizes differ.
template
T
CuVector::dot(const CuVector& other) const
{
    assertHasElements();
    assertSameSize(other);

    T innerProduct = T(0);
    OPM_CUBLAS_SAFE_CALL(detail::cublasDot(
        m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &innerProduct));

    return innerProduct;
}
// Norm of the whole vector as computed by detail::cublasNrm2 with unit stride.
//
// Returns the value written by the cuBLAS wrapper into a local of type T.
// Raises std::invalid_argument (via assertHasElements) when the vector is empty.
template
T
CuVector::two_norm() const
{
    assertHasElements();

    T norm = T(0);
    OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &norm));

    return norm;
}
// Inner product restricted to an index set, using a caller-supplied scratch
// buffer.
//
// All work is delegated to detail::innerProductAtIndices; no size checks are
// performed here.
//
// other:    second operand of the inner product.
// indexSet: indices taking part in the product.
// buffer:   scratch vector handed to the helper.
// NOTE(review): the overload without a buffer allocates indexSet.dim()
// elements, so buffer presumably needs at least that many - confirm.
template
T
CuVector::dot(const CuVector& other, const CuVector& indexSet, CuVector& buffer) const
{
    const auto indexCount = indexSet.dim();
    return detail::innerProductAtIndices(
        m_dataOnDevice, other.data(), buffer.data(), indexCount, indexSet.data());
}
// Norm restricted to an index set, using a caller-supplied scratch buffer.
//
// Computed as the square root of the indexed inner product of this vector
// with itself.
//
// indexSet: indices taking part in the norm.
// buffer:   scratch vector forwarded to the indexed dot overload.
template
T
CuVector::two_norm(const CuVector& indexSet, CuVector& buffer) const
{
    // TODO: [perf] Optimize this to a single call
    const T selfProduct = this->dot(*this, indexSet, buffer);
    return std::sqrt(selfProduct);
}
// Inner product restricted to an index set.
//
// Allocates a temporary scratch vector with indexSet.dim() elements and
// forwards to the overload that takes an explicit buffer.
//
// other:    second operand of the inner product.
// indexSet: indices taking part in the product.
template
T
CuVector::dot(const CuVector& other, const CuVector& indexSet) const
{
    CuVector buffer(indexSet.dim());
    return this->dot(other, indexSet, buffer);
}
// Norm restricted to an index set.
//
// Allocates a temporary scratch vector with indexSet.dim() elements and
// forwards to the overload that takes an explicit buffer, which returns the
// square root of the indexed self inner product.
//
// indexSet: indices taking part in the norm.
template
T
CuVector::two_norm(const CuVector& indexSet) const
{
    CuVector buffer(indexSet.dim());
    // TODO: [perf] Optimize this to a single call
    return this->two_norm(indexSet, buffer);
}
// Adds other to this vector in place.
//
// Implemented as axpy with a factor of one. The factor is written as T(1)
// instead of the double literal 1.0 so that no implicit double-to-T
// conversion is needed for the float and integer instantiations. axpy itself
// verifies that this vector is non-empty and that the sizes match, so those
// checks are not repeated here.
//
// other: vector of the same size as this one.
// Returns a reference to this vector.
// Raises std::invalid_argument (from axpy) when this vector is empty or the
// sizes differ.
template
CuVector&
CuVector::operator+=(const CuVector& other)
{
    // TODO: [perf] Make a specialized version of this
    return axpy(T(1), other);
}
// Subtracts other from this vector in place.
//
// Implemented as axpy with a factor of minus one. The factor is written as
// T(-1) instead of the double literal -1.0 so that no implicit double-to-T
// conversion is needed for the float and integer instantiations. axpy itself
// verifies that this vector is non-empty and that the sizes match, so those
// checks are not repeated here.
//
// other: vector of the same size as this one.
// Returns a reference to this vector.
// Raises std::invalid_argument (from axpy) when this vector is empty or the
// sizes differ.
template
CuVector&
CuVector::operator-=(const CuVector& other)
{
    // TODO: [perf] Make a specialized version of this
    return axpy(T(-1), other);
}
// Copies numberOfElements values from a host buffer to the start of the
// device buffer.
//
// Fewer elements than the vector holds may be copied; more is an error.
//
// dataPointer:      host pointer to at least numberOfElements values of T.
// numberOfElements: number of values to copy, at most dim().
// Raises std::runtime_error (through OPM_THROW) when numberOfElements exceeds
// dim().
template
void
CuVector::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
    const auto capacity = dim();
    if (numberOfElements > capacity) {
        OPM_THROW(std::runtime_error,
                  fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.",
                              capacity,
                              numberOfElements));
    }

    const size_t bytesToCopy = sizeof(T) * numberOfElements;
    OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, bytesToCopy, cudaMemcpyHostToDevice));
}
// Copies the whole device buffer into a host buffer.
//
// Unlike copyFromHost, the requested count must equal the vector's element
// count exactly.
//
// dataPointer:      host pointer with room for numberOfElements values of T.
// numberOfElements: number of values to copy; must equal the vector size.
// Raises std::invalid_argument (via assertSameSize) when the counts differ.
template
void
CuVector::copyToHost(T* dataPointer, size_t numberOfElements) const
{
    assertSameSize(detail::to_int(numberOfElements));

    const size_t bytesToCopy = sizeof(T) * numberOfElements;
    OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), bytesToCopy, cudaMemcpyDeviceToHost));
}
// Copies the contents of a host std::vector to the device.
//
// Forwards data.data() and data.size() to the pointer overload, which rejects
// inputs larger than this vector.
//
// data: host-side values to upload.
template
void
CuVector::copyFromHost(const std::vector& data)
{
copyFromHost(data.data(), data.size());
}
// Copies the device contents into a host std::vector.
//
// Forwards data.data() and data.size() to the pointer overload, which requires
// the size to match this vector's element count; the vector is not resized
// here.
//
// data: pre-sized host-side destination.
template
void
CuVector::copyToHost(std::vector& data) const
{
copyToHost(data.data(), data.size());
}
// Explicit instantiations of CuVector, so the member definitions above are
// emitted from this translation unit.
template class CuVector;
template class CuVector;
template class CuVector;
} // namespace Opm::cuistl