/* Copyright 2022-2023 SINTEF AS This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #include #include #include #include #include #include #include #include namespace Opm::cuistl { template CuVector::CuVector(const std::vector& data) : CuVector(data.data(), detail::to_int(data.size())) { } template CuVector::CuVector(const size_t numberOfElements) : m_numberOfElements(detail::to_int(numberOfElements)) , m_cuBlasHandle(detail::CuBlasHandle::getInstance()) { OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements))); } template CuVector::CuVector(const T* dataOnHost, const size_t numberOfElements) : CuVector(numberOfElements) { OPM_CUDA_SAFE_CALL(cudaMemcpy( m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice)); } template CuVector& CuVector::operator=(T scalar) { assertHasElements(); detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar); return *this; } template CuVector& CuVector::operator=(const CuVector& other) { assertHasElements(); assertSameSize(other); OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyDeviceToDevice)); return *this; } template CuVector::CuVector(const CuVector& other) : CuVector(other.m_numberOfElements) { assertHasElements(); assertSameSize(other); OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyDeviceToDevice)); } template CuVector::~CuVector() { OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice)); } template const T* CuVector::data() const { return m_dataOnDevice; } template typename CuVector::size_type CuVector::dim() const { // Note that there is no way for m_numberOfElements to be non-positive, // but for sanity we still use the safe conversion function here. // // We also doubt that this will lead to any performance penality, but should this prove // to be false, this can be replaced by a simple cast to size_t return detail::to_size_t(m_numberOfElements); } template std::vector CuVector::asStdVector() const { std::vector temporary(detail::to_size_t(m_numberOfElements)); copyToHost(temporary); return temporary; } template void CuVector::setZeroAtIndexSet(const CuVector& indexSet) { detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data()); } template void CuVector::assertSameSize(const CuVector& x) const { assertSameSize(x.m_numberOfElements); } template void CuVector::assertSameSize(int size) const { if (size != m_numberOfElements) { OPM_THROW(std::invalid_argument, fmt::format("Given vector has {}, while we have {}.", size, m_numberOfElements)); } } template void CuVector::assertHasElements() const { if (m_numberOfElements <= 0) { OPM_THROW(std::invalid_argument, "We have 0 elements"); } } template T* CuVector::data() { return m_dataOnDevice; } template CuVector& CuVector::operator*=(const T& scalar) { assertHasElements(); OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1)); return *this; } template CuVector& CuVector::axpy(T alpha, const CuVector& y) { assertHasElements(); assertSameSize(y); OPM_CUBLAS_SAFE_CALL(detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, y.data(), 1, data(), 1)); return *this; } template T CuVector::dot(const CuVector& other) const { assertHasElements(); assertSameSize(other); T result = T(0); OPM_CUBLAS_SAFE_CALL( detail::cublasDot(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &result)); return result; } template T CuVector::two_norm() const { assertHasElements(); T result = T(0); OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &result)); return result; } template T CuVector::dot(const CuVector& other, const CuVector& indexSet, CuVector& buffer) const { return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); } template T CuVector::two_norm(const CuVector& indexSet, CuVector& buffer) const { // TODO: [perf] Optimize this to a single call return std::sqrt(this->dot(*this, indexSet, buffer)); } template T CuVector::dot(const CuVector& other, const CuVector& indexSet) const { CuVector buffer(indexSet.dim()); return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); } template T CuVector::two_norm(const CuVector& indexSet) const { CuVector buffer(indexSet.dim()); // TODO: [perf] Optimize this to a single call return std::sqrt(this->dot(*this, indexSet, buffer)); } template CuVector& CuVector::operator+=(const CuVector& other) { assertHasElements(); assertSameSize(other); // TODO: [perf] Make a specialized version of this return axpy(1.0, other); } template CuVector& CuVector::operator-=(const CuVector& other) { assertHasElements(); assertSameSize(other); // TODO: [perf] Make a specialized version of this return axpy(-1.0, other); } template void CuVector::copyFromHost(const T* dataPointer, size_t numberOfElements) { if (numberOfElements > dim()) { OPM_THROW(std::runtime_error, fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.", dim(), numberOfElements)); } OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); } template void CuVector::copyToHost(T* dataPointer, size_t numberOfElements) const { assertSameSize(detail::to_int(numberOfElements)); OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); } template void CuVector::copyFromHost(const std::vector& data) { copyFromHost(data.data(), data.size()); } template void CuVector::copyToHost(std::vector& data) const { copyToHost(data.data(), data.size()); } template class CuVector; template class CuVector; template class CuVector; } // namespace Opm::cuistl