/* Copyright 2022-2023 SINTEF AS This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #include #include #include #include #include #include #include #include namespace Opm::gpuistl { template GpuVector::GpuVector(const std::vector& data) : GpuVector(data.data(), detail::to_int(data.size())) { } template GpuVector::GpuVector(const size_t numberOfElements) : m_numberOfElements(detail::to_int(numberOfElements)) , m_cuBlasHandle(detail::CuBlasHandle::getInstance()) { OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements))); } template GpuVector::GpuVector(const T* dataOnHost, const size_t numberOfElements) : GpuVector(numberOfElements) { OPM_CUDA_SAFE_CALL(cudaMemcpy( m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice)); } template GpuVector& GpuVector::operator=(T scalar) { assertHasElements(); detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar); return *this; } template GpuVector& GpuVector::operator=(const GpuVector& other) { assertHasElements(); assertSameSize(other); OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyDeviceToDevice)); return *this; } template GpuVector::GpuVector(const GpuVector& other) : GpuVector(other.m_numberOfElements) { assertHasElements(); assertSameSize(other); OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice, other.m_dataOnDevice, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyDeviceToDevice)); } template GpuVector::~GpuVector() { OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice)); } template const T* GpuVector::data() const { return m_dataOnDevice; } template typename GpuVector::size_type GpuVector::dim() const { // Note that there is no way for m_numberOfElements to be non-positive, // but for sanity we still use the safe conversion function here. // // We also doubt that this will lead to any performance penality, but should this prove // to be false, this can be replaced by a simple cast to size_t return detail::to_size_t(m_numberOfElements); } template std::vector GpuVector::asStdVector() const { std::vector temporary(detail::to_size_t(m_numberOfElements)); copyToHost(temporary); return temporary; } template void GpuVector::setZeroAtIndexSet(const GpuVector& indexSet) { detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data()); } template void GpuVector::assertSameSize(const GpuVector& x) const { assertSameSize(x.m_numberOfElements); } template void GpuVector::assertSameSize(int size) const { if (size != m_numberOfElements) { OPM_THROW(std::invalid_argument, fmt::format("Given vector has {}, while we have {}.", size, m_numberOfElements)); } } template void GpuVector::assertHasElements() const { if (m_numberOfElements <= 0) { OPM_THROW(std::invalid_argument, "We have 0 elements"); } } template T* GpuVector::data() { return m_dataOnDevice; } template GpuVector& GpuVector::operator*=(const T& scalar) { assertHasElements(); OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1)); return *this; } template GpuVector& GpuVector::axpy(T alpha, const GpuVector& y) { assertHasElements(); assertSameSize(y); OPM_CUBLAS_SAFE_CALL(detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, y.data(), 1, data(), 1)); return *this; } template T GpuVector::dot(const GpuVector& other) const { assertHasElements(); assertSameSize(other); T result = T(0); OPM_CUBLAS_SAFE_CALL( detail::cublasDot(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &result)); return result; } template T GpuVector::two_norm() const { assertHasElements(); T result = T(0); OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &result)); return result; } template T GpuVector::dot(const GpuVector& other, const GpuVector& indexSet, GpuVector& buffer) const { return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); } template T GpuVector::two_norm(const GpuVector& indexSet, GpuVector& buffer) const { // TODO: [perf] Optimize this to a single call return std::sqrt(this->dot(*this, indexSet, buffer)); } template T GpuVector::dot(const GpuVector& other, const GpuVector& indexSet) const { GpuVector buffer(indexSet.dim()); return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data()); } template T GpuVector::two_norm(const GpuVector& indexSet) const { GpuVector buffer(indexSet.dim()); // TODO: [perf] Optimize this to a single call return std::sqrt(this->dot(*this, indexSet, buffer)); } template GpuVector& GpuVector::operator+=(const GpuVector& other) { assertHasElements(); assertSameSize(other); // TODO: [perf] Make a specialized version of this return axpy(1.0, other); } template GpuVector& GpuVector::operator-=(const GpuVector& other) { assertHasElements(); assertSameSize(other); // TODO: [perf] Make a specialized version of this return axpy(-1.0, other); } template void GpuVector::copyFromHost(const T* dataPointer, size_t numberOfElements) { if (numberOfElements > dim()) { OPM_THROW(std::runtime_error, fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.", dim(), numberOfElements)); } OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice)); } template void GpuVector::copyToHost(T* dataPointer, size_t numberOfElements) const { assertSameSize(detail::to_int(numberOfElements)); OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost)); } template void GpuVector::copyFromHost(const std::vector& data) { copyFromHost(data.data(), data.size()); } template void GpuVector::copyToHost(std::vector& data) const { copyToHost(data.data(), data.size()); } template void GpuVector::prepareSendBuf(GpuVector& buffer, const GpuVector& indexSet) const { return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data()); } template void GpuVector::syncFromRecvBuf(GpuVector& buffer, const GpuVector& indexSet) const { return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data()); } template class GpuVector; template class GpuVector; template class GpuVector; } // namespace Opm::gpuistl