Mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)

refactor opm_cuda_safe_call

commit d2681b26ed
parent 85a9ad2b61

@@ -530,7 +530,7 @@ if(CUDA_FOUND)
 if (NOT USE_HIP)
 target_link_libraries( opmsimulators PUBLIC ${CUDA_cusparse_LIBRARY} )
 target_link_libraries( opmsimulators PUBLIC ${CUDA_cublas_LIBRARY} )
-foreach(tgt test_cuda_safe_call test_cuda_check_last_error test_cuvector)
+foreach(tgt test_gpu_safe_call test_cuda_check_last_error test_cuvector)
 target_link_libraries(${tgt} CUDA::cudart)
 endforeach()
 endif()
@@ -545,7 +545,7 @@ if(CUDA_FOUND)
 endif()
 set_tests_properties(cusparse_safe_call
 cublas_safe_call
-cuda_safe_call
+gpu_safe_call
 cuda_check_last_error
 cublas_handle
 GpuJac

@@ -226,7 +226,7 @@ if (HAVE_CUDA)
 # HEADERS
 ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/autotuner.hpp)
 ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/coloringAndReorderingUtils.hpp)
-ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cuda_safe_call.hpp)
+ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpu_safe_call.hpp)
 ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_matrix_operations.hpp)
 ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cusparse_safe_call.hpp)
 ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/cublas_safe_call.hpp)
@@ -392,7 +392,7 @@ if (HAVE_CUDA)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuBuffer.cu)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuView.cu)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp)
-ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp)
+ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_gpu_safe_call.cpp)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp)
 ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuJac.cpp)

@@ -29,7 +29,7 @@
 #include <opm/simulators/linalg/gpuistl_hip/GpuSeqILU0.hpp>
 #include <opm/simulators/linalg/gpuistl_hip/PreconditionerAdapter.hpp>
 #include <opm/simulators/linalg/gpuistl_hip/PreconditionerConvertFieldTypeAdapter.hpp>
-#include <opm/simulators/linalg/gpuistl_hip/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl_hip/detail/gpu_safe_call.hpp>
 #else
 #include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
@@ -38,6 +38,6 @@
 #include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
 #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
 #include <opm/simulators/linalg/gpuistl/PreconditionerConvertFieldTypeAdapter.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #endif
 #endif

@@ -22,7 +22,7 @@
 #include <fmt/core.h>
 #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 namespace Opm::gpuistl
 {
@@ -40,7 +40,7 @@ GpuBuffer<T>::GpuBuffer(const size_t numberOfElements)
 if (numberOfElements < 1) {
 OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
 }
-OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
+OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
 }

 template <class T>
@@ -48,7 +48,7 @@ GpuBuffer<T>::GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
 : GpuBuffer(numberOfElements)
 {

-OPM_CUDA_SAFE_CALL(cudaMemcpy(
+OPM_GPU_SAFE_CALL(cudaMemcpy(
 m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
 }

@@ -58,7 +58,7 @@ GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
 {
 assertHasElements();
 assertSameSize(other);
-OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
 other.m_dataOnDevice,
 m_numberOfElements * sizeof(T),
 cudaMemcpyDeviceToDevice));
@@ -67,7 +67,7 @@ GpuBuffer<T>::GpuBuffer(const GpuBuffer<T>& other)
 template <class T>
 GpuBuffer<T>::~GpuBuffer()
 {
-OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
+OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
 }

 template <typename T>
@@ -86,17 +86,17 @@ GpuBuffer<T>::resize(size_t newSize)
 }
 // Allocate memory for the new buffer
 T* tmpBuffer = nullptr;
-OPM_CUDA_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));
+OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));

 // Move the data from the old to the new buffer with truncation
 size_t sizeOfMove = std::min({m_numberOfElements, newSize});
-OPM_CUDA_SAFE_CALL(cudaMemcpy(tmpBuffer,
+OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
 m_dataOnDevice,
 sizeOfMove * sizeof(T),
 cudaMemcpyDeviceToDevice));

 // free the old buffer
-OPM_CUDA_SAFE_CALL(cudaFree(m_dataOnDevice));
+OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));

 // swap the buffers
 m_dataOnDevice = tmpBuffer;
@@ -164,7 +164,7 @@ GpuBuffer<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
 size(),
 numberOfElements));
 }
-OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
 }

 template <class T>
@@ -172,7 +172,7 @@ void
 GpuBuffer<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
 {
 assertSameSize(numberOfElements);
-OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
+OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
 }

 template <class T>
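
The GpuBuffer copy paths touched above are typically exercised from host code along the lines of the following sketch. It relies only on the GpuBuffer constructor and copyToHost overloads visible in the hunks; the function name roundTripExample is made up for illustration and is not part of this commit.

    #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
    #include <vector>

    void roundTripExample()
    {
        std::vector<double> hostData {1.0, 2.0, 3.0};

        // Allocates device memory and copies hostData onto the device;
        // the cudaMalloc/cudaMemcpy inside are wrapped in OPM_GPU_SAFE_CALL after this change.
        Opm::gpuistl::GpuBuffer<double> deviceBuffer(hostData.data(), hostData.size());

        // Copy the data back to the host; a CUDA error surfaces as an exception
        // thrown by the OPM_GPU_SAFE_CALL wrapper instead of being silently ignored.
        std::vector<double> result(hostData.size());
        deviceBuffer.copyToHost(result.data(), result.size());
    }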

@@ -23,7 +23,7 @@
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>

 namespace Opm::gpuistl
@@ -40,7 +40,7 @@ GpuVector<T>::GpuVector(const size_t numberOfElements)
 : m_numberOfElements(detail::to_int(numberOfElements))
 , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
 {
-OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
+OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
 }

 template <class T>
@@ -48,7 +48,7 @@ GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
 : GpuVector(numberOfElements)
 {

-OPM_CUDA_SAFE_CALL(cudaMemcpy(
+OPM_GPU_SAFE_CALL(cudaMemcpy(
 m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
 }

@@ -68,7 +68,7 @@ GpuVector<T>::operator=(const GpuVector<T>& other)
 assertHasElements();
 assertSameSize(other);

-OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
 other.m_dataOnDevice,
 detail::to_size_t(m_numberOfElements) * sizeof(T),
 cudaMemcpyDeviceToDevice));
@@ -81,7 +81,7 @@ GpuVector<T>::GpuVector(const GpuVector<T>& other)
 {
 assertHasElements();
 assertSameSize(other);
-OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
+OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
 other.m_dataOnDevice,
 detail::to_size_t(m_numberOfElements) * sizeof(T),
 cudaMemcpyDeviceToDevice));
@@ -90,7 +90,7 @@ GpuVector<T>::GpuVector(const GpuVector<T>& other)
 template <class T>
 GpuVector<T>::~GpuVector()
 {
-OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
+OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
 }

 template <typename T>
@@ -263,7 +263,7 @@ GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
 dim(),
 numberOfElements));
 }
-OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
 }

 template <class T>
@@ -271,7 +271,7 @@ void
 GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
 {
 assertSameSize(detail::to_int(numberOfElements));
-OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
+OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
 }

 template <class T>

@@ -21,7 +21,7 @@
 #include <algorithm>
 #include <fmt/core.h>
 #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 namespace Opm::gpuistl
 {
@@ -51,7 +51,7 @@ GpuView<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
 size(),
 numberOfElements));
 }
-OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
+OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
 }

 template <class T>
@@ -59,7 +59,7 @@ void
 GpuView<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
 {
 assertSameSize(numberOfElements);
-OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
+OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
 }

 template <class T>

@@ -21,7 +21,7 @@
 #include <limits>
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/common/OpmLog/OpmLog.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <string>
 #include <utility>

@@ -47,7 +47,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)

 // create the events
 for (int i = 0; i < runs + 1; ++i) {
-OPM_CUDA_SAFE_CALL(cudaEventCreate(&events[i]));
+OPM_GPU_SAFE_CALL(cudaEventCreate(&events[i]));
 }

 // Initialize helper variables
@@ -59,21 +59,21 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
 for (int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) {

 // record a first event, and then an event after each kernel
-OPM_CUDA_SAFE_CALL(cudaEventRecord(events[0]));
+OPM_GPU_SAFE_CALL(cudaEventRecord(events[0]));
 for (int i = 0; i < runs; ++i) {
 f(thrBlockSize); // runs an arbitrary function with the provided arguments
-OPM_CUDA_SAFE_CALL(cudaEventRecord(events[i + 1]));
+OPM_GPU_SAFE_CALL(cudaEventRecord(events[i + 1]));
 }

 // make suret he runs are over
-OPM_CUDA_SAFE_CALL(cudaEventSynchronize(events[runs]));
+OPM_GPU_SAFE_CALL(cudaEventSynchronize(events[runs]));

 // kernel launch was valid
 if (cudaSuccess == cudaGetLastError()) {
 // check if we beat the record for the fastest kernel
 for (int i = 0; i < runs; ++i) {
 float candidateBlockSizeTime;
-OPM_CUDA_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
+OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
 if (candidateBlockSizeTime < bestTime) { // checks if this configuration beat the current best
 bestTime = candidateBlockSizeTime;
 bestBlockSize = thrBlockSize;
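
The tuning loop above follows the standard CUDA event-timing pattern. A stripped-down sketch of that pattern with the renamed macro is shown below; timeOneLaunch is a hypothetical helper written for illustration, not code from this commit.

    #include <cuda_runtime.h>
    #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

    // Time one invocation of a callable with CUDA events, wrapping every
    // runtime call in OPM_GPU_SAFE_CALL so failures throw instead of going unnoticed.
    template <class Callable>
    float timeOneLaunch(Callable&& f, int threadBlockSize)
    {
        cudaEvent_t start, stop;
        OPM_GPU_SAFE_CALL(cudaEventCreate(&start));
        OPM_GPU_SAFE_CALL(cudaEventCreate(&stop));

        OPM_GPU_SAFE_CALL(cudaEventRecord(start));
        f(threadBlockSize); // the kernel launch under test
        OPM_GPU_SAFE_CALL(cudaEventRecord(stop));
        OPM_GPU_SAFE_CALL(cudaEventSynchronize(stop));

        float milliseconds = 0.0f;
        OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&milliseconds, start, stop));

        OPM_GPU_SAFE_CALL(cudaEventDestroy(start));
        OPM_GPU_SAFE_CALL(cudaEventDestroy(stop));
        return milliseconds;
    }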

@@ -20,7 +20,7 @@
 #define OPM_CUDA_CHECK_LAST_ERROR_HPP
 #include <cuda_runtime.h>
 #include <fmt/core.h>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 /**
 * @brief OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE checks the return type of cudaDeviceSynchronize(),
@@ -38,7 +38,7 @@
 * @note This can be used to debug the code, or simply make sure that no error has occured.
 * @note This is a rather heavy operation, so prefer to use only in Debug mode (see OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG)
 */
-#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize())
+#define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE OPM_GPU_SAFE_CALL(cudaDeviceSynchronize())

 #ifdef NDEBUG
 #define OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG
@@ -50,7 +50,7 @@
 *
 * Example usage:
 * @code{.cpp}
-* #include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 *
 * void some_function() {
 * OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG;
@@ -78,7 +78,7 @@
 *
 * @note This can be used to debug the code, or simply make sure that no error has occured.
 */
-#define OPM_CUDA_CHECK_LAST_ERROR OPM_CUDA_SAFE_CALL(cudaGetLastError())
+#define OPM_CUDA_CHECK_LAST_ERROR OPM_GPU_SAFE_CALL(cudaGetLastError())

 #ifdef NDEBUG
 #define OPM_CUDA_CHECK_LAST_ERROR_IF_DEBUG
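
The two macros defined in this header are meant to bracket kernel launches: a cheap cudaGetLastError() check right after the launch, and a full device synchronization that is only compiled in for debug builds. A minimal sketch of that placement follows; noopKernel and launchAndCheck are placeholders for illustration, and the include path is inferred from the OPM_CUDA_CHECK_LAST_ERROR_HPP guard shown above.

    #include <cuda_runtime.h>
    #include <opm/simulators/linalg/gpuistl/detail/cuda_check_last_error.hpp>

    // Trivial kernel used only to show where the checking macros go.
    __global__ void noopKernel() { }

    void launchAndCheck()
    {
        noopKernel<<<1, 32>>>();

        // Cheap check that the launch was accepted
        // (expands to OPM_GPU_SAFE_CALL(cudaGetLastError()) after this change).
        OPM_CUDA_CHECK_LAST_ERROR;

        // Heavy full synchronization; empty in NDEBUG builds.
        OPM_CUDA_CHECK_DEVICE_SYNCHRONIZE_IF_DEBUG;
    }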

@@ -21,7 +21,7 @@
 #include <cstddef>
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 /*
 This file provides some logic for handling how to choose the correct thread-block size
@@ -51,7 +51,7 @@ getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
 }
 int blockSize;
 int tmpGridSize;
-OPM_CUDA_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
+OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
 return blockSize;
 }


@@ -16,8 +16,8 @@
 You should have received a copy of the GNU General Public License
 along with OPM. If not, see <http://www.gnu.org/licenses/>.
 */
-#ifndef OPM_CUDA_SAFE_CALL_HPP
-#define OPM_CUDA_SAFE_CALL_HPP
+#ifndef OPM_GPU_SAFE_CALL_HPP
+#define OPM_GPU_SAFE_CALL_HPP
 #include <cuda_runtime.h>
 #include <fmt/core.h>
 #include <opm/common/ErrorMacros.hpp>
@@ -48,9 +48,9 @@ getCudaErrorMessage(cudaError_t error,
 const std::string_view& functionName,
 size_t lineNumber)
 {
-return fmt::format("CUDA expression did not execute correctly. Expression was: \n"
+return fmt::format("GPU expression did not execute correctly. Expression was: \n"
 " {}\n"
-"CUDA error was {}\n"
+"GPU error was {}\n"
 "in function {}, in {}, at line {}\n",
 expression,
 cudaGetErrorString(error),
@@ -60,12 +60,12 @@ getCudaErrorMessage(cudaError_t error,
 }

 /**
-* @brief cudaSafeCall checks the return type of the CUDA expression (function call) and throws an exception if it
+* @brief cudaSafeCall checks the return type of the GPU expression (function call) and throws an exception if it
 * does not equal cudaSuccess.
 *
 * Example usage:
 * @code{.cpp}
-* #include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 * #include <cuda_runtime.h>
 *
 * void some_function() {
@@ -74,7 +74,7 @@ getCudaErrorMessage(cudaError_t error,
 * }
 * @endcode
 *
-* @note It is probably easier to use the macro OPM_CUDA_SAFE_CALL
+* @note It is probably easier to use the macro OPM_GPU_SAFE_CALL
 *
 * @todo Refactor to use std::source_location once we shift to C++20
 */
@@ -91,7 +91,7 @@ cudaSafeCall(cudaError_t error,
 }

 /**
-* @brief cudaWarnIfError checks the return type of the CUDA expression (function call) and issues a warning if it
+* @brief cudaWarnIfError checks the return type of the GPU expression (function call) and issues a warning if it
 * does not equal cudaSuccess.
 *
 * @param error the error code from cublas
@@ -102,7 +102,7 @@ cudaSafeCall(cudaError_t error,
 *
 * Example usage:
 * @code{.cpp}
-* #include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 * #include <cuda_runtime.h>
 *
 * void some_function() {
@@ -111,9 +111,9 @@ cudaSafeCall(cudaError_t error,
 * }
 * @endcode
 *
-* @note It is probably easier to use the macro OPM_CUDA_WARN_IF_ERROR
+* @note It is probably easier to use the macro OPM_GPU_WARN_IF_ERROR
 *
-* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
+* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
 *
 * @todo Refactor to use std::source_location once we shift to C++20
 */
@@ -131,44 +131,44 @@ cudaWarnIfError(cudaError_t error,
 } // namespace Opm::gpuistl::detail

 /**
-* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
+* @brief OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exception if it
 * does not equal cudaSuccess.
 *
 * Example usage:
 * @code{.cpp}
-* #include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 * #include <cuda_runtime.h>
 *
 * void some_function() {
 * void* somePointer;
-* OPM_CUDA_SAFE_CALL(cudaMalloc(&somePointer, 1));
+* OPM_GPU_SAFE_CALL(cudaMalloc(&somePointer, 1));
 * }
 * @endcode
 *
-* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
+* @note This should be used for any call to the GPU runtime API unless you have a good reason not to.
 */
-#define OPM_CUDA_SAFE_CALL(expression) \
+#define OPM_GPU_SAFE_CALL(expression) \
 ::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)


 /**
-* @brief OPM_CUDA_WARN_IF_ERROR checks the return type of the CUDA expression (function call) and issues a warning if
+* @brief OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warning if
 * it does not equal cudaSuccess.
 *
 * Example usage:
 * @code{.cpp}
-* #include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+* #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 * #include <cuda_runtime.h>
 *
 * void some_function() {
 * void* somePointer;
-* OPM_CUDA_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
+* OPM_GPU_WARN_IF_ERROR(cudaMalloc(&somePointer, 1));
 * }
 * @endcode
 *
-* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
+* @note Prefer the cudaSafeCall/OPM_GPU_SAFE_CALL counterpart unless you really don't want to throw an exception.
 */
-#define OPM_CUDA_WARN_IF_ERROR(expression) \
+#define OPM_GPU_WARN_IF_ERROR(expression) \
 ::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)

 #endif
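
The net effect of this header change on call sites is a pure rename: the underlying cudaSafeCall/cudaWarnIfError helpers are unchanged, only the macros and the header file move from the CUDA name to the GPU name. A minimal sketch of a migrated call site follows; allocateScratch is a made-up function name used only for illustration.

    #include <cuda_runtime.h>
    #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

    void allocateScratch()
    {
        void* scratch = nullptr;

        // Before this commit: OPM_CUDA_SAFE_CALL(cudaMalloc(&scratch, 1024));
        OPM_GPU_SAFE_CALL(cudaMalloc(&scratch, 1024));

        // Warning-only variant for cleanup paths where throwing is undesirable.
        OPM_GPU_WARN_IF_ERROR(cudaFree(scratch));
    }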

@@ -21,7 +21,7 @@
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/cublas_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/cublas_wrapper.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/gpuThreadUtils.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/vector_operations.hpp>
 #include <stdexcept>
@@ -161,7 +161,7 @@ prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int*
 int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
 int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
 prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
-OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
+OPM_GPU_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
 }
 template void prepareSendBuf(const double* deviceA, double* buffer, size_t numberOfElements, const int* indices);
 template void prepareSendBuf(const float* deviceA, float* buffer, size_t numberOfElements, const int* indices);

@@ -19,7 +19,7 @@
 #include <config.h>
 #include <cuda_runtime.h>
 #include <opm/common/OpmLog/OpmLog.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/set_device.hpp>
 namespace Opm::gpuistl
 {
@@ -41,8 +41,8 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
 // Now do a round robin kind of assignment
 // TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device.
 const auto deviceId = mpiRank % deviceCount;
-OPM_CUDA_SAFE_CALL(cudaDeviceReset());
-OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
+OPM_GPU_SAFE_CALL(cudaDeviceReset());
+OPM_GPU_SAFE_CALL(cudaSetDevice(deviceId));
 OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
 + " devices).");
 }

@@ -25,7 +25,7 @@

 #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 #include <array>
 #include <algorithm>

@@ -28,7 +28,7 @@
 #include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/detail/cusparse_matrix_operations.hpp>
 #include <random>
 #include <vector>

@@ -28,7 +28,7 @@
 #include <memory>
 #include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <opm/simulators/linalg/gpuistl/set_device.hpp>
 #include <random>
 #include <mpi.h>

@@ -30,7 +30,7 @@
 #include <opm/simulators/linalg/gpuistl/GpuSeqILU0.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
 #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

 #include <limits>
 #include <memory>

@@ -25,7 +25,7 @@
 #include <memory>
 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <random>

 BOOST_AUTO_TEST_CASE(TestConstruction1D)

@@ -25,7 +25,7 @@
 #include <dune/common/fvector.hh>
 #include <dune/istl/bvector.hh>
 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <random>

 BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
@@ -106,7 +106,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
 auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());

 std::vector<double> buffer(data.size(), 0.0);
-OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
+OPM_GPU_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
 BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
 }

@@ -26,7 +26,7 @@
 #include <dune/istl/bvector.hh>
 #include <opm/simulators/linalg/gpuistl/GpuView.hpp>
 #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
 #include <random>
 #include <array>
 #include <algorithm>

@@ -18,15 +18,15 @@
 */
 #include <config.h>

-#define BOOST_TEST_MODULE TestCudaSafeCall
+#define BOOST_TEST_MODULE TestGpuSafeCall
 #include <boost/test/unit_test.hpp>
 #include <cuda_runtime.h>
-#include <opm/simulators/linalg/gpuistl/detail/cuda_safe_call.hpp>
+#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>

-BOOST_AUTO_TEST_CASE(TestCudaMalloc)
+BOOST_AUTO_TEST_CASE(TestGpuMalloc)
 {
 void* pointer;
-BOOST_CHECK_NO_THROW(OPM_CUDA_SAFE_CALL(cudaMalloc(&pointer, 1)););
+BOOST_CHECK_NO_THROW(OPM_GPU_SAFE_CALL(cudaMalloc(&pointer, 1)););
 }


@@ -41,6 +41,6 @@ BOOST_AUTO_TEST_CASE(TestThrows)
 errorCodes = {{cudaErrorAddressOfConstant, cudaErrorAlreadyAcquired}};
 #endif
 for (auto code : errorCodes) {
-BOOST_CHECK_THROW(OPM_CUDA_SAFE_CALL(code), std::exception);
+BOOST_CHECK_THROW(OPM_GPU_SAFE_CALL(code), std::exception);
 }
 }