Merge pull request #4679 from kjetilly/cuistl_set_device

Setting CUDA device for MPI runs.
This commit is contained in:
Atgeirr Flø Rasmussen 2023-06-01 11:56:43 +02:00 committed by GitHub
commit c0dbbf7449
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 97 additions and 0 deletions

View File

@ -151,6 +151,7 @@ if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSeqILU0.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/set_device.cpp)
# CUISTL HEADERS
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp)
@ -179,6 +180,7 @@ if(CUDA_FOUND)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/SolverAdapter.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerHolder.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/set_device.hpp)
endif()
if(OPENCL_FOUND)

View File

@ -34,6 +34,10 @@
#include <opm/simulators/utils/DamarisOutputModule.hpp>
#endif
#if HAVE_CUDA
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#endif
namespace Opm {
Main::Main(int argc, char** argv)
@ -138,6 +142,11 @@ void Main::initMPI()
isSimulationRank_ = (world_rank > 0);
EclGenericVanguard::setCommunication(std::make_unique<Parallel::Communication>(new_comm));
}
#if HAVE_CUDA
Opm::cuistl::setDevice(EclGenericVanguard::comm().rank(), EclGenericVanguard::comm().size());
#endif
#endif // HAVE_MPI
}

View File

@ -0,0 +1,50 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <cuda_runtime.h>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
namespace Opm::cuistl
{
void
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
{
int deviceCount = -1;
cudaGetDeviceCount(&deviceCount);
if (deviceCount <= 0) {
// If they have CUDA enabled (ie. using a component that needs CUDA, eg. cubicgstab or CUILU0), this will fail
// later down the line. At this point in the simulator, we can not determine if CUDA is enabled, so we can only
// issue a warning.
OpmLog::warning("Could not find any CUDA devices.");
return;
}
// Now do a round robin kind of assignment
// TODO: We need to be more sophistacted here. We have no guarantee this will pick the correct device.
const auto deviceId = mpiRank % deviceCount;
OPM_CUDA_SAFE_CALL(cudaDeviceReset());
OPM_CUDA_SAFE_CALL(cudaThreadExit());
OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
+ " devices).");
}
} // namespace Opm::cuistl

View File

@ -0,0 +1,36 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_SET_DEVICE_HEADER
#define OPM_CUISTL_SET_DEVICE_HEADER
namespace Opm::cuistl
{
//! @brief Sets the CUDA device used by the calling process in the setting of MPI
//!
//! The device is picked round-robin: the device id is mpiRank modulo the number of CUDA devices
//! reported by the CUDA runtime.
//!
//! @param mpiRank rank of the calling MPI process
//! @param numberOfMpiRanks total number of MPI ranks (currently not used by the implementation)
//!
//! @note This assumes that every node has equally many GPUs, all of the same caliber
//!
//! @note This probably needs to be called *before* MPI_Init if one uses GPUDirect transfers (see eg.
//! https://devtalk.nvidia.com/default/topic/752046/teaching-and-curriculum-support/multi-gpu-system-running-mpi-cuda-/
//! )
//!
//! @note If no CUDA device is found, a warning is logged and no device is selected.
void setDevice(int mpiRank, int numberOfMpiRanks);
} // namespace Opm::cuistl
#endif