From 3ee5eddf48a5d5f2f057b5194628f0ba3d1ebf13 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 09:22:21 +0200 Subject: [PATCH 1/8] threadmanager.hh: rename to threadmanager.hpp --- CMakeLists_files.cmake | 2 +- opm/models/discretization/common/fvbasediscretization.hh | 2 +- opm/models/discretization/common/fvbaselinearizer.hh | 2 +- .../parallel/{threadmanager.hh => threadmanager.hpp} | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) rename opm/models/parallel/{threadmanager.hh => threadmanager.hpp} (97%) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 01ace7fcf..0d473fd17 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -671,7 +671,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/models/parallel/mpiutil.hh opm/models/parallel/tasklets.hh opm/models/parallel/threadedentityiterator.hh - opm/models/parallel/threadmanager.hh + opm/models/parallel/threadmanager.hpp opm/models/ptflash/flashindices.hh opm/models/ptflash/flashintensivequantities.hh opm/models/ptflash/flashlocalresidual.hh diff --git a/opm/models/discretization/common/fvbasediscretization.hh b/opm/models/discretization/common/fvbasediscretization.hh index 2765164cf..df0165701 100644 --- a/opm/models/discretization/common/fvbasediscretization.hh +++ b/opm/models/discretization/common/fvbasediscretization.hh @@ -56,7 +56,7 @@ #include #include -#include +#include #include #include diff --git a/opm/models/discretization/common/fvbaselinearizer.hh b/opm/models/discretization/common/fvbaselinearizer.hh index c53035a46..f40d843fc 100644 --- a/opm/models/discretization/common/fvbaselinearizer.hh +++ b/opm/models/discretization/common/fvbaselinearizer.hh @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include diff --git a/opm/models/parallel/threadmanager.hh b/opm/models/parallel/threadmanager.hpp similarity index 97% rename from opm/models/parallel/threadmanager.hh rename to opm/models/parallel/threadmanager.hpp index 2aa83d8fc..13e630509 100644 --- a/opm/models/parallel/threadmanager.hh +++ b/opm/models/parallel/threadmanager.hpp @@ -24,8 +24,8 @@ * \file * \copydoc Opm::ThreadManager */ -#ifndef EWOMS_THREAD_MANAGER_HH -#define EWOMS_THREAD_MANAGER_HH +#ifndef OPM_THREAD_MANAGER_HPP +#define OPM_THREAD_MANAGER_HPP #ifdef _OPENMP #include @@ -136,6 +136,7 @@ private: template int ThreadManager::numThreads_ = 1; + } // namespace Opm -#endif +#endif // OPM_THREAD_MANAGER_HPP From e7a9c4cd2198c139de9c4fc423a18375c1e5c8b0 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 09:52:31 +0200 Subject: [PATCH 2/8] threadmanager: remove unused typetag template parameter and move implementation to a translation unit --- CMakeLists.txt | 1 + CMakeLists_files.cmake | 1 + .../common/fvbasediscretization.hh | 2 +- opm/models/parallel/threadmanager.cpp | 93 +++++++++++++++++++ opm/models/parallel/threadmanager.hpp | 68 +------------- opm/models/utils/start.hh | 8 +- opm/simulators/flow/FlowMain.hpp | 10 +- tests/test_RestartSerialization.cpp | 2 +- tests/test_equil.cpp | 2 +- tests/test_outputdir.cpp | 4 +- 10 files changed, 114 insertions(+), 77 deletions(-) create mode 100644 opm/models/parallel/threadmanager.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bbd7374a4..c5526517d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -407,6 +407,7 @@ if(QuadMath_FOUND) co2injection_flash_ecfv co2injection_flash_vcfv) opm_add_test(${tapp}_quad + LIBRARIES opmsimulators opmcommon EXE_NAME ${tapp}_quad SOURCES examples/${tapp}.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 0d473fd17..6c78e1886 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -61,6 +61,7 @@ list (APPEND MAIN_SOURCE_FILES opm/models/blackoil/blackoilmicpparams.cpp opm/models/blackoil/blackoilpolymerparams.cpp opm/models/blackoil/blackoilsolventparams.cpp + opm/models/parallel/threadmanager.cpp opm/simulators/flow/ActionHandler.cpp opm/simulators/flow/Banners.cpp opm/simulators/flow/BlackoilModelParameters.cpp diff --git a/opm/models/discretization/common/fvbasediscretization.hh b/opm/models/discretization/common/fvbasediscretization.hh index df0165701..cee45056c 100644 --- a/opm/models/discretization/common/fvbasediscretization.hh +++ b/opm/models/discretization/common/fvbasediscretization.hh @@ -219,7 +219,7 @@ struct ConstraintsContext */ template struct ThreadManager -{ using type = ::Opm::ThreadManager; }; +{ using type = ::Opm::ThreadManager; }; template struct UseLinearizationLock diff --git a/opm/models/parallel/threadmanager.cpp b/opm/models/parallel/threadmanager.cpp new file mode 100644 index 000000000..f9a77afd2 --- /dev/null +++ b/opm/models/parallel/threadmanager.cpp @@ -0,0 +1,93 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include +#include + +namespace Opm { + +int ThreadManager::numThreads_ = 1; + +void ThreadManager::registerParameters() +{ + Parameters::Register + ("The maximum number of threads to be instantiated per process " + "('-1' means 'automatic')"); +} + +void ThreadManager::init(bool queryCommandLineParameter) +{ + if (queryCommandLineParameter) { + numThreads_ = Parameters::Get(); + + // some safety checks. This is pretty ugly macro-magic, but so what? +#if !defined(_OPENMP) + if (numThreads_ != 1 && numThreads_ != -1) { + throw std::invalid_argument("OpenMP is not available. The only valid values for " + "threads-per-process is 1 and -1 but it is "+std::to_string(numThreads_)+"!"); + } + numThreads_ = 1; +#elif !defined NDEBUG && defined DUNE_INTERFACECHECK + if (numThreads_ != 1) { + throw std::invalid_argument("You explicitly enabled Barton-Nackman interface checking in Dune. " + "The Dune implementation of this is currently incompatible with " + "thread parallelism!"); + } + numThreads_ = 1; +#else + if (numThreads_ == 0) { + throw std::invalid_argument("Zero threads per process are not possible: It must be at least 1, " + "(or -1 for 'automatic')!"); + } +#endif + +#ifdef _OPENMP + // actually limit the number of threads + if (numThreads_ > 0) { + omp_set_num_threads(numThreads_); + } +#endif + } + +#ifdef _OPENMP + // get the number of threads which are used in the end. + numThreads_ = omp_get_max_threads(); +#endif +} + +unsigned ThreadManager::threadId() +{ +#ifdef _OPENMP + return static_cast(omp_get_thread_num()); +#else + return 0; +#endif +} + +} // namespace Opm diff --git a/opm/models/parallel/threadmanager.hpp b/opm/models/parallel/threadmanager.hpp index 13e630509..8b12345d8 100644 --- a/opm/models/parallel/threadmanager.hpp +++ b/opm/models/parallel/threadmanager.hpp @@ -27,22 +27,11 @@ #ifndef OPM_THREAD_MANAGER_HPP #define OPM_THREAD_MANAGER_HPP -#ifdef _OPENMP -#include -#endif - -#include -#include -#include - -#include - namespace Opm { /*! * \brief Simplifies multi-threaded capabilities. */ -template class ThreadManager { public: @@ -58,12 +47,7 @@ public: /*! * \brief Register all run-time parameters of the thread manager. */ - static void registerParameters() - { - Parameters::Register - ("The maximum number of threads to be instantiated per process " - "('-1' means 'automatic')"); - } + static void registerParameters(); /*! * \brief Initialize number of threads used thread manager. @@ -74,43 +58,7 @@ public: * outside of this function (e.g. by OPM_NUM_THREADS or in the simulator by * the ThreadsPerProcess parameter). */ - static void init(bool queryCommandLineParameter = true) - { - if (queryCommandLineParameter) - { - numThreads_ = Parameters::Get(); - - // some safety checks. This is pretty ugly macro-magic, but so what? -#if !defined(_OPENMP) - if (numThreads_ != 1 && numThreads_ != -1) - throw std::invalid_argument("OpenMP is not available. The only valid values for " - "threads-per-process is 1 and -1 but it is "+std::to_string(numThreads_)+"!"); - numThreads_ = 1; -#elif !defined NDEBUG && defined DUNE_INTERFACECHECK - if (numThreads_ != 1) - throw std::invalid_argument("You explicitly enabled Barton-Nackman interface checking in Dune. " - "The Dune implementation of this is currently incompatible with " - "thread parallelism!"); - numThreads_ = 1; -#else - - if (numThreads_ == 0) - throw std::invalid_argument("Zero threads per process are not possible: It must be at least 1, " - "(or -1 for 'automatic')!"); -#endif - -#ifdef _OPENMP - // actually limit the number of threads - if (numThreads_ > 0) - omp_set_num_threads(numThreads_); -#endif - } - -#ifdef _OPENMP - // get the number of threads which are used in the end. - numThreads_ = omp_get_max_threads(); -#endif - } + static void init(bool queryCommandLineParameter = true); /*! * \brief Return the maximum number of threads of the current process. @@ -121,22 +69,12 @@ public: /*! * \brief Return the index of the current OpenMP thread */ - static unsigned threadId() - { -#ifdef _OPENMP - return static_cast(omp_get_thread_num()); -#else - return 0; -#endif - } + static unsigned threadId(); private: static int numThreads_; }; -template -int ThreadManager::numThreads_ = 1; - } // namespace Opm #endif // OPM_THREAD_MANAGER_HPP diff --git a/opm/models/utils/start.hh b/opm/models/utils/start.hh index 62604c471..0c9714d84 100644 --- a/opm/models/utils/start.hh +++ b/opm/models/utils/start.hh @@ -76,7 +76,7 @@ template static inline void registerAllParameters_(bool finalizeRegistration = true) { using Simulator = GetPropType; - using ThreadManager = GetPropType; + using TM = GetPropType; Parameters::Register ("An .ini file which contains a set of run-time parameters"); @@ -84,7 +84,7 @@ static inline void registerAllParameters_(bool finalizeRegistration = true) ("Print the values of the run-time parameters at the " "start of the simulation"); - ThreadManager::registerParameters(); + TM::registerParameters(); Simulator::registerParameters(); if (finalizeRegistration) { @@ -279,7 +279,7 @@ static inline int start(int argc, char **argv, bool registerParams=true) using Scalar = GetPropType; using Simulator = GetPropType; using Problem = GetPropType; - using ThreadManager = GetPropType; + using TM = GetPropType; // set the signal handlers to reset the TTY to a well defined state on unexpected // program aborts @@ -304,7 +304,7 @@ static inline int start(int argc, char **argv, bool registerParams=true) if (paramStatus == 2) return 0; - ThreadManager::init(); + TM::init(); // initialize MPI, finalize is done automatically on exit #if HAVE_DUNE_FEM diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index f9a391ef4..b9727a85c 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -38,6 +38,10 @@ #include #endif +#ifdef _OPENMP +#include +#endif + #include #include #include @@ -97,7 +101,7 @@ namespace Opm { ("Developer option to see whether logging was on non-root processors. " "In that case it will be appended to the *.DBG or *.PRT files"); - ThreadManager::registerParameters(); + ThreadManager::registerParameters(); Simulator::registerParameters(); // register the base parameters @@ -305,8 +309,8 @@ namespace Opm { omp_set_num_threads(threads); #endif - using ThreadManager = GetPropType; - ThreadManager::init(false); + using TM = GetPropType; + TM::init(false); } void mergeParallelLogFiles() diff --git a/tests/test_RestartSerialization.cpp b/tests/test_RestartSerialization.cpp index 4d3b9ec28..c69a6b539 100644 --- a/tests/test_RestartSerialization.cpp +++ b/tests/test_RestartSerialization.cpp @@ -527,7 +527,7 @@ struct AquiferFixture { "test_RestartSerialization", "--ecl-deck-file-name=GLIFT1.DATA" }; - Opm::ThreadManager::registerParameters(); + Opm::ThreadManager::registerParameters(); AdaptiveTimeStepping::registerParameters(); BlackoilModelParameters::registerParameters(); Parameters::Register("Do *NOT* use!"); diff --git a/tests/test_equil.cpp b/tests/test_equil.cpp index fdc3157bb..1971e5293 100644 --- a/tests/test_equil.cpp +++ b/tests/test_equil.cpp @@ -232,7 +232,7 @@ struct EquilFixture { #endif using namespace Opm; FlowGenericVanguard::setCommunication(std::make_unique()); - Opm::ThreadManager::registerParameters(); + Opm::ThreadManager::registerParameters(); BlackoilModelParameters::registerParameters(); AdaptiveTimeStepping::registerParameters(); Parameters::Register("Dummy added for the well model to compile."); diff --git a/tests/test_outputdir.cpp b/tests/test_outputdir.cpp index cce63c26f..d0bf0d466 100644 --- a/tests/test_outputdir.cpp +++ b/tests/test_outputdir.cpp @@ -116,7 +116,7 @@ BOOST_FIXTURE_TEST_CASE(WithOutputDir, Fixture) Opm::Parameters::reset(); - Opm::ThreadManager::registerParameters(); + Opm::ThreadManager::registerParameters(); Opm::Main main(3, const_cast(no_param), false); BOOST_CHECK_EQUAL(main.justInitialize(), EXIT_SUCCESS); @@ -154,7 +154,7 @@ BOOST_FIXTURE_TEST_CASE(NoOutputDir, Fixture) const char* no_param[] = {"test_outputdir", input_file_path.c_str(), nullptr}; Opm::Parameters::reset(); - Opm::ThreadManager::registerParameters(); + Opm::ThreadManager::registerParameters(); Opm::Main main(2, const_cast(no_param), false); From fcd016869198dd342eeb8e4a88d928f48143a01a Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 09:58:53 +0200 Subject: [PATCH 3/8] mpiutil.hh: rename to mpiutil.hpp --- CMakeLists_files.cmake | 2 +- opm/models/parallel/{mpiutil.hh => mpiutil.hpp} | 9 +++------ opm/models/utils/simulator.hh | 2 +- tests/models/test_mpiutil.cpp | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) rename opm/models/parallel/{mpiutil.hh => mpiutil.hpp} (98%) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 6c78e1886..f736ab7e9 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -669,7 +669,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/models/nonlinear/nullconvergencewriter.hh opm/models/parallel/gridcommhandles.hh opm/models/parallel/mpibuffer.hh - opm/models/parallel/mpiutil.hh + opm/models/parallel/mpiutil.hpp opm/models/parallel/tasklets.hh opm/models/parallel/threadedentityiterator.hh opm/models/parallel/threadmanager.hpp diff --git a/opm/models/parallel/mpiutil.hh b/opm/models/parallel/mpiutil.hpp similarity index 98% rename from opm/models/parallel/mpiutil.hh rename to opm/models/parallel/mpiutil.hpp index 6ccc1a12c..5ed7a8629 100644 --- a/opm/models/parallel/mpiutil.hh +++ b/opm/models/parallel/mpiutil.hpp @@ -24,8 +24,8 @@ * \file * \copydoc Opm::MpiBuffer */ -#ifndef OPM_MATERIAL_MPIUTIL_HH -#define OPM_MATERIAL_MPIUTIL_HH +#ifndef OPM_MPIUTIL_HPP +#define OPM_MPIUTIL_HPP #include @@ -36,11 +36,8 @@ #if HAVE_MPI - #include - - namespace mpiutil_details { @@ -207,5 +204,5 @@ namespace Opm #endif // HAVE_MPI -#endif // OPM_MATERIAL_MPIUTIL_HH +#endif // OPM_MPIUTIL_HPP diff --git a/opm/models/utils/simulator.hh b/opm/models/utils/simulator.hh index 116da39c6..d50b555ce 100644 --- a/opm/models/utils/simulator.hh +++ b/opm/models/utils/simulator.hh @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/tests/models/test_mpiutil.cpp b/tests/models/test_mpiutil.cpp index 327575b43..4a4f85da3 100644 --- a/tests/models/test_mpiutil.cpp +++ b/tests/models/test_mpiutil.cpp @@ -19,7 +19,7 @@ #include -#include +#include #include #include From 27f9277c473daf63dcf7753a4f9b49e1e1aecfd4 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 11:42:23 +0200 Subject: [PATCH 4/8] mpiutil: add translation unit --- CMakeLists_files.cmake | 1 + opm/models/parallel/mpiutil.cpp | 188 ++++++++++++++++++++++++++++++++ opm/models/parallel/mpiutil.hpp | 172 +---------------------------- parallelUnitTests.cmake | 2 + 4 files changed, 193 insertions(+), 170 deletions(-) create mode 100644 opm/models/parallel/mpiutil.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index f736ab7e9..e0027a041 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -61,6 +61,7 @@ list (APPEND MAIN_SOURCE_FILES opm/models/blackoil/blackoilmicpparams.cpp opm/models/blackoil/blackoilpolymerparams.cpp opm/models/blackoil/blackoilsolventparams.cpp + opm/models/parallel/mpiutil.cpp opm/models/parallel/threadmanager.cpp opm/simulators/flow/ActionHandler.cpp opm/simulators/flow/Banners.cpp diff --git a/opm/models/parallel/mpiutil.cpp b/opm/models/parallel/mpiutil.cpp new file mode 100644 index 000000000..80fd6c4d8 --- /dev/null +++ b/opm/models/parallel/mpiutil.cpp @@ -0,0 +1,188 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ +#include +#include + +#include +#include +#include +#include + +#if HAVE_MPI +#include +#include +#endif + +namespace { + +template +int packSize() +{ + int pack_size; + MPI_Pack_size(1, Dune::MPITraits::getType(), MPI_COMM_WORLD, &pack_size); + return pack_size; +} + +// -------- Packer -------- +template +struct Packer +{ + static int size(const T&) + { + return packSize(); + } + + static void pack(const T& content, std::vector& buf, int& offset) + { + MPI_Pack(&content, 1, Dune::MPITraits::getType(), buf.data(), buf.size(), &offset, MPI_COMM_WORLD); + } + + static T unpack(const std::vector& recv_buffer, int& offset) + { + T content; + auto* data = const_cast(recv_buffer.data()); + MPI_Unpack(data, recv_buffer.size(), &offset, &content, 1, Dune::MPITraits::getType(), MPI_COMM_WORLD); + return content; + } +}; + +// -------- Packer, string specialization -------- +template <> +struct Packer +{ + static int size(const std::string& content) + { + return packSize() + content.size()*packSize(); + } + + static void pack(const std::string& content, std::vector& buf, int& offset) + { + unsigned int size = content.size(); + Packer::pack(size, buf, offset); + if (size > 0) { + MPI_Pack(const_cast(content.c_str()), size, MPI_CHAR, buf.data(), buf.size(), &offset, MPI_COMM_WORLD); + } + } + + static std::string unpack(const std::vector& recv_buffer, int& offset) + { + unsigned int size = Packer::unpack(recv_buffer, offset); + std::string text; + if (size > 0) { + auto* data = const_cast(recv_buffer.data()); + std::vector chars(size); + MPI_Unpack(data, recv_buffer.size(), &offset, chars.data(), size, MPI_CHAR, MPI_COMM_WORLD); + text = std::string(chars.data(), size); + } + return text; + } +}; + +// -------- Packer, vector partial specialization -------- +template +struct Packer> +{ + static int size(const std::string& content) + { + int sz = 0; + sz += packSize(); + for (const T& elem : content) { + sz += Packer::size(elem); + } + return sz; + } + + static void pack(const std::vector& content, std::vector& buf, int& offset) + { + unsigned int size = content.size(); + Packer::pack(size, buf, offset); + for (const T& elem : content) { + Packer::pack(elem); + } + } + + static std::vector unpack(const std::vector& recv_buffer, int& offset) + { + unsigned int size = Packer::unpack(recv_buffer, offset); + std::vector content; + content.reserve(size); + for (unsigned int i = 0; i < size; ++i) { + content.push_back(Packer::unpack(recv_buffer, offset)); + } + return content; + } +}; + +} // anonymous namespace + +namespace Opm { + +/// From each rank, gather its string (if not empty) into a vector. +std::vector gatherStrings(const std::string& local_string) +{ +#if HAVE_MPI + using StringPacker = Packer; + + // Pack local messages. + const int message_size = StringPacker::size(local_string); + std::vector buffer(message_size); + int offset = 0; + StringPacker::pack(local_string, buffer, offset); + assert(offset == message_size); + + // Get message sizes and create offset/displacement array for gathering. + int num_processes = -1; + MPI_Comm_size(MPI_COMM_WORLD, &num_processes); + std::vector message_sizes(num_processes); + MPI_Allgather(&message_size, 1, MPI_INT, message_sizes.data(), 1, MPI_INT, MPI_COMM_WORLD); + std::vector displ(num_processes + 1, 0); + std::partial_sum(message_sizes.begin(), message_sizes.end(), displ.begin() + 1); + + // Gather. + std::vector recv_buffer(displ.back()); + MPI_Allgatherv(buffer.data(), buffer.size(), MPI_PACKED, + const_cast(recv_buffer.data()), message_sizes.data(), + displ.data(), MPI_PACKED, + MPI_COMM_WORLD); + + // Unpack and return. + std::vector ret; + for (int process = 0; process < num_processes; ++process) { + offset = displ[process]; + std::string s = StringPacker::unpack(recv_buffer, offset); + if (!s.empty()) { + ret.push_back(s); + } + assert(offset == displ[process + 1]); + } + return ret; +#else + if (local_string.empty()) { + return {}; + } else { + return { local_string }; + } +#endif +} + +} // namespace Opm diff --git a/opm/models/parallel/mpiutil.hpp b/opm/models/parallel/mpiutil.hpp index 5ed7a8629..fdd3825a3 100644 --- a/opm/models/parallel/mpiutil.hpp +++ b/opm/models/parallel/mpiutil.hpp @@ -27,182 +27,14 @@ #ifndef OPM_MPIUTIL_HPP #define OPM_MPIUTIL_HPP -#include - -#include -#include #include #include +namespace Opm { -#if HAVE_MPI -#include - -namespace mpiutil_details -{ - - template - int packSize() - { - int pack_size; - MPI_Pack_size(1, Dune::MPITraits::getType(), MPI_COMM_WORLD, &pack_size); - return pack_size; - } - - // -------- Packer -------- - template - struct Packer - { - static int size(const T&) - { - return packSize(); - } - - static void pack(const T& content, std::vector& buf, int& offset) - { - MPI_Pack(&content, 1, Dune::MPITraits::getType(), buf.data(), buf.size(), &offset, MPI_COMM_WORLD); - } - - static T unpack(const std::vector& recv_buffer, int& offset) - { - T content; - auto* data = const_cast(recv_buffer.data()); - MPI_Unpack(data, recv_buffer.size(), &offset, &content, 1, Dune::MPITraits::getType(), MPI_COMM_WORLD); - return content; - } - }; - - // -------- Packer, string specialization -------- - template <> - struct Packer - { - static int size(const std::string& content) - { - return packSize() + content.size()*packSize(); - } - - static void pack(const std::string& content, std::vector& buf, int& offset) - { - unsigned int size = content.size(); - Packer::pack(size, buf, offset); - if (size > 0) { - MPI_Pack(const_cast(content.c_str()), size, MPI_CHAR, buf.data(), buf.size(), &offset, MPI_COMM_WORLD); - } - } - - static std::string unpack(const std::vector& recv_buffer, int& offset) - { - unsigned int size = Packer::unpack(recv_buffer, offset); - std::string text; - if (size > 0) { - auto* data = const_cast(recv_buffer.data()); - std::vector chars(size); - MPI_Unpack(data, recv_buffer.size(), &offset, chars.data(), size, MPI_CHAR, MPI_COMM_WORLD); - text = std::string(chars.data(), size); - } - return text; - } - }; - - // -------- Packer, vector partial specialization -------- - template - struct Packer> - { - static int size(const std::string& content) - { - int sz = 0; - sz += packSize(); - for (const T& elem : content) { - sz += Packer::size(elem); - } - return sz; - } - - static void pack(const std::vector& content, std::vector& buf, int& offset) - { - unsigned int size = content.size(); - Packer::pack(size, buf, offset); - for (const T& elem : content) { - Packer::pack(elem); - } - } - - static std::vector unpack(const std::vector& recv_buffer, int& offset) - { - unsigned int size = Packer::unpack(recv_buffer, offset); - std::vector content; - content.reserve(size); - for (unsigned int i = 0; i < size; ++i) { - content.push_back(Packer::unpack(recv_buffer, offset)); - } - return content; - } - }; - - -} // anonymous namespace - - -namespace Opm -{ - - /// From each rank, gather its string (if not empty) into a vector. - inline std::vector gatherStrings(const std::string& local_string) - { - using StringPacker = mpiutil_details::Packer; - - // Pack local messages. - const int message_size = StringPacker::size(local_string); - std::vector buffer(message_size); - int offset = 0; - StringPacker::pack(local_string, buffer, offset); - assert(offset == message_size); - - // Get message sizes and create offset/displacement array for gathering. - int num_processes = -1; - MPI_Comm_size(MPI_COMM_WORLD, &num_processes); - std::vector message_sizes(num_processes); - MPI_Allgather(&message_size, 1, MPI_INT, message_sizes.data(), 1, MPI_INT, MPI_COMM_WORLD); - std::vector displ(num_processes + 1, 0); - std::partial_sum(message_sizes.begin(), message_sizes.end(), displ.begin() + 1); - - // Gather. - std::vector recv_buffer(displ.back()); - MPI_Allgatherv(buffer.data(), buffer.size(), MPI_PACKED, - const_cast(recv_buffer.data()), message_sizes.data(), - displ.data(), MPI_PACKED, - MPI_COMM_WORLD); - - // Unpack and return. - std::vector ret; - for (int process = 0; process < num_processes; ++process) { - offset = displ[process]; - std::string s = StringPacker::unpack(recv_buffer, offset); - if (!s.empty()) { - ret.push_back(s); - } - assert(offset == displ[process + 1]); - } - return ret; - } +std::vector gatherStrings(const std::string& local_string); } // namespace Opm -#else // HAVE_MPI - -namespace Opm -{ - inline std::vector gatherStrings(const std::string& local_string) - { - if (local_string.empty()) { - return {}; - } else { - return { local_string }; - } - } -} // namespace Opm - -#endif // HAVE_MPI - #endif // OPM_MPIUTIL_HPP diff --git a/parallelUnitTests.cmake b/parallelUnitTests.cmake index 906b54f5d..4036bf7a1 100644 --- a/parallelUnitTests.cmake +++ b/parallelUnitTests.cmake @@ -184,6 +184,8 @@ opm_add_test(test_rstconv_parallel ) opm_add_test(test_mpiutil + DEPENDS "opmsimulators" + LIBRARIES opmsimulators CONDITION MPI_FOUND AND Boost_UNIT_TEST_FRAMEWORK_FOUND SOURCES From d35d80427e7829a5a4efaa84b3aaa862c04996cb Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 12:01:11 +0200 Subject: [PATCH 5/8] tasklets.hh: rename to tasklets.hpp --- CMakeLists_files.cmake | 2 +- opm/models/io/vtkmultiwriter.hh | 2 +- opm/models/parallel/{tasklets.hh => tasklets.hpp} | 7 ++++--- opm/simulators/flow/EclGenericWriter.hpp | 2 +- tests/models/test_tasklets.cpp | 2 +- tests/models/test_tasklets_failure.cpp | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) rename opm/models/parallel/{tasklets.hh => tasklets.hpp} (99%) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index e0027a041..bf893d36c 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -671,7 +671,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/models/parallel/gridcommhandles.hh opm/models/parallel/mpibuffer.hh opm/models/parallel/mpiutil.hpp - opm/models/parallel/tasklets.hh + opm/models/parallel/tasklets.hpp opm/models/parallel/threadedentityiterator.hh opm/models/parallel/threadmanager.hpp opm/models/ptflash/flashindices.hh diff --git a/opm/models/io/vtkmultiwriter.hh b/opm/models/io/vtkmultiwriter.hh index 1a1827a0f..edf79d1bd 100644 --- a/opm/models/io/vtkmultiwriter.hh +++ b/opm/models/io/vtkmultiwriter.hh @@ -33,7 +33,7 @@ #include "vtktensorfunction.hh" #include -#include +#include #include diff --git a/opm/models/parallel/tasklets.hh b/opm/models/parallel/tasklets.hpp similarity index 99% rename from opm/models/parallel/tasklets.hh rename to opm/models/parallel/tasklets.hpp index ad3508d3c..0e783c45b 100644 --- a/opm/models/parallel/tasklets.hh +++ b/opm/models/parallel/tasklets.hpp @@ -24,8 +24,8 @@ * \file * \brief Provides a mechanism to dispatch work to separate threads */ -#ifndef EWOMS_TASKLETS_HH -#define EWOMS_TASKLETS_HH +#ifndef OPM_TASKLETS_HPP +#define OPM_TASKLETS_HPP #include #include @@ -366,4 +366,5 @@ protected: }; } // end namespace Opm -#endif + +#endif // OPM_TASKLETS_HPP diff --git a/opm/simulators/flow/EclGenericWriter.hpp b/opm/simulators/flow/EclGenericWriter.hpp index c6b39f9a3..fee0f0c65 100644 --- a/opm/simulators/flow/EclGenericWriter.hpp +++ b/opm/simulators/flow/EclGenericWriter.hpp @@ -28,7 +28,7 @@ #ifndef OPM_ECL_GENERIC_WRITER_HPP #define OPM_ECL_GENERIC_WRITER_HPP -#include +#include #include #include diff --git a/tests/models/test_tasklets.cpp b/tests/models/test_tasklets.cpp index be54ffa8b..a95db6eba 100644 --- a/tests/models/test_tasklets.cpp +++ b/tests/models/test_tasklets.cpp @@ -28,7 +28,7 @@ */ #include "config.h" -#include +#include #include #include diff --git a/tests/models/test_tasklets_failure.cpp b/tests/models/test_tasklets_failure.cpp index c0eb6c7d0..f01793266 100644 --- a/tests/models/test_tasklets_failure.cpp +++ b/tests/models/test_tasklets_failure.cpp @@ -39,7 +39,7 @@ #include #include "config.h" -#include +#include std::mutex outputMutex; From 13b575eae9a8d421a259e2843918b5f78aa87ce6 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 12:37:08 +0200 Subject: [PATCH 6/8] tasklets: introduce translation unit --- CMakeLists_files.cmake | 1 + opm/models/parallel/tasklets.cpp | 205 +++++++++++++++++++++++++++++++ opm/models/parallel/tasklets.hpp | 188 +++------------------------- tests/models/test_tasklets.cpp | 1 + 4 files changed, 221 insertions(+), 174 deletions(-) create mode 100644 opm/models/parallel/tasklets.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index bf893d36c..abe322fdc 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -62,6 +62,7 @@ list (APPEND MAIN_SOURCE_FILES opm/models/blackoil/blackoilpolymerparams.cpp opm/models/blackoil/blackoilsolventparams.cpp opm/models/parallel/mpiutil.cpp + opm/models/parallel/tasklets.cpp opm/models/parallel/threadmanager.cpp opm/simulators/flow/ActionHandler.cpp opm/simulators/flow/Banners.cpp diff --git a/opm/models/parallel/tasklets.cpp b/opm/models/parallel/tasklets.cpp new file mode 100644 index 000000000..390edacfd --- /dev/null +++ b/opm/models/parallel/tasklets.cpp @@ -0,0 +1,205 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Opm { + +thread_local TaskletRunner* TaskletRunner::taskletRunner_ = nullptr; +thread_local int TaskletRunner::workerThreadIndex_ = -1; + +TaskletRunner::BarrierTasklet::BarrierTasklet(unsigned numWorkers) + : TaskletInterface(/*refCount=*/numWorkers) +{ + numWorkers_ = numWorkers; + numWaiting_ = 0; +} + +void TaskletRunner::BarrierTasklet::run() +{ + wait(); +} + +void TaskletRunner::BarrierTasklet::wait() +{ + std::unique_lock lock(barrierMutex_); + + numWaiting_ += 1; + if (numWaiting_ >= numWorkers_ + 1) { + lock.unlock(); + barrierCondition_.notify_all(); + } + else { + const auto& areAllWaiting = + [this]() -> bool + { return this->numWaiting_ >= this->numWorkers_ + 1; }; + + barrierCondition_.wait(lock, /*predicate=*/areAllWaiting); + } +} + +TaskletRunner::TaskletRunner(unsigned numWorkers) +{ + threads_.resize(numWorkers); + for (unsigned i = 0; i < numWorkers; ++i) + // create a worker thread + threads_[i].reset(new std::thread(startWorkerThread_, this, i)); +} + +TaskletRunner::~TaskletRunner() +{ + if (threads_.size() > 0) { + // dispatch a tasklet which will terminate the worker thread + dispatch(std::make_shared()); + + // wait until all worker threads have terminated + for (auto& thread : threads_) + thread->join(); + } +} + +bool TaskletRunner::failure() const +{ + return this->failureFlag_.load(std::memory_order_relaxed); +} + +int TaskletRunner::workerThreadIndex() const +{ + if (TaskletRunner::taskletRunner_ != this) + return -1; + return TaskletRunner::workerThreadIndex_; +} + +void TaskletRunner::dispatch(std::shared_ptr tasklet) +{ + if (threads_.empty()) { + // run the tasklet immediately in synchronous mode. + while (tasklet->referenceCount() > 0) { + tasklet->dereference(); + try { + tasklet->run(); + } + catch (const std::exception& e) { + std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ". Trying to continue.\n"; + failureFlag_.store(true, std::memory_order_relaxed); + } + catch (...) { + std::cerr << "ERROR: Uncaught exception (general type) when running tasklet. Trying to continue.\n"; + failureFlag_.store(true, std::memory_order_relaxed); + } + } + } + else { + // lock mutex for the tasklet queue to make sure that nobody messes with the + // task queue + taskletQueueMutex_.lock(); + + // add the tasklet to the queue + taskletQueue_.push(tasklet); + + taskletQueueMutex_.unlock(); + + workAvailableCondition_.notify_all(); + } +} + +void TaskletRunner::barrier() +{ + unsigned numWorkers = threads_.size(); + if (numWorkers == 0) + // nothing needs to be done to implement a barrier in synchronous mode + return; + + // dispatch a barrier tasklet and wait until it has been run by the worker thread + auto barrierTasklet = std::make_shared(numWorkers); + dispatch(barrierTasklet); + + barrierTasklet->wait(); +} + +void TaskletRunner::startWorkerThread_(TaskletRunner* taskletRunner, int workerThreadIndex) +{ + TaskletRunner::taskletRunner_ = taskletRunner; + TaskletRunner::workerThreadIndex_ = workerThreadIndex; + + taskletRunner->run_(); +} + +void TaskletRunner::run_() +{ + while (true) { + + // wait until tasklets have been pushed to the queue. first we need to lock + // mutex for access to taskletQueue_ + std::unique_lock lock(taskletQueueMutex_); + + const auto& workIsAvailable = + [this]() -> bool + { return !taskletQueue_.empty(); }; + + if (!workIsAvailable()) + workAvailableCondition_.wait(lock, /*predicate=*/workIsAvailable); + + // remove tasklet from queue + std::shared_ptr tasklet = taskletQueue_.front(); + + // if tasklet is an end marker, terminate the thread and DO NOT remove the + // tasklet. + if (tasklet->isEndMarker()) { + if(taskletQueue_.size() > 1) + throw std::logic_error("TaskletRunner: Not all queued tasklets were executed"); + taskletQueueMutex_.unlock(); + return; + } + + tasklet->dereference(); + if (tasklet->referenceCount() == 0) + // remove tasklets from the queue as soon as their reference count + // reaches zero, i.e. the tasklet has been run often enough. + taskletQueue_.pop(); + lock.unlock(); + + // execute tasklet + try { + tasklet->run(); + } + catch (const std::exception& e) { + std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ".\n"; + failureFlag_.store(true, std::memory_order_relaxed); + } + catch (...) { + std::cerr << "ERROR: Uncaught exception when running tasklet.\n"; + failureFlag_.store(true, std::memory_order_relaxed); + } + } +} + +} // end namespace Opm diff --git a/opm/models/parallel/tasklets.hpp b/opm/models/parallel/tasklets.hpp index 0e783c45b..8a952a327 100644 --- a/opm/models/parallel/tasklets.hpp +++ b/opm/models/parallel/tasklets.hpp @@ -28,12 +28,9 @@ #define OPM_TASKLETS_HPP #include -#include -#include #include #include #include -#include #include namespace Opm { @@ -83,25 +80,6 @@ private: const Fn& fn_; }; -class TaskletRunner; - -// this class stores the thread local static attributes for the TaskletRunner class. we -// cannot put them directly into TaskletRunner because defining static members for -// non-template classes in headers leads the linker to choke in case multiple compile -// units are used. -template -struct TaskletRunnerHelper_ -{ - static thread_local TaskletRunner* taskletRunner_; - static thread_local int workerThreadIndex_; -}; - -template -thread_local TaskletRunner* TaskletRunnerHelper_::taskletRunner_ = nullptr; - -template -thread_local int TaskletRunnerHelper_::workerThreadIndex_ = -1; - /*! * \brief Handles where a given tasklet is run. * @@ -114,33 +92,11 @@ class TaskletRunner class BarrierTasklet : public TaskletInterface { public: - BarrierTasklet(unsigned numWorkers) - : TaskletInterface(/*refCount=*/numWorkers) - { - numWorkers_ = numWorkers; - numWaiting_ = 0; - } + BarrierTasklet(unsigned numWorkers); - void run() - { wait(); } + void run(); - void wait() - { - std::unique_lock lock(barrierMutex_); - - numWaiting_ += 1; - if (numWaiting_ >= numWorkers_ + 1) { - lock.unlock(); - barrierCondition_.notify_all(); - } - else { - const auto& areAllWaiting = - [this]() -> bool - { return this->numWaiting_ >= this->numWorkers_ + 1; }; - - barrierCondition_.wait(lock, /*predicate=*/areAllWaiting); - } - } + void wait(); private: unsigned numWorkers_; @@ -172,13 +128,7 @@ public: * The number of worker threads may be 0. In this case, all work is done by the main * thread (synchronous mode). */ - TaskletRunner(unsigned numWorkers) - { - threads_.resize(numWorkers); - for (unsigned i = 0; i < numWorkers; ++i) - // create a worker thread - threads_[i].reset(new std::thread(startWorkerThread_, this, i)); - } + TaskletRunner(unsigned numWorkers); /*! * \brief Destructor @@ -187,34 +137,16 @@ public: * worker threads have been terminated, i.e. all scheduled tasklets are guaranteed to * be completed. */ - ~TaskletRunner() - { - if (threads_.size() > 0) { - // dispatch a tasklet which will terminate the worker thread - dispatch(std::make_shared()); + ~TaskletRunner(); - // wait until all worker threads have terminated - for (auto& thread : threads_) - thread->join(); - } - } - - bool failure() const - { - return this->failureFlag_.load(std::memory_order_relaxed); - } + bool failure() const; /*! * \brief Returns the index of the current worker thread. * * If the current thread is not a worker thread, -1 is returned. */ - int workerThreadIndex() const - { - if (TaskletRunnerHelper_::taskletRunner_ != this) - return -1; - return TaskletRunnerHelper_::workerThreadIndex_; - } + int workerThreadIndex() const; /*! * \brief Returns the number of worker threads for the tasklet runner. @@ -227,38 +159,7 @@ public: * * The tasklet is either run immediately or deferred to a separate thread. */ - void dispatch(std::shared_ptr tasklet) - { - if (threads_.empty()) { - // run the tasklet immediately in synchronous mode. - while (tasklet->referenceCount() > 0) { - tasklet->dereference(); - try { - tasklet->run(); - } - catch (const std::exception& e) { - std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ". Trying to continue.\n"; - failureFlag_.store(true, std::memory_order_relaxed); - } - catch (...) { - std::cerr << "ERROR: Uncaught exception (general type) when running tasklet. Trying to continue.\n"; - failureFlag_.store(true, std::memory_order_relaxed); - } - } - } - else { - // lock mutex for the tasklet queue to make sure that nobody messes with the - // task queue - taskletQueueMutex_.lock(); - - // add the tasklet to the queue - taskletQueue_.push(tasklet); - - taskletQueueMutex_.unlock(); - - workAvailableCondition_.notify_all(); - } - } + void dispatch(std::shared_ptr tasklet); /*! * \brief Convenience method to construct a new function runner tasklet and dispatch it immediately. @@ -275,19 +176,8 @@ public: /*! * \brief Make sure that all tasklets have been completed after this method has been called */ - void barrier() - { - unsigned numWorkers = threads_.size(); - if (numWorkers == 0) - // nothing needs to be done to implement a barrier in synchronous mode - return; + void barrier(); - // dispatch a barrier tasklet and wait until it has been run by the worker thread - auto barrierTasklet = std::make_shared(numWorkers); - dispatch(barrierTasklet); - - barrierTasklet->wait(); - } private: // Atomic flag that is set to failure if any of the tasklets run by the TaskletRunner fails. // This flag is checked before new tasklets run or get dispatched and in case it is true, the @@ -301,68 +191,18 @@ private: protected: // main function of the worker thread - static void startWorkerThread_(TaskletRunner* taskletRunner, int workerThreadIndex) - { - TaskletRunnerHelper_::taskletRunner_ = taskletRunner; - TaskletRunnerHelper_::workerThreadIndex_ = workerThreadIndex; - - taskletRunner->run_(); - } + static void startWorkerThread_(TaskletRunner* taskletRunner, int workerThreadIndex); //! do the work until the queue received an end tasklet - void run_() - { - while (true) { - - // wait until tasklets have been pushed to the queue. first we need to lock - // mutex for access to taskletQueue_ - std::unique_lock lock(taskletQueueMutex_); - - const auto& workIsAvailable = - [this]() -> bool - { return !taskletQueue_.empty(); }; - - if (!workIsAvailable()) - workAvailableCondition_.wait(lock, /*predicate=*/workIsAvailable); - - // remove tasklet from queue - std::shared_ptr tasklet = taskletQueue_.front(); - - // if tasklet is an end marker, terminate the thread and DO NOT remove the - // tasklet. - if (tasklet->isEndMarker()) { - if(taskletQueue_.size() > 1) - throw std::logic_error("TaskletRunner: Not all queued tasklets were executed"); - taskletQueueMutex_.unlock(); - return; - } - - tasklet->dereference(); - if (tasklet->referenceCount() == 0) - // remove tasklets from the queue as soon as their reference count - // reaches zero, i.e. the tasklet has been run often enough. - taskletQueue_.pop(); - lock.unlock(); - - // execute tasklet - try { - tasklet->run(); - } - catch (const std::exception& e) { - std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ".\n"; - failureFlag_.store(true, std::memory_order_relaxed); - } - catch (...) { - std::cerr << "ERROR: Uncaught exception when running tasklet.\n"; - failureFlag_.store(true, std::memory_order_relaxed); - } - } - } + void run_(); std::vector > threads_; std::queue > taskletQueue_; std::mutex taskletQueueMutex_; std::condition_variable workAvailableCondition_; + + static thread_local TaskletRunner* taskletRunner_; + static thread_local int workerThreadIndex_; }; } // end namespace Opm diff --git a/tests/models/test_tasklets.cpp b/tests/models/test_tasklets.cpp index a95db6eba..838f7fccc 100644 --- a/tests/models/test_tasklets.cpp +++ b/tests/models/test_tasklets.cpp @@ -30,6 +30,7 @@ #include +#include #include #include From b3e08b4f2f6c9d022b63a3ef3d19995492b53a63 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 12:57:51 +0200 Subject: [PATCH 7/8] timer.hh: rename to timer.hpp --- CMakeLists_files.cmake | 2 +- opm/models/discretization/common/fvbasediscretization.hh | 2 +- opm/models/nonlinear/newtonmethod.hh | 2 +- opm/models/utils/simulator.hh | 2 +- opm/models/utils/start.hh | 2 +- opm/models/utils/{timer.hh => timer.hpp} | 9 ++++++--- opm/models/utils/timerguard.hh | 2 +- opm/simulators/linalg/bicgstabsolver.hh | 2 +- opm/simulators/linalg/linearsolverreport.hh | 2 +- 9 files changed, 14 insertions(+), 11 deletions(-) rename opm/models/utils/{timer.hh => timer.hpp} (98%) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index abe322fdc..4e4635d48 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -714,7 +714,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/models/utils/signum.hh opm/models/utils/simulator.hh opm/models/utils/start.hh - opm/models/utils/timer.hh + opm/models/utils/timer.hpp opm/models/utils/timerguard.hh opm/simulators/flow/ActionHandler.hpp opm/simulators/flow/AluGridCartesianIndexMapper.hpp diff --git a/opm/models/discretization/common/fvbasediscretization.hh b/opm/models/discretization/common/fvbasediscretization.hh index cee45056c..cd0d3b5e4 100644 --- a/opm/models/discretization/common/fvbasediscretization.hh +++ b/opm/models/discretization/common/fvbasediscretization.hh @@ -60,7 +60,7 @@ #include #include -#include +#include #include #include diff --git a/opm/models/nonlinear/newtonmethod.hh b/opm/models/nonlinear/newtonmethod.hh index aa99ae354..744d90349 100644 --- a/opm/models/nonlinear/newtonmethod.hh +++ b/opm/models/nonlinear/newtonmethod.hh @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include diff --git a/opm/models/utils/simulator.hh b/opm/models/utils/simulator.hh index d50b555ce..80341db08 100644 --- a/opm/models/utils/simulator.hh +++ b/opm/models/utils/simulator.hh @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/opm/models/utils/start.hh b/opm/models/utils/start.hh index 0c9714d84..76ce1def9 100644 --- a/opm/models/utils/start.hh +++ b/opm/models/utils/start.hh @@ -35,7 +35,7 @@ #include "parametersystem.hh" #include -#include +#include #include diff --git a/opm/models/utils/timer.hh b/opm/models/utils/timer.hpp similarity index 98% rename from opm/models/utils/timer.hh rename to opm/models/utils/timer.hpp index 855c88dd8..bbe01507e 100644 --- a/opm/models/utils/timer.hh +++ b/opm/models/utils/timer.hpp @@ -25,8 +25,8 @@ * * \copydoc Opm::Timer */ -#ifndef EWOMS_TIMER_HH -#define EWOMS_TIMER_HH +#ifndef OPM_TIMER_HPP +#define OPM_TIMER_HPP #include @@ -35,6 +35,7 @@ #endif namespace Opm { + /*! * \ingroup Common * @@ -52,6 +53,7 @@ class Timer std::chrono::high_resolution_clock::time_point realtimeData; std::clock_t cputimeData; }; + public: Timer() { halt(); } @@ -212,6 +214,7 @@ private: double realTimeElapsed_; TimeData startTime_; }; + } // namespace Opm -#endif +#endif // OPM_TIMER_HPP diff --git a/opm/models/utils/timerguard.hh b/opm/models/utils/timerguard.hh index 9b08bbc1e..c2d89e13a 100644 --- a/opm/models/utils/timerguard.hh +++ b/opm/models/utils/timerguard.hh @@ -28,7 +28,7 @@ #ifndef EWOMS_TIMER_GUARD_HH #define EWOMS_TIMER_GUARD_HH -#include "timer.hh" +#include namespace Opm { /*! diff --git a/opm/simulators/linalg/bicgstabsolver.hh b/opm/simulators/linalg/bicgstabsolver.hh index 445e7faef..9b1147720 100644 --- a/opm/simulators/linalg/bicgstabsolver.hh +++ b/opm/simulators/linalg/bicgstabsolver.hh @@ -31,7 +31,7 @@ #include "residreductioncriterion.hh" #include "linearsolverreport.hh" -#include +#include #include #include diff --git a/opm/simulators/linalg/linearsolverreport.hh b/opm/simulators/linalg/linearsolverreport.hh index 63b10147c..1d7f34a54 100644 --- a/opm/simulators/linalg/linearsolverreport.hh +++ b/opm/simulators/linalg/linearsolverreport.hh @@ -29,7 +29,7 @@ #include "convergencecriterion.hh" -#include +#include #include namespace Opm { From f838cb6ecfe43eab00e69e42a960bafd4d7ec965 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 3 Sep 2024 13:18:58 +0200 Subject: [PATCH 8/8] timer: introduce translation unit --- CMakeLists_files.cmake | 1 + opm/models/utils/timer.cpp | 148 +++++++++++++++++++++++++++++++++++++ opm/models/utils/timer.hpp | 122 +++--------------------------- 3 files changed, 161 insertions(+), 110 deletions(-) create mode 100644 opm/models/utils/timer.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 4e4635d48..3003961d7 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -64,6 +64,7 @@ list (APPEND MAIN_SOURCE_FILES opm/models/parallel/mpiutil.cpp opm/models/parallel/tasklets.cpp opm/models/parallel/threadmanager.cpp + opm/models/utils/timer.cpp opm/simulators/flow/ActionHandler.cpp opm/simulators/flow/Banners.cpp opm/simulators/flow/BlackoilModelParameters.cpp diff --git a/opm/models/utils/timer.cpp b/opm/models/utils/timer.cpp new file mode 100644 index 000000000..77f6f631d --- /dev/null +++ b/opm/models/utils/timer.cpp @@ -0,0 +1,148 @@ +// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=4 sw=4 sts=4: +/* + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . + + Consult the COPYING file in the top-level source directory of this + module for the precise wording of the license and the list of + copyright holders. +*/ + +#include +#include + +#if HAVE_MPI +#include +#endif + +namespace Opm { + +void Timer::TimeData::measure() +{ + // Note: On Linux -- or rather fully POSIX compliant systems -- using + // clock_gettime() would be more accurate for the CPU time. + realtimeData = std::chrono::high_resolution_clock::now(); + cputimeData = std::clock(); +} + +Timer::Timer() +{ + halt(); +} + +void Timer::start() +{ + isStopped_ = false; + startTime_.measure(); +} + +double Timer::stop() +{ + if (!isStopped_) { + TimeData stopTime; + + stopTime.measure(); + + const auto& t1 = startTime_.realtimeData; + const auto& t2 = stopTime.realtimeData; + std::chrono::duration dt = + std::chrono::duration_cast >(t2 - t1); + + realTimeElapsed_ += dt.count(); + cpuTimeElapsed_ += + static_cast(stopTime.cputimeData + - startTime_.cputimeData)/CLOCKS_PER_SEC; + } + + isStopped_ = true; + + return realTimeElapsed_; +} + +void Timer::halt() +{ + isStopped_ = true; + cpuTimeElapsed_ = 0.0; + realTimeElapsed_ = 0.0; +} + +void Timer::reset() +{ + cpuTimeElapsed_ = 0.0; + realTimeElapsed_ = 0.0; + + startTime_.measure(); +} + +double Timer::realTimeElapsed() const +{ + if (isStopped_) + return realTimeElapsed_; + + TimeData stopTime; + + stopTime.measure(); + + const auto& t1 = startTime_.realtimeData; + const auto& t2 = stopTime.realtimeData; + std::chrono::duration dt = + std::chrono::duration_cast >(t2 - t1); + + return realTimeElapsed_ + dt.count(); +} + +double Timer::cpuTimeElapsed() const +{ + if (isStopped_) + return cpuTimeElapsed_; + + TimeData stopTime; + + stopTime.measure(); + + const auto& t1 = startTime_.cputimeData; + const auto& t2 = stopTime.cputimeData; + + return cpuTimeElapsed_ + static_cast(t2 - t1)/CLOCKS_PER_SEC; +} + +double Timer::globalCpuTimeElapsed() const +{ + double val = cpuTimeElapsed(); + double globalVal = val; + +#if HAVE_MPI + MPI_Reduce(&val, + &globalVal, + /*count=*/1, + MPI_DOUBLE, + MPI_SUM, + /*rootRank=*/0, + MPI_COMM_WORLD); +#endif + + return globalVal; +} + +Timer& Timer::operator+=(const Timer& other) +{ + realTimeElapsed_ += other.realTimeElapsed(); + cpuTimeElapsed_ += other.cpuTimeElapsed(); + + return *this; +} + +} // namespace Opm diff --git a/opm/models/utils/timer.hpp b/opm/models/utils/timer.hpp index bbe01507e..689209c66 100644 --- a/opm/models/utils/timer.hpp +++ b/opm/models/utils/timer.hpp @@ -30,10 +30,6 @@ #include -#if HAVE_MPI -#include -#endif - namespace Opm { /*! @@ -52,90 +48,41 @@ class Timer { std::chrono::high_resolution_clock::time_point realtimeData; std::clock_t cputimeData; + + // measure the current time and put it into the object. + void measure(); }; public: - Timer() - { halt(); } + Timer(); /*! * \brief Start counting the time resources used by the simulation. */ - void start() - { - isStopped_ = false; - measure_(startTime_); - } + void start(); /*! * \brief Stop counting the time resources. * * Returns the wall clock time the timer was active. */ - double stop() - { - if (!isStopped_) { - TimeData stopTime; - - measure_(stopTime); - - const auto& t1 = startTime_.realtimeData; - const auto& t2 = stopTime.realtimeData; - std::chrono::duration dt = - std::chrono::duration_cast >(t2 - t1); - - realTimeElapsed_ += dt.count(); - cpuTimeElapsed_ += - static_cast(stopTime.cputimeData - - startTime_.cputimeData)/CLOCKS_PER_SEC; - } - - isStopped_ = true; - - return realTimeElapsed_; - } + double stop(); /*! * \brief Stop the measurement reset all timing values */ - void halt() - { - isStopped_ = true; - cpuTimeElapsed_ = 0.0; - realTimeElapsed_ = 0.0; - } + void halt(); /*! * \brief Make the current point in time t=0 but do not change the status of the timer. */ - void reset() - { - cpuTimeElapsed_ = 0.0; - realTimeElapsed_ = 0.0; - - measure_(startTime_); - } + void reset(); /*! * \brief Return the real time [s] elapsed during the periods the timer was active * since the last reset. */ - double realTimeElapsed() const - { - if (isStopped_) - return realTimeElapsed_; - - TimeData stopTime; - - measure_(stopTime); - - const auto& t1 = startTime_.realtimeData; - const auto& t2 = stopTime.realtimeData; - std::chrono::duration dt = - std::chrono::duration_cast >(t2 - t1); - - return realTimeElapsed_ + dt.count(); - } + double realTimeElapsed() const; /*! * \brief This is an alias for realTimeElapsed() @@ -149,66 +96,21 @@ public: * \brief Return the CPU time [s] used by all threads of the local process for the * periods the timer was active */ - double cpuTimeElapsed() const - { - if (isStopped_) - return cpuTimeElapsed_; - - TimeData stopTime; - - measure_(stopTime); - - const auto& t1 = startTime_.cputimeData; - const auto& t2 = stopTime.cputimeData; - - return cpuTimeElapsed_ + static_cast(t2 - t1)/CLOCKS_PER_SEC; - } + double cpuTimeElapsed() const; /*! * \brief Return the CPU time [s] used by all threads of the all processes of program * * The value returned only differs from cpuTimeElapsed() if MPI is used. */ - double globalCpuTimeElapsed() const - { - double val = cpuTimeElapsed(); - double globalVal = val; - -#if HAVE_MPI - MPI_Reduce(&val, - &globalVal, - /*count=*/1, - MPI_DOUBLE, - MPI_SUM, - /*rootRank=*/0, - MPI_COMM_WORLD); -#endif - - return globalVal; - } + double globalCpuTimeElapsed() const; /*! * \brief Adds the time of another timer to the current one */ - Timer& operator+=(const Timer& other) - { - realTimeElapsed_ += other.realTimeElapsed(); - cpuTimeElapsed_ += other.cpuTimeElapsed(); - - return *this; - } + Timer& operator+=(const Timer& other); private: - // measure the current time and put it into the object passed via - // the argument. - static void measure_(TimeData& timeData) - { - // Note: On Linux -- or rather fully POSIX compliant systems -- using - // clock_gettime() would be more accurate for the CPU time. - timeData.realtimeData = std::chrono::high_resolution_clock::now(); - timeData.cputimeData = std::clock(); - } - bool isStopped_; double cpuTimeElapsed_; double realTimeElapsed_;