From 6d43c1b715f4924c58e98794613faa21edb84951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Atgeirr=20Fl=C3=B8=20Rasmussen?= Date: Tue, 3 Dec 2024 09:36:27 +0100 Subject: [PATCH] Use regular OpenMP for loop, then iterate over chunks inside. Avoiding the mutex-based ThreadedEntityIterator gives a good speedup; the overhead from the single extra loop over the grid for initialization should be small in comparison. --- opm/simulators/flow/FIBlackoilModel.hpp | 70 +++++++++++++++++++++---- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/opm/simulators/flow/FIBlackoilModel.hpp b/opm/simulators/flow/FIBlackoilModel.hpp index cce1b03b5..08000972b 100644 --- a/opm/simulators/flow/FIBlackoilModel.hpp +++ b/opm/simulators/flow/FIBlackoilModel.hpp @@ -35,7 +35,14 @@ #include +#include #include +#include +#include + +#ifdef _OPENMP +#include +#endif namespace Opm { @@ -55,27 +62,69 @@ class FIBlackOilModel : public BlackOilModel historySize = getPropValue(), }; + // The chunked and threaded iteration over elements in this class assumes that the number + // and order of elements is fixed, and is therefore constrained to only work with CpGrid. + // For example, ALUGrid supports refinement, so this assumption does not hold for it.
+ static constexpr bool gridIsUnchanging = std::is_same_v, Dune::CpGrid>; + public: FIBlackOilModel(Simulator& simulator) : BlackOilModel(simulator) { + if constexpr (gridIsUnchanging) { + const auto& gv = this->gridView_; +#ifdef _OPENMP + const int nt = omp_get_max_threads(); + if (nt > 1) { + const auto num_elements = gv.size(0); + constexpr int max_chunk_size = 1000; + const int chunk_size = std::clamp(num_elements / nt, 1, max_chunk_size); + OpmLog::debug("Using chunk size " + std::to_string(chunk_size) + + " for property evaluation with " + std::to_string(nt) + " OpenMP threads."); + grid_chunk_iterators_.reserve(num_elements / chunk_size + 2); + auto it = gv.template begin<0>(); + const auto end = gv.template end<0>(); + for (int count = 0; it != end; ++it, ++count) { + if (count % chunk_size == 0) { + grid_chunk_iterators_.push_back(it); + } + } + grid_chunk_iterators_.push_back(end); + } else +#endif + { + // With one thread, or without OpenMP, we use a single chunk. + grid_chunk_iterators_.push_back(gv.template begin<0>()); + grid_chunk_iterators_.push_back(gv.template end<0>()); + } + } } void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const { - this->invalidateIntensiveQuantitiesCache(timeIdx); - OPM_BEGIN_PARALLEL_TRY_CATCH() - // loop over all elements... - ThreadedEntityIterator threadedElemIt(this->gridView_); + OPM_BEGIN_PARALLEL_TRY_CATCH(); + if constexpr (gridIsUnchanging) { + const int num_chunks = grid_chunk_iterators_.size() - 1; #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel for #endif - { + for (int chunk = 0; chunk < num_chunks; ++chunk) { + ElementContext elemCtx(this->simulator_); + for (auto it = grid_chunk_iterators_[chunk]; it != grid_chunk_iterators_[chunk+1]; ++it) { + const Element& elem = *it; + elemCtx.updatePrimaryStencil(elem); + elemCtx.updatePrimaryIntensiveQuantities(timeIdx); + } + } + } else { + // Grid is possibly refined or otherwise changed between calls. 
+ const auto& gv = this->gridView_; + auto it = gv.template begin<0>(); + const auto end = gv.template end<0>(); ElementContext elemCtx(this->simulator_); - ElementIterator elemIt = threadedElemIt.beginParallel(); - for (; !threadedElemIt.isFinished(elemIt); elemIt = threadedElemIt.increment()) { - const Element& elem = *elemIt; + for (; it != end; ++it) { + const Element& elem = *it; elemCtx.updatePrimaryStencil(elem); elemCtx.updatePrimaryIntensiveQuantities(timeIdx); } @@ -171,6 +220,9 @@ public: } return *intquant; } + +protected: + std::vector grid_chunk_iterators_; }; } // namespace Opm #endif // FI_BLACK_OIL_MODEL_HPP