From 6d43c1b715f4924c58e98794613faa21edb84951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Atgeirr=20Fl=C3=B8=20Rasmussen?= <atgeirr@sintef.no>
Date: Tue, 3 Dec 2024 09:36:27 +0100
Subject: [PATCH] Use regular OpenMP for loop, then iterate over chunks inside.

Avoiding the mutex-based ThreadedEntityIterator gives a good speedup,
the overhead from the single extra loop over the grid for initialization
should be small in comparison.
---
 opm/simulators/flow/FIBlackoilModel.hpp | 70 +++++++++++++++++++++----
 1 file changed, 61 insertions(+), 9 deletions(-)
diff --git a/opm/simulators/flow/FIBlackoilModel.hpp b/opm/simulators/flow/FIBlackoilModel.hpp
index cce1b03b5..08000972b 100644
--- a/opm/simulators/flow/FIBlackoilModel.hpp
+++ b/opm/simulators/flow/FIBlackoilModel.hpp
@@ -35,7 +35,14 @@
 
 #include <opm/simulators/utils/DeferredLoggingErrorHelpers.hpp>
 
+#include <cstddef>
 #include <stdexcept>
+#include <type_traits>
+#include <vector>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
 
 namespace Opm
 {
@@ -55,27 +62,69 @@ class FIBlackOilModel : public BlackOilModel<TypeTag>
         historySize = getPropValue<TypeTag, Properties::TimeDiscHistorySize>(),
     };
 
+    // The chunked and threaded iteration over elements in this class assumes that the number
+    // and order of elements is fixed, and is therefore constrained to only work with CpGrid.
+    // For example, ALUGrid supports refinement and can not assume that.
+    static constexpr bool gridIsUnchanging = std::is_same_v<GetPropType<TypeTag, Properties::Grid>, Dune::CpGrid>;
+
 public:
     FIBlackOilModel(Simulator& simulator)
         : BlackOilModel<TypeTag>(simulator)
     {
+        if constexpr (gridIsUnchanging) {
+            const auto& gv = this->gridView_;
+#ifdef _OPENMP
+            const int nt = omp_get_max_threads();
+            if (nt > 1) {
+                const auto num_elements = gv.size(0);
+                constexpr int max_chunk_size = 1000;
+                const int chunk_size = std::clamp(num_elements / nt, 1, max_chunk_size);
+                OpmLog::debug("Using chunk size " + std::to_string(chunk_size) +
+                              " for property evaluation with " + std::to_string(nt) + " OpenMP threads.");
+                grid_chunk_iterators_.reserve(num_elements / chunk_size + 2);
+                auto it = gv.template begin<0>();
+                const auto end = gv.template end<0>();
+                for (int count = 0; it != end; ++it, ++count) {
+                    if (count % chunk_size == 0) {
+                        grid_chunk_iterators_.push_back(it);
+                    }
+                }
+                grid_chunk_iterators_.push_back(end);
+            } else
+#endif
+            {
+                // With one thread, or without OpenMP, we use a single chunk.
+                grid_chunk_iterators_.push_back(gv.template begin<0>());
+                grid_chunk_iterators_.push_back(gv.template end<0>());
+            }
+        }
     }
 
     void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const
     {
-
         this->invalidateIntensiveQuantitiesCache(timeIdx);
-        OPM_BEGIN_PARALLEL_TRY_CATCH()
-        // loop over all elements...
-        ThreadedEntityIterator<GridView, /*codim=*/0> threadedElemIt(this->gridView_);
+        OPM_BEGIN_PARALLEL_TRY_CATCH();
+        if constexpr (gridIsUnchanging) {
+            const int num_chunks = grid_chunk_iterators_.size() - 1;
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel for
 #endif
-        {
+            for (int chunk = 0; chunk < num_chunks; ++chunk) {
+                ElementContext elemCtx(this->simulator_);
+                for (auto it = grid_chunk_iterators_[chunk]; it != grid_chunk_iterators_[chunk+1]; ++it) {
+                    const Element& elem = *it;
+                    elemCtx.updatePrimaryStencil(elem);
+                    elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
+                }
+            }
+        } else {
+            // Grid is possibly refined or otherwise changed between calls.
+            const auto& gv = this->gridView_;
+            auto it = gv.template begin<0>();
+            const auto end = gv.template end<0>();
             ElementContext elemCtx(this->simulator_);
-            ElementIterator elemIt = threadedElemIt.beginParallel();
-            for (; !threadedElemIt.isFinished(elemIt); elemIt = threadedElemIt.increment()) {
-                const Element& elem = *elemIt;
+            for (; it != end; ++it) {
+                const Element& elem = *it;
                 elemCtx.updatePrimaryStencil(elem);
                 elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
             }
@@ -171,6 +220,9 @@ public:
         }
         return *intquant;
     }
+
+protected:
+    std::vector<ElementIterator> grid_chunk_iterators_;
 };
 } // namespace Opm
 #endif // FI_BLACK_OIL_MODEL_HPP