Use a regular OpenMP for loop, then iterate over chunks inside.

Avoiding the mutex-based ThreadedEntityIterator gives a good speedup;
the overhead of the single extra loop over the grid for initialization
should be small in comparison.
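
For illustration, here is a minimal self-contained sketch of the same pattern, using a std::vector and a made-up per-element function (processElement, processAll and the container are placeholders, not the grid or ElementContext types): chunk-start iterators are recorded once up front, and a plain "#pragma omp parallel for" then hands whole chunks to threads.

#include <algorithm>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

void processElement(int&) {} // stand-in for the per-element work

void processAll(std::vector<int>& elements)
{
    using Iter = std::vector<int>::iterator;
    std::vector<Iter> chunk_iterators;

#ifdef _OPENMP
    const int nt = omp_get_max_threads();
#else
    const int nt = 1;
#endif
    const int num_elements = static_cast<int>(elements.size());
    constexpr int max_chunk_size = 1000;
    const int chunk_size = std::clamp(num_elements / std::max(nt, 1), 1, max_chunk_size);

    // One extra pass over the data: record the start of every chunk, then the end.
    for (int i = 0; i < num_elements; i += chunk_size) {
        chunk_iterators.push_back(elements.begin() + i);
    }
    chunk_iterators.push_back(elements.end());

    // No shared iterator and no mutex: each thread processes whole chunks.
    const int num_chunks = static_cast<int>(chunk_iterators.size()) - 1;
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int chunk = 0; chunk < num_chunks; ++chunk) {
        for (auto it = chunk_iterators[chunk]; it != chunk_iterators[chunk + 1]; ++it) {
            processElement(*it);
        }
    }
}

Grid element iterators are forward-only, which is why the actual code below walks the grid once and records every chunk_size-th iterator instead of using begin() + offset as the vector sketch does.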
Atgeirr Flø Rasmussen 2024-12-03 09:36:27 +01:00 committed by Atgeirr Flø Rasmussen
parent e24d355e66
commit 6d43c1b715


@@ -35,7 +35,14 @@
#include <opm/simulators/utils/DeferredLoggingErrorHelpers.hpp>
#include <cstddef>
#include <stdexcept>
#include <type_traits>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif
namespace Opm
{
@@ -55,27 +62,69 @@ class FIBlackOilModel : public BlackOilModel<TypeTag>
        historySize = getPropValue<TypeTag, Properties::TimeDiscHistorySize>(),
    };

    // The chunked and threaded iteration over elements in this class assumes that the number
    // and order of elements is fixed, and is therefore constrained to only work with CpGrid.
    // For example, ALUGrid supports refinement, so this assumption cannot be made for it.
    static constexpr bool gridIsUnchanging = std::is_same_v<GetPropType<TypeTag, Properties::Grid>, Dune::CpGrid>;

public:
    FIBlackOilModel(Simulator& simulator)
        : BlackOilModel<TypeTag>(simulator)
    {
        if constexpr (gridIsUnchanging) {
            const auto& gv = this->gridView_;
#ifdef _OPENMP
            const int nt = omp_get_max_threads();
            if (nt > 1) {
                const auto num_elements = gv.size(0);
                constexpr int max_chunk_size = 1000;
                const int chunk_size = std::clamp(num_elements / nt, 1, max_chunk_size);
                OpmLog::debug("Using chunk size " + std::to_string(chunk_size) +
                              " for property evaluation with " + std::to_string(nt) + " OpenMP threads.");
                // Record an iterator to the first element of every chunk, followed by the end iterator.
                grid_chunk_iterators_.reserve(num_elements / chunk_size + 2);
                auto it = gv.template begin<0>();
                const auto end = gv.template end<0>();
                for (int count = 0; it != end; ++it, ++count) {
                    if (count % chunk_size == 0) {
                        grid_chunk_iterators_.push_back(it);
                    }
                }
                grid_chunk_iterators_.push_back(end);
            } else
#endif
            {
                // With one thread, or without OpenMP, we use a single chunk.
                grid_chunk_iterators_.push_back(gv.template begin<0>());
                grid_chunk_iterators_.push_back(gv.template end<0>());
            }
        }
    }
    void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const
    {
        this->invalidateIntensiveQuantitiesCache(timeIdx);
        OPM_BEGIN_PARALLEL_TRY_CATCH();
        if constexpr (gridIsUnchanging) {
            // Loop over precomputed chunks; each thread handles whole chunks.
            const int num_chunks = grid_chunk_iterators_.size() - 1;
#ifdef _OPENMP
#pragma omp parallel for
#endif
            for (int chunk = 0; chunk < num_chunks; ++chunk) {
                ElementContext elemCtx(this->simulator_);
                for (auto it = grid_chunk_iterators_[chunk]; it != grid_chunk_iterators_[chunk + 1]; ++it) {
                    const Element& elem = *it;
                    elemCtx.updatePrimaryStencil(elem);
                    elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
                }
            }
        } else {
            // Grid is possibly refined or otherwise changed between calls.
            const auto& gv = this->gridView_;
            auto it = gv.template begin<0>();
            const auto end = gv.template end<0>();
            ElementContext elemCtx(this->simulator_);
            for (; it != end; ++it) {
                const Element& elem = *it;
                elemCtx.updatePrimaryStencil(elem);
                elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
            }
@@ -171,6 +220,9 @@ public:
        }
        return *intquant;
    }

protected:
    std::vector<ElementIterator> grid_chunk_iterators_;
};
} // namespace Opm
#endif // FI_BLACK_OIL_MODEL_HPP