Use a regular OpenMP for loop, then iterate over chunks inside.

Avoiding the mutex-based ThreadedEntityIterator gives a good speedup;
the overhead of the single extra loop over the grid for initialization
should be small in comparison.
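
For illustration, here is a minimal self-contained sketch of the same pattern, using a std::vector and a made-up per-element function (processElement, processAll and the container are placeholders, not the grid or ElementContext types): chunk-start iterators are recorded once up front, and a plain "#pragma omp parallel for" then hands whole chunks to threads.

#include <algorithm>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

void processElement(int&) {} // stand-in for the per-element work

void processAll(std::vector<int>& elements)
{
    using Iter = std::vector<int>::iterator;
    std::vector<Iter> chunk_iterators;

#ifdef _OPENMP
    const int nt = omp_get_max_threads();
#else
    const int nt = 1;
#endif
    const int num_elements = static_cast<int>(elements.size());
    constexpr int max_chunk_size = 1000;
    const int chunk_size = std::clamp(num_elements / std::max(nt, 1), 1, max_chunk_size);

    // One extra pass over the data: record the start of every chunk, then the end.
    for (int i = 0; i < num_elements; i += chunk_size) {
        chunk_iterators.push_back(elements.begin() + i);
    }
    chunk_iterators.push_back(elements.end());

    // No shared iterator and no mutex: each thread processes whole chunks.
    const int num_chunks = static_cast<int>(chunk_iterators.size()) - 1;
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int chunk = 0; chunk < num_chunks; ++chunk) {
        for (auto it = chunk_iterators[chunk]; it != chunk_iterators[chunk + 1]; ++it) {
            processElement(*it);
        }
    }
}

Grid element iterators are forward-only, which is why the actual code below walks the grid once and records every chunk_size-th iterator instead of using begin() + offset as the vector sketch does.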
Atgeirr Flø Rasmussen 2024-12-03 09:36:27 +01:00 committed by Atgeirr Flø Rasmussen
parent e24d355e66
commit 6d43c1b715


@@ -35,7 +35,14 @@
#include <opm/simulators/utils/DeferredLoggingErrorHelpers.hpp>
#include <cstddef>
#include <stdexcept>
#include <type_traits>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif
namespace Opm
{
@@ -55,27 +62,69 @@ class FIBlackOilModel : public BlackOilModel<TypeTag>
        historySize = getPropValue<TypeTag, Properties::TimeDiscHistorySize>(),
    };

    // The chunked and threaded iteration over elements in this class assumes that the number
    // and order of elements is fixed, and is therefore constrained to only work with CpGrid.
    // For example, ALUGrid supports refinement, so this assumption cannot be made for it.
    static constexpr bool gridIsUnchanging = std::is_same_v<GetPropType<TypeTag, Properties::Grid>, Dune::CpGrid>;

public:
    FIBlackOilModel(Simulator& simulator)
        : BlackOilModel<TypeTag>(simulator)
    {
        if constexpr (gridIsUnchanging) {
            const auto& gv = this->gridView_;
#ifdef _OPENMP
            const int nt = omp_get_max_threads();
            if (nt > 1) {
                const auto num_elements = gv.size(0);
                constexpr int max_chunk_size = 1000;
                const int chunk_size = std::clamp(num_elements / nt, 1, max_chunk_size);
                OpmLog::debug("Using chunk size " + std::to_string(chunk_size) +
                              " for property evaluation with " + std::to_string(nt) + " OpenMP threads.");
                // Record an iterator to the first element of every chunk, followed by the end iterator.
                grid_chunk_iterators_.reserve(num_elements / chunk_size + 2);
                auto it = gv.template begin<0>();
                const auto end = gv.template end<0>();
                for (int count = 0; it != end; ++it, ++count) {
                    if (count % chunk_size == 0) {
                        grid_chunk_iterators_.push_back(it);
                    }
                }
                grid_chunk_iterators_.push_back(end);
            } else
#endif
            {
                // With one thread, or without OpenMP, we use a single chunk.
                grid_chunk_iterators_.push_back(gv.template begin<0>());
                grid_chunk_iterators_.push_back(gv.template end<0>());
            }
        }
    }
    void invalidateAndUpdateIntensiveQuantities(unsigned timeIdx) const
    {
        this->invalidateIntensiveQuantitiesCache(timeIdx);
        OPM_BEGIN_PARALLEL_TRY_CATCH();
        if constexpr (gridIsUnchanging) {
            // Loop over precomputed chunks; each thread handles whole chunks.
            const int num_chunks = grid_chunk_iterators_.size() - 1;
#ifdef _OPENMP
#pragma omp parallel for
#endif
            for (int chunk = 0; chunk < num_chunks; ++chunk) {
                ElementContext elemCtx(this->simulator_);
                for (auto it = grid_chunk_iterators_[chunk]; it != grid_chunk_iterators_[chunk + 1]; ++it) {
                    const Element& elem = *it;
                    elemCtx.updatePrimaryStencil(elem);
                    elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
                }
            }
        } else {
            // Grid is possibly refined or otherwise changed between calls.
            const auto& gv = this->gridView_;
            auto it = gv.template begin<0>();
            const auto end = gv.template end<0>();
            ElementContext elemCtx(this->simulator_);
            for (; it != end; ++it) {
                const Element& elem = *it;
                elemCtx.updatePrimaryStencil(elem);
                elemCtx.updatePrimaryIntensiveQuantities(timeIdx);
            }
@@ -171,6 +220,9 @@ public:
        }
        return *intquant;
    }

protected:
    std::vector<ElementIterator> grid_chunk_iterators_;
};
} // namespace Opm
#endif // FI_BLACK_OIL_MODEL_HPP