Several OpenMP improvements

Several optimizations based on profiling of a 20 million cell grid model. These changes address the largest performance issues, but there are still more operations that can be refactored. The shared per-thread accumulation pattern is sketched below the list.

* OpenMP: Use in fault geometry generator
* OpenMP: Use when computing statistics for result values
* OpenMP: Use multithreading on fault detection
* Add RiaOpenMPTools
* VizFwk: Use OpenMP for texture generation
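The changes share one pattern: allocate one result container per thread, let each thread fill only its own container inside a parallel section using a plain "omp for", and merge the containers sequentially after the parallel section, so the hot loop needs no critical section. Below is a minimal sketch of that pattern using the new RiaOpenMPTools helpers; the function name and data are hypothetical and only for illustration, not taken from the changed files.

// Sketch of the per-thread accumulation pattern used in this commit.
// The surrounding function and its input data are hypothetical.
#include "RiaOpenMPTools.h"

#include <vector>

std::vector<double> doubleAllValues( const std::vector<double>& input )
{
    const int numberOfThreads = RiaOpenMPTools::availableThreadCount();

    // One buffer per thread: each thread appends only to its own vector,
    // so no critical section is needed in the loop body.
    std::vector<std::vector<double>> threadResults( numberOfThreads );

#pragma omp parallel
    {
        const int myThread = RiaOpenMPTools::currentThreadIndex();

        // NB: plain "omp for" here, as we are already inside a parallel section
#pragma omp for
        for ( long long i = 0; i < (long long)input.size(); i++ )
        {
            threadResults[myThread].push_back( input[i] * 2.0 );
        }
    }

    // Merge the per-thread buffers sequentially after the parallel section.
    // The merged order is grouped per thread, which is acceptable for this kind of use.
    std::vector<double> result;
    for ( const auto& perThread : threadResults )
    {
        result.insert( result.end(), perThread.begin(), perThread.end() );
    }

    return result;
}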
Magne Sjaastad 2022-12-19 13:49:03 +01:00 committed by GitHub
parent 254c74be13
commit a423ecf95f
12 changed files with 244 additions and 110 deletions

View File

@@ -50,6 +50,7 @@ set(SOURCE_GROUP_HEADER_FILES
     ${CMAKE_CURRENT_LIST_DIR}/RiaEnsembleNameTools.h
     ${CMAKE_CURRENT_LIST_DIR}/RiaSummaryStringTools.h
     ${CMAKE_CURRENT_LIST_DIR}/RiaNetworkTools.h
+    ${CMAKE_CURRENT_LIST_DIR}/RiaOpenMPTools.h
 )

 set(SOURCE_GROUP_SOURCE_FILES
@@ -97,6 +98,7 @@ set(SOURCE_GROUP_SOURCE_FILES
     ${CMAKE_CURRENT_LIST_DIR}/RiaVec3Tools.cpp
     ${CMAKE_CURRENT_LIST_DIR}/RiaSummaryStringTools.cpp
     ${CMAKE_CURRENT_LIST_DIR}/RiaNetworkTools.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/RiaOpenMPTools.cpp
 )

 list(APPEND CODE_SOURCE_FILES ${SOURCE_GROUP_SOURCE_FILES})

View File

@@ -0,0 +1,50 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2022 Equinor ASA
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#include "RiaOpenMPTools.h"

#ifdef USE_OPENMP
#include <omp.h>
#endif

//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
int RiaOpenMPTools::availableThreadCount()
{
    int numberOfThreads = 1;

#ifdef USE_OPENMP
    numberOfThreads = omp_get_max_threads();
#endif

    return numberOfThreads;
}

//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
int RiaOpenMPTools::currentThreadIndex()
{
    int myThread = 0;

#ifdef USE_OPENMP
    myThread = omp_get_thread_num();
#endif

    return myThread;
}

View File

@@ -0,0 +1,30 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2022 Equinor ASA
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#pragma once

#include <QString>

//==================================================================================================
//
//==================================================================================================
namespace RiaOpenMPTools
{
int availableThreadCount();

int currentThreadIndex();
}; // namespace RiaOpenMPTools
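For reference, a minimal hypothetical caller of the new helpers (not part of the commit). With USE_OPENMP defined the calls forward to omp_get_max_threads() and omp_get_thread_num(); in a serial build they return 1 and 0.

// Hypothetical standalone usage example, for illustration only.
#include "RiaOpenMPTools.h"

#include <cstdio>

int main()
{
    // Maximum number of threads available to a parallel section (1 in a serial build).
    std::printf( "available threads: %d\n", RiaOpenMPTools::availableThreadCount() );

#pragma omp parallel
    {
        // Each thread reports its own index; a serial build prints only index 0.
        std::printf( "hello from thread %d\n", RiaOpenMPTools::currentThreadIndex() );
    }

    return 0;
}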

View File

@@ -28,10 +28,6 @@
 #include "opm/io/eclipse/ESmry.hpp"
 #include "opm/io/eclipse/ExtESmry.hpp"

-#ifdef USE_OPENMP
-#include <omp.h>
-#endif

 #include <QFileInfo>

 size_t RifOpmCommonEclipseSummary::sm_createdEsmryFileCount = 0;

View File

@@ -32,10 +32,6 @@
 #include <QFileInfo>

-#ifdef USE_OPENMP
-#include <omp.h>
-#endif

 //--------------------------------------------------------------------------------------------------
 ///
 //--------------------------------------------------------------------------------------------------

View File

@@ -19,7 +19,11 @@
 #include "RivFaultGeometryGenerator.h"

-#include <cmath>
+#include "RiaOpenMPTools.h"
+
+#include "RigFault.h"
+#include "RigNNCData.h"
+#include "RigNncConnection.h"

 #include "cvfDrawableGeo.h"
 #include "cvfOutlineEdgeExtractor.h"
@@ -28,9 +32,7 @@
 #include "cvfScalarMapper.h"

-#include "RigFault.h"
-#include "RigNNCData.h"
-#include "RigNncConnection.h"
+#include <cmath>

 //--------------------------------------------------------------------------------------------------
 ///
@@ -166,7 +168,21 @@ void RivFaultGeometryGenerator::computeArrays( bool onlyShowFacesWithDefinedNeig
     const std::vector<RigFault::FaultFace>& faultFaces = m_fault->faultFaces();

-#pragma omp parallel for
+    int numberOfThreads = RiaOpenMPTools::availableThreadCount();
+
+    std::vector<std::vector<cvf::Vec3f>>                         threadVertices( numberOfThreads );
+    std::vector<std::vector<size_t>>                             threadCellIndices( numberOfThreads );
+    std::vector<std::vector<cvf::StructGridInterface::FaceType>> threadFaceTypes( numberOfThreads );
+
+#pragma omp parallel
+    {
+        int myThread = RiaOpenMPTools::currentThreadIndex();
+
+        cvf::Vec3d cornerVerts[8];
+        cvf::ubyte faceConn[4];
+
+        // NB! We are inside a parallel section, do not use "parallel for" here
+#pragma omp for
         for ( int fIdx = 0; fIdx < static_cast<int>( faultFaces.size() ); fIdx++ )
         {
             size_t cellIndex = faultFaces[fIdx].m_nativeReservoirCellIndex;
@@ -185,27 +201,33 @@ void RivFaultGeometryGenerator::computeArrays( bool onlyShowFacesWithDefinedNeig
             if ( onlyShowFacesWithDefinedNeighbors && !hasConnection( cellIndex, face, connections, connIndices ) )
                 continue;

-            cvf::Vec3d cornerVerts[8];
             m_grid->cellCornerVertices( cellIndex, cornerVerts );

-            cvf::ubyte faceConn[4];
             m_grid->cellFaceVertexIndices( face, faceConn );

-            // Critical section to avoid two threads accessing the arrays at the same time.
-#pragma omp critical( critical_section_RivFaultGeometryGenerator_computeArrays )
+            for ( int n = 0; n < 4; n++ )
             {
-                int n;
-                for ( n = 0; n < 4; n++ )
-                {
-                    vertices.push_back( cvf::Vec3f( cornerVerts[faceConn[n]] - offset ) );
-                }
+                threadVertices[myThread].emplace_back( cvf::Vec3f( cornerVerts[faceConn[n]] - offset ) );
+            }

             // Keep track of the source cell index per quad
-            m_quadMapper->quadToCellIndexMap().push_back( cellIndex );
-            m_quadMapper->quadToCellFaceMap().push_back( face );
-            }
+            threadCellIndices[myThread].emplace_back( cellIndex );
+            threadFaceTypes[myThread].emplace_back( face );
         }
     }

+    for ( int threadIndex = 0; threadIndex < numberOfThreads; threadIndex++ )
+    {
+        vertices.insert( vertices.end(), threadVertices[threadIndex].begin(), threadVertices[threadIndex].end() );
+
+        m_quadMapper->quadToCellIndexMap().insert( m_quadMapper->quadToCellIndexMap().end(),
+                                                   threadCellIndices[threadIndex].begin(),
+                                                   threadCellIndices[threadIndex].end() );
+        m_quadMapper->quadToCellFaceMap().insert( m_quadMapper->quadToCellFaceMap().end(),
+                                                  threadFaceTypes[threadIndex].begin(),
+                                                  threadFaceTypes[threadIndex].end() );
+    }
+
     m_vertices = new cvf::Vec3fArray;
     m_vertices->assign( vertices );
 }

View File

@@ -18,6 +18,7 @@
 #include "RimContourMapProjection.h"

+#include "RiaOpenMPTools.h"
 #include "RiaWeightedGeometricMeanCalculator.h"
 #include "RiaWeightedHarmonicMeanCalculator.h"
 #include "RiaWeightedMeanCalculator.h"
@@ -45,10 +46,6 @@
 #include <algorithm>

-#ifdef USE_OPENMP
-#include <omp.h>
-#endif

 namespace caf
 {
 template <>
@@ -861,19 +858,13 @@ void RimContourMapProjection::generateTrianglesWithVertexValues()
         }
     }

-#ifdef USE_OPENMP
-    std::vector<std::vector<std::vector<cvf::Vec4d>>> threadTriangles( omp_get_max_threads() );
-#else
-    std::vector<std::vector<std::vector<cvf::Vec4d>>> threadTriangles( 1 );
-#endif
+    int numberOfThreads = RiaOpenMPTools::availableThreadCount();
+
+    std::vector<std::vector<std::vector<cvf::Vec4d>>> threadTriangles( numberOfThreads );

 #pragma omp parallel
     {
-#ifdef USE_OPENMP
-        int myThread = omp_get_thread_num();
-#else
-        int myThread = 0;
-#endif
+        int myThread = RiaOpenMPTools::currentThreadIndex();

         threadTriangles[myThread].resize( std::max( (size_t)1, m_contourPolygons.size() ) );

 #pragma omp for schedule( dynamic )

View File

@@ -47,10 +47,6 @@
 #include "cafProgressInfo.h"

-#ifdef USE_OPENMP
-#include <omp.h>
-#endif

 #include <QDir>

 CAF_PDM_SOURCE_INIT( RimSummaryCaseMainCollection, "SummaryCaseCollection" );

View File

@@ -41,7 +41,14 @@ RigEclipseNativeStatCalc::RigEclipseNativeStatCalc( RigCaseCellResultsData*
 void RigEclipseNativeStatCalc::minMaxCellScalarValues( size_t timeStepIndex, double& min, double& max )
 {
     MinMaxAccumulator acc( min, max );
-    traverseCells( acc, timeStepIndex );
+
+    std::vector<MinMaxAccumulator> threadAccumulators;
+    threadTraverseCells( threadAccumulators, timeStepIndex );
+    for ( const auto& threadAcc : threadAccumulators )
+    {
+        acc.addValue( threadAcc.min );
+        acc.addValue( threadAcc.max );
+    }

     min = acc.min;
     max = acc.max;
 }
@@ -63,7 +70,13 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( double& p10, double& p90
     for ( size_t timeStepIndex = 0; timeStepIndex < timeStepCount(); timeStepIndex++ )
     {
-        traverseCells( acc, timeStepIndex );
+        std::vector<PercentilAccumulator> threadAccumulators;
+        threadTraverseCells( threadAccumulators, timeStepIndex );
+        for ( const auto& threadAcc : threadAccumulators )
+        {
+            acc.addData( threadAcc.values );
+            acc.addData( threadAcc.values );
+        }
     }

     acc.computep10p90( p10, p90 );
@@ -75,7 +88,15 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( size_t timeStepIndex, dou
 void RigEclipseNativeStatCalc::p10p90CellScalarValues( size_t timeStepIndex, double& p10, double& p90 )
 {
     PercentilAccumulator acc;
-    traverseCells( acc, timeStepIndex );
+
+    std::vector<PercentilAccumulator> threadAccumulators;
+    threadTraverseCells( threadAccumulators, timeStepIndex );
+    for ( const auto& threadAcc : threadAccumulators )
+    {
+        acc.addData( threadAcc.values );
+        acc.addData( threadAcc.values );
+    }

     acc.computep10p90( p10, p90 );
 }
@@ -85,7 +106,15 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( size_t timeStepIndex, dou
 void RigEclipseNativeStatCalc::posNegClosestToZero( size_t timeStepIndex, double& pos, double& neg )
 {
     PosNegAccumulator acc( pos, neg );
-    traverseCells( acc, timeStepIndex );
+
+    std::vector<PosNegAccumulator> threadAccumulators;
+    threadTraverseCells( threadAccumulators, timeStepIndex );
+    for ( const auto& threadAcc : threadAccumulators )
+    {
+        acc.addValue( threadAcc.pos );
+        acc.addValue( threadAcc.neg );
+    }

     pos = acc.pos;
     neg = acc.neg;
 }

View File

@@ -19,10 +19,11 @@
 #pragma once

-#include "RigStatisticsCalculator.h"
+#include "RiaOpenMPTools.h"

 #include "RigActiveCellInfo.h"
 #include "RigCaseCellResultsData.h"
+#include "RigStatisticsCalculator.h"

 class RigHistogramCalculator;
@@ -50,6 +51,7 @@ private:
     RigCaseCellResultsData* m_resultsData;
     RigEclipseResultAddress m_eclipseResultAddress;

+    // Add all result values to an accumulator for a given time step
     template <typename StatisticsAccumulator>
     void traverseCells( StatisticsAccumulator& accumulator, size_t timeStepIndex )
     {
@@ -59,31 +61,34 @@ private:
         }

         const std::vector<double>& values = m_resultsData->cellScalarResults( m_eclipseResultAddress, timeStepIndex );

-        if ( values.empty() )
+        for ( const auto& val : values )
         {
-            // Can happen if values do not exist for the current time step index.
-            return;
+            accumulator.addValue( val );
         }
+    }

-        const RigActiveCellInfo* actCellInfo = m_resultsData->activeCellInfo();
-        size_t                   cellCount   = actCellInfo->reservoirCellCount();
-
-        bool isUsingGlobalActiveIndex = m_resultsData->isUsingGlobalActiveIndex( m_eclipseResultAddress );
-        for ( size_t cIdx = 0; cIdx < cellCount; ++cIdx )
+    // Create one accumulator for each available thread, and add values to accumulator for a given time step
+    template <typename StatisticsAccumulator>
+    void threadTraverseCells( std::vector<StatisticsAccumulator>& accumulators, size_t timeStepIndex )
+    {
+        if ( timeStepIndex >= m_resultsData->cellScalarResults( m_eclipseResultAddress ).size() )
         {
-            // Filter out inactive cells
-            if ( !actCellInfo->isActive( cIdx ) ) continue;
-
-            size_t cellResultIndex = cIdx;
-            if ( isUsingGlobalActiveIndex )
-            {
-                cellResultIndex = actCellInfo->cellResultIndex( cIdx );
-            }
+            // Can happen if values do not exist for the current time step index.
+            return;
+        }

-            if ( cellResultIndex != cvf::UNDEFINED_SIZE_T && cellResultIndex < values.size() )
+        const std::vector<double>& values = m_resultsData->cellScalarResults( m_eclipseResultAddress, timeStepIndex );
+
+        int numberOfThreads = RiaOpenMPTools::availableThreadCount();
+        accumulators.resize( numberOfThreads );
+
+#pragma omp parallel
+        {
+            int myThread = RiaOpenMPTools::currentThreadIndex();
+
+#pragma omp for
+            for ( long long i = 0; i < (long long)values.size(); i++ )
             {
-                accumulator.addValue( values[cellResultIndex] );
+                accumulators[myThread].addValue( values[i] );
             }
         }
     }

View File

@@ -21,6 +21,7 @@
 #include "RigMainGrid.h"

 #include "RiaLogging.h"
+#include "RiaOpenMPTools.h"
 #include "RiaResultNames.h"

 #include "RigActiveCellInfo.h"
@@ -30,10 +31,6 @@
 #include "cvfAssert.h"
 #include "cvfBoundingBoxTree.h"

-#ifdef USE_OPENMP
-#include <omp.h>
-#endif

 RigMainGrid::RigMainGrid()
     : RigGridBase( this )
 {
@@ -453,8 +450,17 @@ void RigMainGrid::calculateFaults( const RigActiveCellInfo* activeCellInfo )
     const std::vector<cvf::Vec3d>& vxs = m_mainGrid->nodes();

-    std::vector<RigFault::FaultFace>& unNamedFaultFaces         = unNamedFault->faultFaces();
-    std::vector<RigFault::FaultFace>& unNamedFaultFacesInactive = unNamedFaultWithInactive->faultFaces();
+    int numberOfThreads = RiaOpenMPTools::availableThreadCount();
+
+    std::vector<std::vector<RigFault::FaultFace>> threadFaultFaces( numberOfThreads );
+    std::vector<std::vector<RigFault::FaultFace>> threadInactiveFaultFaces( numberOfThreads );
+
+#pragma omp parallel
+    {
+        int myThread = RiaOpenMPTools::currentThreadIndex();
+
+        // NB! We are inside a parallel section, do not use "parallel for" here
+#pragma omp for
         for ( int gcIdx = 0; gcIdx < static_cast<int>( m_cells.size() ); ++gcIdx )
         {
             addUnNamedFaultFaces( gcIdx,
@@ -462,10 +468,22 @@ void RigMainGrid::calculateFaults( const RigActiveCellInfo* activeCellInfo )
                                   vxs,
                                   unNamedFaultIdx,
                                   unNamedFaultWithInactiveIdx,
-                                  unNamedFaultFaces,
-                                  unNamedFaultFacesInactive,
+                                  threadFaultFaces[myThread],
+                                  threadInactiveFaultFaces[myThread],
                                   m_faultsPrCellAcc.p() );
         }
+    }
+
+    std::vector<RigFault::FaultFace>& unNamedFaultFaces         = unNamedFault->faultFaces();
+    std::vector<RigFault::FaultFace>& unNamedFaultFacesInactive = unNamedFaultWithInactive->faultFaces();
+
+    for ( int i = 0; i < numberOfThreads; i++ )
+    {
+        unNamedFaultFaces.insert( unNamedFaultFaces.end(), threadFaultFaces[i].begin(), threadFaultFaces[i].end() );
+        unNamedFaultFacesInactive.insert( unNamedFaultFacesInactive.end(),
+                                          threadInactiveFaultFaces[i].begin(),
+                                          threadInactiveFaultFaces[i].end() );
+    }
 }

 //--------------------------------------------------------------------------------------------------
@@ -557,8 +575,16 @@ void RigMainGrid::addUnNamedFaultFaces( int gcIdx,
             int faultIdx = unNamedFaultIdx;
             if ( !( isCellActive && isNeighborCellActive ) ) faultIdx = unNamedFaultWithInactiveIdx;

+#pragma omp critical( faultsPrCellAcc_modification )
+            {
+                // Best practice is to avoid critical sections. The number of cells related to a fault is usually very
+                // small compared to the total number of cells, so the performance of this function should be good. The
+                // main computation is related to the 'pointDistance' functions above. The refactoring of this structure
+                // to avoid critical section is considered too much compared to the gain.
                 faultsPrCellAcc->setFaultIdx( gcIdx, face, faultIdx );
                 faultsPrCellAcc->setFaultIdx( neighborReservoirCellIdx, StructGridInterface::oppositeFace( face ), faultIdx );
+            }

             // Add as fault face only if the grid index is less than the neighbors
@@ -574,15 +600,6 @@ void RigMainGrid::addUnNamedFaultFaces( int gcIdx,
                     unNamedFaultFacesInactive.push_back( ff );
                 }
             }
-            else
-            {
-                CVF_FAIL_MSG( "Found fault with global neighbor index less than the native index. " ); // Should never
-                                                                                                       // occur. because
-                                                                                                       // we flag the
-                                                                                                       // opposite face
-                                                                                                       // in the
-                                                                                                       // faultsPrCellAcc
-            }
         }
     }
 }
@@ -745,10 +762,8 @@ void RigMainGrid::buildCellSearchTree()
 #pragma omp parallel
         {
-            size_t threadCellCount = cellCount;
-#ifdef USE_OPENMP
-            threadCellCount = std::ceil( cellCount / static_cast<double>( omp_get_num_threads() ) );
-#endif
+            int numberOfThreads = RiaOpenMPTools::availableThreadCount();
+
+            size_t threadCellCount = std::ceil( cellCount / static_cast<double>( numberOfThreads ) );

             std::vector<size_t>           threadIndicesForBoundingBoxes;
             std::vector<cvf::BoundingBox> threadBoundingBoxes;

View File

@@ -232,10 +232,11 @@ void Utils::toTextureImageRegion(const QImage& qImage, const cvf::Vec2ui& srcPos
     // Check if QImage has format QImage::Format_ARGB32, and use a more optimized path
     if (qImage.format() == QImage::Format_ARGB32)
     {
-        for (cvf::uint y = 0; y < sizeY; ++y)
+#pragma omp for
+        for (int y = 0; y < static_cast<int>(sizeY); ++y)
         {
             const cvf::uint scanLineIdx = srcPosY + sizeY - y - 1;
-            const QRgb* qWholeScanLine = reinterpret_cast<const QRgb*>(qImage.scanLine(scanLineIdx));
+            const QRgb* qWholeScanLine = reinterpret_cast<const QRgb*>(qImage.constScanLine(scanLineIdx));
             const QRgb* qPixels = &qWholeScanLine[srcPosX];

             const cvf::uint dstStartIdx = 4*(y*sizeX);
@@ -254,7 +255,8 @@ void Utils::toTextureImageRegion(const QImage& qImage, const cvf::Vec2ui& srcPos
     else
     {
         cvf::Color4ub cvfRgbVal;
-        for (cvf::uint y = 0; y < sizeY; ++y)
+#pragma omp for
+        for (int y = 0; y < static_cast<int>(sizeY); ++y)
        {
             const cvf::uint qImageYPos = srcPosY + sizeY - y - 1;
             for (cvf::uint x = 0; x < sizeX; ++x)