mirror of
https://github.com/OPM/ResInsight.git
synced 2025-02-25 18:55:39 -06:00
Several OpenMP improvements
Several optimizations based on profiling of 20M grid model. These fixes will improve the largest performance issues, but there are still more operations that can be refactored. * OpenMP: Use in fault geometry generator * OpenMP: Use when computing statistics for result values * OpenMP: Use multithreading on fault detection * Add RiaOpenMPTools * VizFwk: Use openMP for texture generation
This commit is contained in:
@@ -41,7 +41,14 @@ RigEclipseNativeStatCalc::RigEclipseNativeStatCalc( RigCaseCellResultsData*
|
||||
void RigEclipseNativeStatCalc::minMaxCellScalarValues( size_t timeStepIndex, double& min, double& max )
|
||||
{
|
||||
MinMaxAccumulator acc( min, max );
|
||||
traverseCells( acc, timeStepIndex );
|
||||
|
||||
std::vector<MinMaxAccumulator> threadAccumulators;
|
||||
threadTraverseCells( threadAccumulators, timeStepIndex );
|
||||
for ( const auto& threadAcc : threadAccumulators )
|
||||
{
|
||||
acc.addValue( threadAcc.min );
|
||||
acc.addValue( threadAcc.max );
|
||||
}
|
||||
min = acc.min;
|
||||
max = acc.max;
|
||||
}
|
||||
@@ -63,7 +70,13 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( double& p10, double& p90
|
||||
|
||||
for ( size_t timeStepIndex = 0; timeStepIndex < timeStepCount(); timeStepIndex++ )
|
||||
{
|
||||
traverseCells( acc, timeStepIndex );
|
||||
std::vector<PercentilAccumulator> threadAccumulators;
|
||||
threadTraverseCells( threadAccumulators, timeStepIndex );
|
||||
for ( const auto& threadAcc : threadAccumulators )
|
||||
{
|
||||
acc.addData( threadAcc.values );
|
||||
acc.addData( threadAcc.values );
|
||||
}
|
||||
}
|
||||
|
||||
acc.computep10p90( p10, p90 );
|
||||
@@ -75,7 +88,15 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( double& p10, double& p90
|
||||
void RigEclipseNativeStatCalc::p10p90CellScalarValues( size_t timeStepIndex, double& p10, double& p90 )
|
||||
{
|
||||
PercentilAccumulator acc;
|
||||
traverseCells( acc, timeStepIndex );
|
||||
|
||||
std::vector<PercentilAccumulator> threadAccumulators;
|
||||
threadTraverseCells( threadAccumulators, timeStepIndex );
|
||||
for ( const auto& threadAcc : threadAccumulators )
|
||||
{
|
||||
acc.addData( threadAcc.values );
|
||||
acc.addData( threadAcc.values );
|
||||
}
|
||||
|
||||
acc.computep10p90( p10, p90 );
|
||||
}
|
||||
|
||||
@@ -85,7 +106,15 @@ void RigEclipseNativeStatCalc::p10p90CellScalarValues( size_t timeStepIndex, dou
|
||||
void RigEclipseNativeStatCalc::posNegClosestToZero( size_t timeStepIndex, double& pos, double& neg )
|
||||
{
|
||||
PosNegAccumulator acc( pos, neg );
|
||||
traverseCells( acc, timeStepIndex );
|
||||
|
||||
std::vector<PosNegAccumulator> threadAccumulators;
|
||||
threadTraverseCells( threadAccumulators, timeStepIndex );
|
||||
for ( const auto& threadAcc : threadAccumulators )
|
||||
{
|
||||
acc.addValue( threadAcc.pos );
|
||||
acc.addValue( threadAcc.neg );
|
||||
}
|
||||
|
||||
pos = acc.pos;
|
||||
neg = acc.neg;
|
||||
}
|
||||
|
||||
@@ -19,10 +19,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "RigStatisticsCalculator.h"
|
||||
#include "RiaOpenMPTools.h"
|
||||
|
||||
#include "RigActiveCellInfo.h"
|
||||
#include "RigCaseCellResultsData.h"
|
||||
#include "RigStatisticsCalculator.h"
|
||||
|
||||
class RigHistogramCalculator;
|
||||
|
||||
@@ -50,6 +51,7 @@ private:
|
||||
RigCaseCellResultsData* m_resultsData;
|
||||
RigEclipseResultAddress m_eclipseResultAddress;
|
||||
|
||||
// Add all result values to an accumulator for a given time step
|
||||
template <typename StatisticsAccumulator>
|
||||
void traverseCells( StatisticsAccumulator& accumulator, size_t timeStepIndex )
|
||||
{
|
||||
@@ -59,31 +61,34 @@ private:
|
||||
}
|
||||
|
||||
const std::vector<double>& values = m_resultsData->cellScalarResults( m_eclipseResultAddress, timeStepIndex );
|
||||
|
||||
if ( values.empty() )
|
||||
for ( const auto& val : values )
|
||||
{
|
||||
accumulator.addValue( val );
|
||||
}
|
||||
}
|
||||
|
||||
// Create one accumulator for each available thread, and add values to accumulator for a given time step
|
||||
template <typename StatisticsAccumulator>
|
||||
void threadTraverseCells( std::vector<StatisticsAccumulator>& accumulators, size_t timeStepIndex )
|
||||
{
|
||||
if ( timeStepIndex >= m_resultsData->cellScalarResults( m_eclipseResultAddress ).size() )
|
||||
{
|
||||
// Can happen if values do not exist for the current time step index.
|
||||
return;
|
||||
}
|
||||
|
||||
const RigActiveCellInfo* actCellInfo = m_resultsData->activeCellInfo();
|
||||
size_t cellCount = actCellInfo->reservoirCellCount();
|
||||
const std::vector<double>& values = m_resultsData->cellScalarResults( m_eclipseResultAddress, timeStepIndex );
|
||||
|
||||
bool isUsingGlobalActiveIndex = m_resultsData->isUsingGlobalActiveIndex( m_eclipseResultAddress );
|
||||
for ( size_t cIdx = 0; cIdx < cellCount; ++cIdx )
|
||||
int numberOfThreads = RiaOpenMPTools::availableThreadCount();
|
||||
accumulators.resize( numberOfThreads );
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
// Filter out inactive cells
|
||||
if ( !actCellInfo->isActive( cIdx ) ) continue;
|
||||
int myThread = RiaOpenMPTools::currentThreadIndex();
|
||||
|
||||
size_t cellResultIndex = cIdx;
|
||||
if ( isUsingGlobalActiveIndex )
|
||||
#pragma omp for
|
||||
for ( long long i = 0; i < (long long)values.size(); i++ )
|
||||
{
|
||||
cellResultIndex = actCellInfo->cellResultIndex( cIdx );
|
||||
}
|
||||
|
||||
if ( cellResultIndex != cvf::UNDEFINED_SIZE_T && cellResultIndex < values.size() )
|
||||
{
|
||||
accumulator.addValue( values[cellResultIndex] );
|
||||
accumulators[myThread].addValue( values[i] );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "RigMainGrid.h"
|
||||
|
||||
#include "RiaLogging.h"
|
||||
#include "RiaOpenMPTools.h"
|
||||
#include "RiaResultNames.h"
|
||||
|
||||
#include "RigActiveCellInfo.h"
|
||||
@@ -30,10 +31,6 @@
|
||||
#include "cvfAssert.h"
|
||||
#include "cvfBoundingBoxTree.h"
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
RigMainGrid::RigMainGrid()
|
||||
: RigGridBase( this )
|
||||
{
|
||||
@@ -453,18 +450,39 @@ void RigMainGrid::calculateFaults( const RigActiveCellInfo* activeCellInfo )
|
||||
|
||||
const std::vector<cvf::Vec3d>& vxs = m_mainGrid->nodes();
|
||||
|
||||
int numberOfThreads = RiaOpenMPTools::availableThreadCount();
|
||||
|
||||
std::vector<std::vector<RigFault::FaultFace>> threadFaultFaces( numberOfThreads );
|
||||
std::vector<std::vector<RigFault::FaultFace>> threadInactiveFaultFaces( numberOfThreads );
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int myThread = RiaOpenMPTools::currentThreadIndex();
|
||||
|
||||
// NB! We are inside a parallel section, do not use "parallel for" here
|
||||
#pragma omp for
|
||||
for ( int gcIdx = 0; gcIdx < static_cast<int>( m_cells.size() ); ++gcIdx )
|
||||
{
|
||||
addUnNamedFaultFaces( gcIdx,
|
||||
activeCellInfo,
|
||||
vxs,
|
||||
unNamedFaultIdx,
|
||||
unNamedFaultWithInactiveIdx,
|
||||
threadFaultFaces[myThread],
|
||||
threadInactiveFaultFaces[myThread],
|
||||
m_faultsPrCellAcc.p() );
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<RigFault::FaultFace>& unNamedFaultFaces = unNamedFault->faultFaces();
|
||||
std::vector<RigFault::FaultFace>& unNamedFaultFacesInactive = unNamedFaultWithInactive->faultFaces();
|
||||
for ( int gcIdx = 0; gcIdx < static_cast<int>( m_cells.size() ); ++gcIdx )
|
||||
|
||||
for ( int i = 0; i < numberOfThreads; i++ )
|
||||
{
|
||||
addUnNamedFaultFaces( gcIdx,
|
||||
activeCellInfo,
|
||||
vxs,
|
||||
unNamedFaultIdx,
|
||||
unNamedFaultWithInactiveIdx,
|
||||
unNamedFaultFaces,
|
||||
unNamedFaultFacesInactive,
|
||||
m_faultsPrCellAcc.p() );
|
||||
unNamedFaultFaces.insert( unNamedFaultFaces.end(), threadFaultFaces[i].begin(), threadFaultFaces[i].end() );
|
||||
unNamedFaultFacesInactive.insert( unNamedFaultFacesInactive.end(),
|
||||
threadInactiveFaultFaces[i].begin(),
|
||||
threadInactiveFaultFaces[i].end() );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -557,8 +575,16 @@ void RigMainGrid::addUnNamedFaultFaces( int gcIdx,
|
||||
int faultIdx = unNamedFaultIdx;
|
||||
if ( !( isCellActive && isNeighborCellActive ) ) faultIdx = unNamedFaultWithInactiveIdx;
|
||||
|
||||
faultsPrCellAcc->setFaultIdx( gcIdx, face, faultIdx );
|
||||
faultsPrCellAcc->setFaultIdx( neighborReservoirCellIdx, StructGridInterface::oppositeFace( face ), faultIdx );
|
||||
#pragma omp critical( faultsPrCellAcc_modification )
|
||||
{
|
||||
// Best practice is to avoid critical sections. The number of cells related to a fault is usually very
|
||||
// small compared to the total number of cells, so the performance of this function should be good. The
|
||||
// main computation is related to the 'pointDistance' functions above. The refactoring of this structure
|
||||
// to avoid critical section is considered too much compared to the gain.
|
||||
|
||||
faultsPrCellAcc->setFaultIdx( gcIdx, face, faultIdx );
|
||||
faultsPrCellAcc->setFaultIdx( neighborReservoirCellIdx, StructGridInterface::oppositeFace( face ), faultIdx );
|
||||
}
|
||||
|
||||
// Add as fault face only if the grid index is less than the neighbors
|
||||
|
||||
@@ -574,15 +600,6 @@ void RigMainGrid::addUnNamedFaultFaces( int gcIdx,
|
||||
unNamedFaultFacesInactive.push_back( ff );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CVF_FAIL_MSG( "Found fault with global neighbor index less than the native index. " ); // Should never
|
||||
// occur. because
|
||||
// we flag the
|
||||
// opposite face
|
||||
// in the
|
||||
// faultsPrCellAcc
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -745,10 +762,8 @@ void RigMainGrid::buildCellSearchTree()
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
size_t threadCellCount = cellCount;
|
||||
#ifdef USE_OPENMP
|
||||
threadCellCount = std::ceil( cellCount / static_cast<double>( omp_get_num_threads() ) );
|
||||
#endif
|
||||
int numberOfThreads = RiaOpenMPTools::availableThreadCount();
|
||||
size_t threadCellCount = std::ceil( cellCount / static_cast<double>( numberOfThreads ) );
|
||||
|
||||
std::vector<size_t> threadIndicesForBoundingBoxes;
|
||||
std::vector<cvf::BoundingBox> threadBoundingBoxes;
|
||||
|
||||
Reference in New Issue
Block a user