From 2da678802b36f1ac4496c524a4d94bcb17333ec5 Mon Sep 17 00:00:00 2001 From: Gaute Lindkvist Date: Thu, 27 Aug 2020 13:07:04 +0200 Subject: [PATCH] #4064 Support sorting by correlation in Ensemble Curve coloring --- .../Application/Tools/RiaStatisticsTools.cpp | 16 +++ .../CorrelationPlots/RimCorrelationPlot.cpp | 67 +-------- .../Summary/RimEnsembleCurveSet.cpp | 31 +++- .../Summary/RimEnsembleCurveSet.h | 2 + .../Summary/RimSummaryCaseCollection.cpp | 135 ++++++++++++++++++ .../Summary/RimSummaryCaseCollection.h | 12 +- .../UserInterface/RiuSummaryQwtPlot.cpp | 25 +++- 7 files changed, 217 insertions(+), 71 deletions(-) diff --git a/ApplicationCode/Application/Tools/RiaStatisticsTools.cpp b/ApplicationCode/Application/Tools/RiaStatisticsTools.cpp index 11d6d4c7a2..6c89488fda 100644 --- a/ApplicationCode/Application/Tools/RiaStatisticsTools.cpp +++ b/ApplicationCode/Application/Tools/RiaStatisticsTools.cpp @@ -21,6 +21,9 @@ #include "RiaStatisticsTools.h" #include "RifEclipseSummaryAddress.h" +#include "RigStatisticsMath.h" + +#include "cafAssert.h" #ifdef USE_GSL #include "gsl/statistics/gsl_statistics_double.h" @@ -59,6 +62,12 @@ const QString RiaStatisticsTools::replacePercentileByPValueText( const QString& //-------------------------------------------------------------------------------------------------- double RiaStatisticsTools::pearsonCorrelation( const std::vector& xValues, const std::vector& yValues ) { + const double eps = 1.0e-8; + double rangeX = 0.0, rangeY = 0.0; + RigStatisticsMath::calculateBasicStatistics( xValues, nullptr, nullptr, nullptr, &rangeX, nullptr, nullptr ); + RigStatisticsMath::calculateBasicStatistics( yValues, nullptr, nullptr, nullptr, &rangeY, nullptr, nullptr ); + if ( rangeX < eps || rangeY < eps ) return 0.0; + #ifdef USE_GSL return pearsonCorrelationGSL( xValues, yValues ); #else @@ -74,6 +83,7 @@ double RiaStatisticsTools::pearsonCorrelationGSL( const std::vector& xVa #ifdef USE_GSL return gsl_stats_correlation( xValues.data(), 1, yValues.data(), 1, xValues.size() ); #else + CAF_ASSERT( false ); return std::numeric_limits::infinity(); #endif } @@ -120,6 +130,12 @@ double RiaStatisticsTools::pearsonCorrelationOwn( const std::vector& xVa //-------------------------------------------------------------------------------------------------- double RiaStatisticsTools::spearmanCorrelation( const std::vector& xValues, const std::vector& yValues ) { + const double eps = 1.0e-8; + double rangeX = 0.0, rangeY = 0.0; + RigStatisticsMath::calculateBasicStatistics( xValues, nullptr, nullptr, nullptr, &rangeX, nullptr, nullptr ); + RigStatisticsMath::calculateBasicStatistics( yValues, nullptr, nullptr, nullptr, &rangeY, nullptr, nullptr ); + if ( rangeX < eps || rangeY < eps ) return 0.0; + #ifdef USE_GSL std::vector work( 2 * xValues.size() ); return gsl_stats_spearman( xValues.data(), 1, yValues.data(), 1, xValues.size(), work.data() ); diff --git a/ApplicationCode/ProjectDataModel/CorrelationPlots/RimCorrelationPlot.cpp b/ApplicationCode/ProjectDataModel/CorrelationPlots/RimCorrelationPlot.cpp index 6902b22970..ea02cb1cae 100644 --- a/ApplicationCode/ProjectDataModel/CorrelationPlots/RimCorrelationPlot.cpp +++ b/ApplicationCode/ProjectDataModel/CorrelationPlots/RimCorrelationPlot.cpp @@ -259,75 +259,22 @@ void RimCorrelationPlot::addDataToChartBuilder( RiuGroupedBarChartBuilder& chart if ( ensembles().empty() ) return; if ( addresses().empty() ) return; - std::vector caseValuesAtTimestep; - std::map> parameterValues; - auto ensemble = *ensembles().begin(); auto address = *addresses().begin(); - for ( size_t caseIdx = 0u; caseIdx < ensemble->allSummaryCases().size(); ++caseIdx ) - { - auto summaryCase = ensemble->allSummaryCases()[caseIdx]; - - RifSummaryReaderInterface* reader = summaryCase->summaryReader(); - if ( !reader ) continue; - - if ( !summaryCase->caseRealizationParameters() ) continue; - - std::vector values; - - double closestValue = std::numeric_limits::infinity(); - time_t closestTimeStep = 0; - if ( reader->values( address, &values ) ) - { - const std::vector& timeSteps = reader->timeSteps( address ); - for ( size_t i = 0; i < timeSteps.size(); ++i ) - { - if ( timeDiff( timeSteps[i], selectedTimestep ) < timeDiff( selectedTimestep, closestTimeStep ) ) - { - closestValue = values[i]; - closestTimeStep = timeSteps[i]; - } - } - } - if ( closestValue != std::numeric_limits::infinity() ) - { - caseValuesAtTimestep.push_back( closestValue ); - - for ( auto parameterName : m_selectedParametersList() ) - { - auto parameter = ensemble->ensembleParameter( parameterName ); - if ( parameter.isNumeric() && parameter.isValid() ) - { - double paramValue = parameter.values[caseIdx].toDouble(); - parameterValues[parameter.name].push_back( paramValue ); - } - } - } - } - - std::vector> correlationResults; - for ( auto parameterValuesPair : parameterValues ) - { - double correlation = 0.0; - if ( m_correlationFactor == CorrelationFactor::PEARSON ) - { - correlation = RiaStatisticsTools::pearsonCorrelation( parameterValuesPair.second, caseValuesAtTimestep ); - } - else - { - correlation = RiaStatisticsTools::spearmanCorrelation( parameterValuesPair.second, caseValuesAtTimestep ); - } - correlationResults.push_back( std::make_pair( parameterValuesPair.first, correlation ) ); - } + std::vector> correlations = + ensemble->parameterCorrelations( address, + selectedTimestep, + m_correlationFactor == CorrelationFactor::PEARSON, + m_selectedParametersList() ); QString timestepString = m_timeStep().toString( RiaPreferences::current()->dateTimeFormat() ); - for ( auto parameterCorrPair : correlationResults ) + for ( auto parameterCorrPair : correlations ) { double value = m_showAbsoluteValues() ? std::abs( parameterCorrPair.second ) : parameterCorrPair.second; double sortValue = m_sortByAbsoluteValues() ? std::abs( value ) : value; - QString barText = parameterCorrPair.first; + QString barText = QString( "%1 (%2)" ).arg( parameterCorrPair.first.name ).arg( parameterCorrPair.second ); QString majorText = "", medText = "", minText = "", legendText = barText; chartBuilder.addBarEntry( majorText, medText, minText, sortValue, legendText, barText, value ); } diff --git a/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.cpp b/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.cpp index 38999c24d4..b5c67daf0d 100644 --- a/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.cpp +++ b/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.cpp @@ -758,16 +758,20 @@ QList RimEnsembleCurveSet::calculateValueOptions( const auto byEnsParamOption = ColorModeEnum( ColorMode::BY_ENSEMBLE_PARAM ); options.push_back( caf::PdmOptionItemInfo( singleColorOption.uiText(), ColorMode::SINGLE_COLOR ) ); - if ( !variationSortedEnsembleParameters().empty() ) + if ( !correlationSortedEnsembleParameters().empty() ) { options.push_back( caf::PdmOptionItemInfo( byEnsParamOption.uiText(), ColorMode::BY_ENSEMBLE_PARAM ) ); } } else if ( fieldNeedingOptions == &m_ensembleParameter ) { - for ( const auto& param : variationSortedEnsembleParameters() ) + auto params = correlationSortedEnsembleParameters(); + for ( const auto& paramCorrPair : params ) { - options.push_back( caf::PdmOptionItemInfo( param.uiName(), param.name ) ); + QString name = paramCorrPair.first.name; + double corr = paramCorrPair.second; + options.push_back( + caf::PdmOptionItemInfo( QString( "%1 (Avg. correlation: %2)" ).arg( name ).arg( corr ), name ) ); } } else if ( fieldNeedingOptions == &m_yValuesSummaryAddressUiField ) @@ -1119,6 +1123,27 @@ std::vector RimEnsembleCurveSet::variationSortedEnsembleParam } } +//-------------------------------------------------------------------------------------------------- +/// +//-------------------------------------------------------------------------------------------------- +std::vector> RimEnsembleCurveSet::correlationSortedEnsembleParameters() const +{ + RimSummaryCaseCollection* ensemble = m_yValuesSummaryCaseCollection; + if ( ensemble ) + { + auto parameters = ensemble->parameterCorrelationsAllTimeSteps( summaryAddress() ); + std::sort( parameters.begin(), + parameters.end(), + []( const std::pair& lhs, + const std::pair& rhs ) { return lhs.second > rhs.second; } ); + return parameters; + } + else + { + return std::vector>(); + } +} + //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- diff --git a/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.h b/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.h index 259692429c..e9e140d77c 100644 --- a/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.h +++ b/ApplicationCode/ProjectDataModel/Summary/RimEnsembleCurveSet.h @@ -120,6 +120,8 @@ public: void updateAllTextInPlot(); std::vector variationSortedEnsembleParameters() const; + std::vector> correlationSortedEnsembleParameters() const; + std::vector filterEnsembleCases( const std::vector& sumCases ); void disableStatisticCurves(); bool isFiltered() const; diff --git a/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.cpp b/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.cpp index 511fd14df5..f2227737a7 100644 --- a/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.cpp +++ b/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.cpp @@ -19,6 +19,8 @@ #include "RimSummaryCaseCollection.h" #include "RiaFieldHandleTools.h" +#include "RiaStatisticsTools.h" +#include "RiaWeightedMeanCalculator.h" #include "RicfCommandObject.h" @@ -39,6 +41,7 @@ #include #include +#include CAF_PDM_SOURCE_INIT( RimSummaryCaseCollection, "SummaryCaseSubCollection" ); @@ -463,6 +466,138 @@ const std::vector& return m_cachedSortedEnsembleParameters; } +time_t timeDiff( time_t lhs, time_t rhs ) +{ + if ( lhs >= rhs ) + { + return lhs - rhs; + } + return rhs - lhs; +} + +//-------------------------------------------------------------------------------------------------- +/// +//-------------------------------------------------------------------------------------------------- +std::vector> + RimSummaryCaseCollection::parameterCorrelations( const RifEclipseSummaryAddress& address, + time_t timeStep, + bool spearman, + const std::vector& selectedParameters ) const +{ + auto parameters = variationSortedEnsembleParameters( true ); + + if ( !selectedParameters.empty() ) + { + parameters.erase( std::remove_if( parameters.begin(), + parameters.end(), + [&selectedParameters]( const EnsembleParameter& parameter ) { + return std::find( selectedParameters.begin(), + selectedParameters.end(), + parameter.name ) == selectedParameters.end(); + } ), + parameters.end() ); + } + + std::vector caseValuesAtTimestep; + std::map> parameterValues; + + for ( size_t caseIdx = 0u; caseIdx < m_cases.size(); ++caseIdx ) + { + RimSummaryCase* summaryCase = m_cases[caseIdx]; + RifSummaryReaderInterface* reader = summaryCase->summaryReader(); + if ( !reader ) continue; + + if ( !summaryCase->caseRealizationParameters() ) continue; + + std::vector values; + + double closestValue = std::numeric_limits::infinity(); + time_t closestTimeStep = 0; + if ( reader->values( address, &values ) ) + { + const std::vector& timeSteps = reader->timeSteps( address ); + for ( size_t i = 0; i < timeSteps.size(); ++i ) + { + if ( timeDiff( timeSteps[i], timeStep ) < timeDiff( timeStep, closestTimeStep ) ) + { + closestValue = values[i]; + closestTimeStep = timeSteps[i]; + } + } + } + if ( closestValue != std::numeric_limits::infinity() ) + { + caseValuesAtTimestep.push_back( closestValue ); + + for ( auto parameter : parameters ) + { + if ( parameter.isNumeric() && parameter.isValid() ) + { + double paramValue = parameter.values[caseIdx].toDouble(); + parameterValues[parameter].push_back( paramValue ); + } + } + } + } + + std::vector> correlationResults; + for ( auto parameterValuesPair : parameterValues ) + { + double correlation = 0.0; + if ( spearman ) + { + double spearman = RiaStatisticsTools::spearmanCorrelation( parameterValuesPair.second, caseValuesAtTimestep ); + if ( spearman != std::numeric_limits::infinity() ) correlation = spearman; + } + else + { + double pearson = RiaStatisticsTools::pearsonCorrelation( parameterValuesPair.second, caseValuesAtTimestep ); + if ( pearson != std::numeric_limits::infinity() ) correlation = pearson; + } + correlationResults.push_back( std::make_pair( parameterValuesPair.first, correlation ) ); + } + return correlationResults; +} + +//-------------------------------------------------------------------------------------------------- +/// Returns a vector of the parameters and the average absolute values of correlations per time step +//-------------------------------------------------------------------------------------------------- +std::vector> + RimSummaryCaseCollection::parameterCorrelationsAllTimeSteps( const RifEclipseSummaryAddress& address, + bool spearman, + const std::vector& selectedParameters ) const +{ + const size_t maxTimeStepCount = 10; + std::set timeSteps = ensembleTimeSteps(); + if ( timeSteps.empty() ) return {}; + + std::vector timeStepsVector( timeSteps.begin(), timeSteps.end() ); + size_t stride = std::max( (size_t)1, timeStepsVector.size() / maxTimeStepCount ); + + std::vector>> correlationsForChosenTimeSteps; + + for ( size_t i = stride; i < timeStepsVector.size(); i += stride ) + { + std::vector> correlationsForTimeStep = + parameterCorrelations( address, timeStepsVector[i], spearman, selectedParameters ); + correlationsForChosenTimeSteps.push_back( correlationsForTimeStep ); + } + + for ( size_t i = 1; i < correlationsForChosenTimeSteps.size(); ++i ) + { + for ( size_t j = 0; j < correlationsForChosenTimeSteps[0].size(); ++j ) + { + correlationsForChosenTimeSteps[0][j].second += correlationsForChosenTimeSteps[i][j].second; + } + } + for ( size_t j = 0; j < correlationsForChosenTimeSteps[0].size(); ++j ) + { + correlationsForChosenTimeSteps[0][j].second /= correlationsForChosenTimeSteps.size(); + } + + return correlationsForChosenTimeSteps[0]; +} + //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- diff --git a/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.h b/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.h index 85cb67c0e8..b76803d9cf 100644 --- a/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.h +++ b/ApplicationCode/ProjectDataModel/Summary/RimSummaryCaseCollection.h @@ -111,7 +111,17 @@ public: int ensembleId() const; const std::vector& variationSortedEnsembleParameters( bool excludeNoVariation = false ) const; - std::vector alphabeticEnsembleParameters() const; + std::vector> + parameterCorrelations( const RifEclipseSummaryAddress& address, + time_t selectedTimeStep, + bool spearman = false, + const std::vector& selectedParameters = {} ) const; + std::vector> + parameterCorrelationsAllTimeSteps( const RifEclipseSummaryAddress& address, + bool spearman = false, + const std::vector& selectedParameters = {} ) const; + + std::vector alphabeticEnsembleParameters() const; EnsembleParameter ensembleParameter( const QString& paramName ) const; void calculateEnsembleParametersIntersectionHash(); diff --git a/ApplicationCode/UserInterface/RiuSummaryQwtPlot.cpp b/ApplicationCode/UserInterface/RiuSummaryQwtPlot.cpp index e1e5daeebf..8df6c885c4 100644 --- a/ApplicationCode/UserInterface/RiuSummaryQwtPlot.cpp +++ b/ApplicationCode/UserInterface/RiuSummaryQwtPlot.cpp @@ -265,17 +265,28 @@ void RiuSummaryQwtPlot::contextMenuEvent( QContextMenuEvent* event ) { menuBuilder.subMenuStart( "Cross Plots", *caf::IconProvider( ":/CorrelationCrossPlot16x16.png" ).icon() ); - std::vector ensembleParameters = - ensemble->variationSortedEnsembleParameters(); - for ( const EnsembleParameter& param : ensembleParameters ) + std::vector> ensembleParameters = + ensemble->parameterCorrelations( clickedEnsembleCurveSet->summaryAddress(), + timeStep, + false ); + std::sort( ensembleParameters.begin(), + ensembleParameters.end(), + []( const std::pair& lhs, + const std::pair& rhs ) { + return std::fabs( lhs.second ) > std::fabs( rhs.second ); + } ); + + for ( const auto& param : ensembleParameters ) { - if ( param.variationBin >= (int)EnsembleParameter::LOW_VARIATION ) + if ( std::fabs( param.second ) >= 1.0e-6 ) { - params.ensembleParameter = param.name; + params.ensembleParameter = param.first.name; variant = QVariant::fromValue( params ); menuBuilder.addCmdFeatureWithUserData( "RicNewParameterResultCrossPlotFeature", - QString( "New Cross Plot Against %1" ) - .arg( param.uiName() ), + QString( "New Cross Plot Against %1 " + "(Correlation: %2)" ) + .arg( param.first.name ) + .arg( param.second ), variant ); } }