#4064 Support sorting by correlation in Ensemble Curve coloring

This commit is contained in:
Gaute Lindkvist 2020-08-27 13:07:04 +02:00 committed by Magne Sjaastad
parent 6f3c8e7ec6
commit 2da678802b
7 changed files with 217 additions and 71 deletions

View File

@ -21,6 +21,9 @@
#include "RiaStatisticsTools.h"
#include "RifEclipseSummaryAddress.h"
#include "RigStatisticsMath.h"
#include "cafAssert.h"
#ifdef USE_GSL
#include "gsl/statistics/gsl_statistics_double.h"
@ -59,6 +62,12 @@ const QString RiaStatisticsTools::replacePercentileByPValueText( const QString&
//--------------------------------------------------------------------------------------------------
double RiaStatisticsTools::pearsonCorrelation( const std::vector<double>& xValues, const std::vector<double>& yValues )
{
const double eps = 1.0e-8;
double rangeX = 0.0, rangeY = 0.0;
RigStatisticsMath::calculateBasicStatistics( xValues, nullptr, nullptr, nullptr, &rangeX, nullptr, nullptr );
RigStatisticsMath::calculateBasicStatistics( yValues, nullptr, nullptr, nullptr, &rangeY, nullptr, nullptr );
if ( rangeX < eps || rangeY < eps ) return 0.0;
#ifdef USE_GSL
return pearsonCorrelationGSL( xValues, yValues );
#else
@ -74,6 +83,7 @@ double RiaStatisticsTools::pearsonCorrelationGSL( const std::vector<double>& xVa
#ifdef USE_GSL
return gsl_stats_correlation( xValues.data(), 1, yValues.data(), 1, xValues.size() );
#else
CAF_ASSERT( false );
return std::numeric_limits<double>::infinity();
#endif
}
@ -120,6 +130,12 @@ double RiaStatisticsTools::pearsonCorrelationOwn( const std::vector<double>& xVa
//--------------------------------------------------------------------------------------------------
double RiaStatisticsTools::spearmanCorrelation( const std::vector<double>& xValues, const std::vector<double>& yValues )
{
const double eps = 1.0e-8;
double rangeX = 0.0, rangeY = 0.0;
RigStatisticsMath::calculateBasicStatistics( xValues, nullptr, nullptr, nullptr, &rangeX, nullptr, nullptr );
RigStatisticsMath::calculateBasicStatistics( yValues, nullptr, nullptr, nullptr, &rangeY, nullptr, nullptr );
if ( rangeX < eps || rangeY < eps ) return 0.0;
#ifdef USE_GSL
std::vector<double> work( 2 * xValues.size() );
return gsl_stats_spearman( xValues.data(), 1, yValues.data(), 1, xValues.size(), work.data() );

View File

@ -259,75 +259,22 @@ void RimCorrelationPlot::addDataToChartBuilder( RiuGroupedBarChartBuilder& chart
if ( ensembles().empty() ) return;
if ( addresses().empty() ) return;
std::vector<double> caseValuesAtTimestep;
std::map<QString, std::vector<double>> parameterValues;
auto ensemble = *ensembles().begin();
auto address = *addresses().begin();
for ( size_t caseIdx = 0u; caseIdx < ensemble->allSummaryCases().size(); ++caseIdx )
{
auto summaryCase = ensemble->allSummaryCases()[caseIdx];
RifSummaryReaderInterface* reader = summaryCase->summaryReader();
if ( !reader ) continue;
if ( !summaryCase->caseRealizationParameters() ) continue;
std::vector<double> values;
double closestValue = std::numeric_limits<double>::infinity();
time_t closestTimeStep = 0;
if ( reader->values( address, &values ) )
{
const std::vector<time_t>& timeSteps = reader->timeSteps( address );
for ( size_t i = 0; i < timeSteps.size(); ++i )
{
if ( timeDiff( timeSteps[i], selectedTimestep ) < timeDiff( selectedTimestep, closestTimeStep ) )
{
closestValue = values[i];
closestTimeStep = timeSteps[i];
}
}
}
if ( closestValue != std::numeric_limits<double>::infinity() )
{
caseValuesAtTimestep.push_back( closestValue );
for ( auto parameterName : m_selectedParametersList() )
{
auto parameter = ensemble->ensembleParameter( parameterName );
if ( parameter.isNumeric() && parameter.isValid() )
{
double paramValue = parameter.values[caseIdx].toDouble();
parameterValues[parameter.name].push_back( paramValue );
}
}
}
}
std::vector<std::pair<QString, double>> correlationResults;
for ( auto parameterValuesPair : parameterValues )
{
double correlation = 0.0;
if ( m_correlationFactor == CorrelationFactor::PEARSON )
{
correlation = RiaStatisticsTools::pearsonCorrelation( parameterValuesPair.second, caseValuesAtTimestep );
}
else
{
correlation = RiaStatisticsTools::spearmanCorrelation( parameterValuesPair.second, caseValuesAtTimestep );
}
correlationResults.push_back( std::make_pair( parameterValuesPair.first, correlation ) );
}
std::vector<std::pair<EnsembleParameter, double>> correlations =
ensemble->parameterCorrelations( address,
selectedTimestep,
m_correlationFactor == CorrelationFactor::PEARSON,
m_selectedParametersList() );
QString timestepString = m_timeStep().toString( RiaPreferences::current()->dateTimeFormat() );
for ( auto parameterCorrPair : correlationResults )
for ( auto parameterCorrPair : correlations )
{
double value = m_showAbsoluteValues() ? std::abs( parameterCorrPair.second ) : parameterCorrPair.second;
double sortValue = m_sortByAbsoluteValues() ? std::abs( value ) : value;
QString barText = parameterCorrPair.first;
QString barText = QString( "%1 (%2)" ).arg( parameterCorrPair.first.name ).arg( parameterCorrPair.second );
QString majorText = "", medText = "", minText = "", legendText = barText;
chartBuilder.addBarEntry( majorText, medText, minText, sortValue, legendText, barText, value );
}

View File

@ -758,16 +758,20 @@ QList<caf::PdmOptionItemInfo> RimEnsembleCurveSet::calculateValueOptions( const
auto byEnsParamOption = ColorModeEnum( ColorMode::BY_ENSEMBLE_PARAM );
options.push_back( caf::PdmOptionItemInfo( singleColorOption.uiText(), ColorMode::SINGLE_COLOR ) );
if ( !variationSortedEnsembleParameters().empty() )
if ( !correlationSortedEnsembleParameters().empty() )
{
options.push_back( caf::PdmOptionItemInfo( byEnsParamOption.uiText(), ColorMode::BY_ENSEMBLE_PARAM ) );
}
}
else if ( fieldNeedingOptions == &m_ensembleParameter )
{
for ( const auto& param : variationSortedEnsembleParameters() )
auto params = correlationSortedEnsembleParameters();
for ( const auto& paramCorrPair : params )
{
options.push_back( caf::PdmOptionItemInfo( param.uiName(), param.name ) );
QString name = paramCorrPair.first.name;
double corr = paramCorrPair.second;
options.push_back(
caf::PdmOptionItemInfo( QString( "%1 (Avg. correlation: %2)" ).arg( name ).arg( corr ), name ) );
}
}
else if ( fieldNeedingOptions == &m_yValuesSummaryAddressUiField )
@ -1119,6 +1123,27 @@ std::vector<EnsembleParameter> RimEnsembleCurveSet::variationSortedEnsembleParam
}
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
std::vector<std::pair<EnsembleParameter, double>> RimEnsembleCurveSet::correlationSortedEnsembleParameters() const
{
RimSummaryCaseCollection* ensemble = m_yValuesSummaryCaseCollection;
if ( ensemble )
{
auto parameters = ensemble->parameterCorrelationsAllTimeSteps( summaryAddress() );
std::sort( parameters.begin(),
parameters.end(),
[]( const std::pair<EnsembleParameter, double>& lhs,
const std::pair<EnsembleParameter, double>& rhs ) { return lhs.second > rhs.second; } );
return parameters;
}
else
{
return std::vector<std::pair<EnsembleParameter, double>>();
}
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------

View File

@ -120,6 +120,8 @@ public:
void updateAllTextInPlot();
std::vector<EnsembleParameter> variationSortedEnsembleParameters() const;
std::vector<std::pair<EnsembleParameter, double>> correlationSortedEnsembleParameters() const;
std::vector<RimSummaryCase*> filterEnsembleCases( const std::vector<RimSummaryCase*>& sumCases );
void disableStatisticCurves();
bool isFiltered() const;

View File

@ -19,6 +19,8 @@
#include "RimSummaryCaseCollection.h"
#include "RiaFieldHandleTools.h"
#include "RiaStatisticsTools.h"
#include "RiaWeightedMeanCalculator.h"
#include "RicfCommandObject.h"
@ -39,6 +41,7 @@
#include <algorithm>
#include <cmath>
#include <omp.h>
CAF_PDM_SOURCE_INIT( RimSummaryCaseCollection, "SummaryCaseSubCollection" );
@ -463,6 +466,138 @@ const std::vector<EnsembleParameter>&
return m_cachedSortedEnsembleParameters;
}
time_t timeDiff( time_t lhs, time_t rhs )
{
if ( lhs >= rhs )
{
return lhs - rhs;
}
return rhs - lhs;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
std::vector<std::pair<EnsembleParameter, double>>
RimSummaryCaseCollection::parameterCorrelations( const RifEclipseSummaryAddress& address,
time_t timeStep,
bool spearman,
const std::vector<QString>& selectedParameters ) const
{
auto parameters = variationSortedEnsembleParameters( true );
if ( !selectedParameters.empty() )
{
parameters.erase( std::remove_if( parameters.begin(),
parameters.end(),
[&selectedParameters]( const EnsembleParameter& parameter ) {
return std::find( selectedParameters.begin(),
selectedParameters.end(),
parameter.name ) == selectedParameters.end();
} ),
parameters.end() );
}
std::vector<double> caseValuesAtTimestep;
std::map<EnsembleParameter, std::vector<double>> parameterValues;
for ( size_t caseIdx = 0u; caseIdx < m_cases.size(); ++caseIdx )
{
RimSummaryCase* summaryCase = m_cases[caseIdx];
RifSummaryReaderInterface* reader = summaryCase->summaryReader();
if ( !reader ) continue;
if ( !summaryCase->caseRealizationParameters() ) continue;
std::vector<double> values;
double closestValue = std::numeric_limits<double>::infinity();
time_t closestTimeStep = 0;
if ( reader->values( address, &values ) )
{
const std::vector<time_t>& timeSteps = reader->timeSteps( address );
for ( size_t i = 0; i < timeSteps.size(); ++i )
{
if ( timeDiff( timeSteps[i], timeStep ) < timeDiff( timeStep, closestTimeStep ) )
{
closestValue = values[i];
closestTimeStep = timeSteps[i];
}
}
}
if ( closestValue != std::numeric_limits<double>::infinity() )
{
caseValuesAtTimestep.push_back( closestValue );
for ( auto parameter : parameters )
{
if ( parameter.isNumeric() && parameter.isValid() )
{
double paramValue = parameter.values[caseIdx].toDouble();
parameterValues[parameter].push_back( paramValue );
}
}
}
}
std::vector<std::pair<EnsembleParameter, double>> correlationResults;
for ( auto parameterValuesPair : parameterValues )
{
double correlation = 0.0;
if ( spearman )
{
double spearman = RiaStatisticsTools::spearmanCorrelation( parameterValuesPair.second, caseValuesAtTimestep );
if ( spearman != std::numeric_limits<double>::infinity() ) correlation = spearman;
}
else
{
double pearson = RiaStatisticsTools::pearsonCorrelation( parameterValuesPair.second, caseValuesAtTimestep );
if ( pearson != std::numeric_limits<double>::infinity() ) correlation = pearson;
}
correlationResults.push_back( std::make_pair( parameterValuesPair.first, correlation ) );
}
return correlationResults;
}
//--------------------------------------------------------------------------------------------------
/// Returns a vector of the parameters and the average absolute values of correlations per time step
//--------------------------------------------------------------------------------------------------
std::vector<std::pair<EnsembleParameter, double>>
RimSummaryCaseCollection::parameterCorrelationsAllTimeSteps( const RifEclipseSummaryAddress& address,
bool spearman,
const std::vector<QString>& selectedParameters ) const
{
const size_t maxTimeStepCount = 10;
std::set<time_t> timeSteps = ensembleTimeSteps();
if ( timeSteps.empty() ) return {};
std::vector<time_t> timeStepsVector( timeSteps.begin(), timeSteps.end() );
size_t stride = std::max( (size_t)1, timeStepsVector.size() / maxTimeStepCount );
std::vector<std::vector<std::pair<EnsembleParameter, double>>> correlationsForChosenTimeSteps;
for ( size_t i = stride; i < timeStepsVector.size(); i += stride )
{
std::vector<std::pair<EnsembleParameter, double>> correlationsForTimeStep =
parameterCorrelations( address, timeStepsVector[i], spearman, selectedParameters );
correlationsForChosenTimeSteps.push_back( correlationsForTimeStep );
}
for ( size_t i = 1; i < correlationsForChosenTimeSteps.size(); ++i )
{
for ( size_t j = 0; j < correlationsForChosenTimeSteps[0].size(); ++j )
{
correlationsForChosenTimeSteps[0][j].second += correlationsForChosenTimeSteps[i][j].second;
}
}
for ( size_t j = 0; j < correlationsForChosenTimeSteps[0].size(); ++j )
{
correlationsForChosenTimeSteps[0][j].second /= correlationsForChosenTimeSteps.size();
}
return correlationsForChosenTimeSteps[0];
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------

View File

@ -111,7 +111,17 @@ public:
int ensembleId() const;
const std::vector<EnsembleParameter>& variationSortedEnsembleParameters( bool excludeNoVariation = false ) const;
std::vector<EnsembleParameter> alphabeticEnsembleParameters() const;
std::vector<std::pair<EnsembleParameter, double>>
parameterCorrelations( const RifEclipseSummaryAddress& address,
time_t selectedTimeStep,
bool spearman = false,
const std::vector<QString>& selectedParameters = {} ) const;
std::vector<std::pair<EnsembleParameter, double>>
parameterCorrelationsAllTimeSteps( const RifEclipseSummaryAddress& address,
bool spearman = false,
const std::vector<QString>& selectedParameters = {} ) const;
std::vector<EnsembleParameter> alphabeticEnsembleParameters() const;
EnsembleParameter ensembleParameter( const QString& paramName ) const;
void calculateEnsembleParametersIntersectionHash();

View File

@ -265,17 +265,28 @@ void RiuSummaryQwtPlot::contextMenuEvent( QContextMenuEvent* event )
{
menuBuilder.subMenuStart( "Cross Plots",
*caf::IconProvider( ":/CorrelationCrossPlot16x16.png" ).icon() );
std::vector<EnsembleParameter> ensembleParameters =
ensemble->variationSortedEnsembleParameters();
for ( const EnsembleParameter& param : ensembleParameters )
std::vector<std::pair<EnsembleParameter, double>> ensembleParameters =
ensemble->parameterCorrelations( clickedEnsembleCurveSet->summaryAddress(),
timeStep,
false );
std::sort( ensembleParameters.begin(),
ensembleParameters.end(),
[]( const std::pair<EnsembleParameter, double>& lhs,
const std::pair<EnsembleParameter, double>& rhs ) {
return std::fabs( lhs.second ) > std::fabs( rhs.second );
} );
for ( const auto& param : ensembleParameters )
{
if ( param.variationBin >= (int)EnsembleParameter::LOW_VARIATION )
if ( std::fabs( param.second ) >= 1.0e-6 )
{
params.ensembleParameter = param.name;
params.ensembleParameter = param.first.name;
variant = QVariant::fromValue( params );
menuBuilder.addCmdFeatureWithUserData( "RicNewParameterResultCrossPlotFeature",
QString( "New Cross Plot Against %1" )
.arg( param.uiName() ),
QString( "New Cross Plot Against %1 "
"(Correlation: %2)" )
.arg( param.first.name )
.arg( param.second ),
variant );
}
}