Moved code to new library: ResultStatisticsCache

Moved  RigStatisticsCalculator.h/.cpp, RigStatisticsDataCache.h/.cpp,
RigStatisticsMath.h/.cpp to new library ResultStatisticsCache.
ResInsight and some unit tests now link with this new library.
This commit is contained in:
Stein Dale
2015-05-11 13:25:05 +02:00
parent ca17645499
commit 85f62ec1d4
12 changed files with 72 additions and 269 deletions

View File

@@ -0,0 +1,18 @@
cmake_minimum_required (VERSION 2.8)
project (ResultStatisticsCache)
include_directories(
${LibCore_SOURCE_DIR}
)
add_library( ${PROJECT_NAME}
RigStatisticsCalculator.h
RigStatisticsCalculator.cpp
RigStatisticsDataCache.h
RigStatisticsDataCache.cpp
RigStatisticsMath.h
RigStatisticsMath.cpp
)
target_link_libraries(${PROJECT_NAME} LibCore)

View File

@@ -0,0 +1,44 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) Statoil ASA
// Copyright (C) Ceetron Solutions AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#include "RigStatisticsCalculator.h"
#include <cmath> // Needed for HUGE_VAL on Linux
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsCalculator::meanCellScalarValue(double& meanValue)
{
double valueSum = 0.0;
size_t sampleCount = 0;
this->valueSumAndSampleCount(valueSum, sampleCount);
if (sampleCount == 0)
{
meanValue = HUGE_VAL;
}
else
{
meanValue = valueSum / sampleCount;
}
}

View File

@@ -0,0 +1,44 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) Statoil ASA
// Copyright (C) Ceetron Solutions AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "cvfBase.h"
#include "cvfObject.h"
#include "cvfCollection.h"
#include <vector>
class RigHistogramCalculator;
//==================================================================================================
///
//==================================================================================================
class RigStatisticsCalculator : public cvf::Object
{
public:
virtual void minMaxCellScalarValues(size_t timeStepIndex, double& min, double& max) = 0;
virtual void posNegClosestToZero(size_t timeStepIndex, double& pos, double& neg) = 0;
void meanCellScalarValue(double& meanValue);
virtual void valueSumAndSampleCount(double& valueSum, size_t& sampleCount) = 0;
virtual void addDataToHistogramCalculator(RigHistogramCalculator& histogramCalculator) = 0;
virtual size_t timeStepCount() = 0;
};

View File

@@ -0,0 +1,206 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) Statoil ASA
// Copyright (C) Ceetron Solutions AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#include "RigStatisticsDataCache.h"
#include "RigStatisticsCalculator.h"
#include "RigStatisticsMath.h"
#include <cmath> // Needed for HUGE_VAL on Linux
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
RigStatisticsDataCache::RigStatisticsDataCache(RigStatisticsCalculator* statisticsCalculator)
: m_statisticsCalculator(statisticsCalculator)
{
clearAllStatistics();
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::clearAllStatistics()
{
m_minValue = HUGE_VAL;
m_maxValue = -HUGE_VAL;
m_posClosestToZero = HUGE_VAL;
m_negClosestToZero = -HUGE_VAL;
m_p10 = HUGE_VAL;
m_p90 = HUGE_VAL;
m_meanValue = HUGE_VAL;
m_histogram.clear();
m_maxMinValuesPrTs.clear();
m_posNegClosestToZeroPrTs.clear();
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::minMaxCellScalarValues(double& min, double& max)
{
if (m_minValue == HUGE_VAL)
{
min = HUGE_VAL;
max = -HUGE_VAL;
size_t i;
for (i = 0; i < m_statisticsCalculator->timeStepCount(); i++)
{
double tsmin, tsmax;
this->minMaxCellScalarValues(i, tsmin, tsmax);
if (tsmin < min) min = tsmin;
if (tsmax > max) max = tsmax;
}
m_minValue = min;
m_maxValue = max;
}
min = m_minValue;
max = m_maxValue;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::minMaxCellScalarValues(size_t timeStepIndex, double& min, double& max)
{
if (timeStepIndex >= m_maxMinValuesPrTs.size())
{
m_maxMinValuesPrTs.resize(timeStepIndex + 1, std::make_pair(HUGE_VAL, -HUGE_VAL));
}
if (m_maxMinValuesPrTs[timeStepIndex].first == HUGE_VAL)
{
min = HUGE_VAL;
max = -HUGE_VAL;
m_statisticsCalculator->minMaxCellScalarValues(timeStepIndex, min, max);
m_maxMinValuesPrTs[timeStepIndex].first = min;
m_maxMinValuesPrTs[timeStepIndex].second = max;
}
min = m_maxMinValuesPrTs[timeStepIndex].first;
max = m_maxMinValuesPrTs[timeStepIndex].second;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::posNegClosestToZero(double& pos, double& neg)
{
if (m_posClosestToZero == HUGE_VAL)
{
pos = HUGE_VAL;
neg = -HUGE_VAL;
size_t i;
for (i = 0; i < m_statisticsCalculator->timeStepCount(); i++)
{
double tsNeg, tsPos;
this->posNegClosestToZero(i, tsPos, tsNeg);
if (tsNeg > neg && tsNeg < 0) neg = tsNeg;
if (tsPos < pos && tsPos > 0) pos = tsPos;
}
m_posClosestToZero = pos;
m_negClosestToZero = neg;
}
pos = m_posClosestToZero;
neg = m_negClosestToZero;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::posNegClosestToZero(size_t timeStepIndex, double& pos, double& neg)
{
if (timeStepIndex >= m_posNegClosestToZeroPrTs.size())
{
m_posNegClosestToZeroPrTs.resize(timeStepIndex + 1, std::make_pair(HUGE_VAL, -HUGE_VAL));
}
if (m_posNegClosestToZeroPrTs[timeStepIndex].first == HUGE_VAL)
{
pos = HUGE_VAL;
neg = -HUGE_VAL;
m_statisticsCalculator->posNegClosestToZero(timeStepIndex, pos, neg);
m_posNegClosestToZeroPrTs[timeStepIndex].first = pos;
m_posNegClosestToZeroPrTs[timeStepIndex].second = neg;
}
pos = m_posNegClosestToZeroPrTs[timeStepIndex].first;
neg = m_posNegClosestToZeroPrTs[timeStepIndex].second;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
const std::vector<size_t>& RigStatisticsDataCache::cellScalarValuesHistogram()
{
if (m_histogram.size() == 0)
{
double min;
double max;
size_t nBins = 100;
this->minMaxCellScalarValues(min, max);
RigHistogramCalculator histCalc(min, max, nBins, &m_histogram);
m_statisticsCalculator->addDataToHistogramCalculator(histCalc);
m_p10 = histCalc.calculatePercentil(0.1);
m_p90 = histCalc.calculatePercentil(0.9);
}
return m_histogram;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::p10p90CellScalarValues(double& p10, double& p90)
{
// First make sure they are calculated
const std::vector<size_t>& histogr = this->cellScalarValuesHistogram();
p10 = m_p10;
p90 = m_p90;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigStatisticsDataCache::meanCellScalarValues(double& meanValue)
{
if (m_meanValue == HUGE_VAL)
{
m_statisticsCalculator->meanCellScalarValue(m_meanValue);
}
meanValue = m_meanValue;
}

View File

@@ -0,0 +1,66 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) Statoil ASA
// Copyright (C) Ceetron Solutions AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "RigStatisticsCalculator.h"
#include "cvfBase.h"
#include "cvfObject.h"
#include <vector>
//==================================================================================================
///
//==================================================================================================
class RigStatisticsDataCache : public cvf::Object
{
public:
RigStatisticsDataCache(RigStatisticsCalculator* statisticsCalculator);
void clearAllStatistics();
void minMaxCellScalarValues(double& min, double& max);
void minMaxCellScalarValues(size_t timeStepIndex, double& min, double& max);
void posNegClosestToZero(double& pos, double& neg);
void posNegClosestToZero(size_t timeStepIndex, double& pos, double& neg);
void p10p90CellScalarValues(double& p10, double& p90);
void meanCellScalarValues(double& meanValue);
const std::vector<size_t>& cellScalarValuesHistogram();
private:
double m_minValue;
double m_maxValue;
double m_posClosestToZero;
double m_negClosestToZero;
double m_p10;
double m_p90;
double m_meanValue;
std::vector<size_t> m_histogram;
std::vector<std::pair<double, double> > m_maxMinValuesPrTs; ///< Max min values for each time step
std::vector<std::pair<double, double> > m_posNegClosestToZeroPrTs; ///< PosNeg values for each time step
cvf::ref<RigStatisticsCalculator> m_statisticsCalculator;
};

View File

@@ -0,0 +1,245 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2011-2012 Statoil ASA, Ceetron AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#include "RigStatisticsMath.h"
#include <algorithm>
#include <assert.h>
#include <math.h>
#include "cvfBase.h"
#include "cvfMath.h"
//--------------------------------------------------------------------------------------------------
/// A function to do basic statistical calculations
//--------------------------------------------------------------------------------------------------
void RigStatisticsMath::calculateBasicStatistics(const std::vector<double>& values, double* min, double* max, double* range, double* mean, double* dev)
{
double m_min(HUGE_VAL);
double m_max(-HUGE_VAL);
double m_mean(HUGE_VAL);
double m_dev(HUGE_VAL);
double sum = 0.0;
double sumSquared = 0.0;
size_t validValueCount = 0;
for (size_t i = 0; i < values.size(); i++)
{
double val = values[i];
if (val == HUGE_VAL) continue;
validValueCount++;
if (val < m_min) m_min = val;
if (val > m_max) m_max = val;
sum += val;
sumSquared += (val * val);
}
if (validValueCount > 0)
{
m_mean = sum / validValueCount;
// http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
// Running standard deviation
double s0 = static_cast<double>(validValueCount);
double s1 = sum;
double s2 = sumSquared;
m_dev = sqrt( (s0 * s2) - (s1 * s1) ) / s0;
}
if (min) *min = m_min;
if (max) *max = m_max;
if (range) *range = m_max - m_min;
if (mean) *mean = m_mean;
if (dev) *dev = m_dev;
}
//--------------------------------------------------------------------------------------------------
/// Calculate the percentiles of /a inputValues at the pValPosition percentages using the "Nearest Rank"
/// method. This method treats HUGE_VAL as "undefined" values, and ignores these. Will return HUGE_VAL if
/// the inputValues does not contain any valid values
//--------------------------------------------------------------------------------------------------
std::vector<double> RigStatisticsMath::calculateNearestRankPercentiles(const std::vector<double> & inputValues, const std::vector<double>& pValPositions)
{
std::vector<double> sortedValues;
sortedValues.reserve(inputValues.size());
for (size_t i = 0; i < inputValues.size(); ++i)
{
if (inputValues[i] != HUGE_VAL)
{
sortedValues.push_back(inputValues[i]);
}
}
std::sort(sortedValues.begin(), sortedValues.end());
std::vector<double> percentiles(pValPositions.size(), HUGE_VAL);
if (sortedValues.size())
{
for (size_t i = 0; i < pValPositions.size(); ++i)
{
double pVal = HUGE_VAL;
size_t pValIndex = static_cast<size_t>(sortedValues.size() * cvf::Math::abs(pValPositions[i]) / 100);
if (pValIndex >= sortedValues.size() ) pValIndex = sortedValues.size() - 1;
pVal = sortedValues[pValIndex];
percentiles[i] = pVal;
}
}
return percentiles;
};
//--------------------------------------------------------------------------------------------------
/// Calculate the percentiles of /a inputValues at the pValPosition percentages by interpolating input values.
/// This method treats HUGE_VAL as "undefined" values, and ignores these. Will return HUGE_VAL if
/// the inputValues does not contain any valid values
//--------------------------------------------------------------------------------------------------
std::vector<double> RigStatisticsMath::calculateInterpolatedPercentiles(const std::vector<double> & inputValues, const std::vector<double>& pValPositions)
{
std::vector<double> sortedValues;
sortedValues.reserve(inputValues.size());
for (size_t i = 0; i < inputValues.size(); ++i)
{
if (inputValues[i] != HUGE_VAL)
{
sortedValues.push_back(inputValues[i]);
}
}
std::sort(sortedValues.begin(), sortedValues.end());
std::vector<double> percentiles(pValPositions.size(), HUGE_VAL);
if (sortedValues.size())
{
for (size_t i = 0; i < pValPositions.size(); ++i)
{
double pVal = HUGE_VAL;
double doubleIndex = (sortedValues.size() - 1) * cvf::Math::abs(pValPositions[i]) / 100.0;
size_t lowerValueIndex = static_cast<size_t>(floor(doubleIndex));
size_t upperValueIndex = lowerValueIndex + 1;
double upperValueWeight = doubleIndex - lowerValueIndex;
assert(upperValueWeight < 1.0);
if (upperValueIndex < sortedValues.size())
{
pVal = (1.0 - upperValueWeight) * sortedValues[lowerValueIndex] + upperValueWeight * sortedValues[upperValueIndex];
}
else
{
pVal = sortedValues[lowerValueIndex];
}
percentiles[i] = pVal;
}
}
return percentiles;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
RigHistogramCalculator::RigHistogramCalculator(double min, double max, size_t nBins, std::vector<size_t>* histogram)
{
assert(histogram);
assert(nBins > 0);
if (max == min) { nBins = 1; } // Avoid dividing on 0 range
m_histogram = histogram;
m_min = min;
m_observationCount = 0;
// Initialize bins
m_histogram->resize(nBins);
for (size_t i = 0; i < m_histogram->size(); ++i) (*m_histogram)[i] = 0;
m_range = max - min;
maxIndex = nBins-1;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
void RigHistogramCalculator::addData(const std::vector<double>& data)
{
assert(m_histogram);
for (size_t i = 0; i < data.size(); ++i)
{
if (data[i] == HUGE_VAL)
{
continue;
}
size_t index = 0;
if (maxIndex > 0) index = (size_t)(maxIndex*(data[i] - m_min)/m_range);
if(index < m_histogram->size()) // Just clip to the max min range (-index will overflow to positive )
{
(*m_histogram)[index]++;
m_observationCount++;
}
}
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
double RigHistogramCalculator::calculatePercentil(double pVal)
{
assert(m_histogram);
assert(m_histogram->size());
assert( 0.0 <= pVal && pVal <= 1.0);
double pValObservationCount = pVal*m_observationCount;
if (pValObservationCount == 0.0) return m_min;
size_t accObsCount = 0;
double binWidth = m_range/m_histogram->size();
for (size_t binIdx = 0; binIdx < m_histogram->size(); ++binIdx)
{
size_t binObsCount = (*m_histogram)[binIdx];
accObsCount += binObsCount;
if (accObsCount >= pValObservationCount)
{
double domainValueAtEndOfBin = m_min + (binIdx+1) * binWidth;
double unusedFractionOfLastBin = (double)(accObsCount - pValObservationCount)/binObsCount;
return domainValueAtEndOfBin - unusedFractionOfLastBin*binWidth;
}
}
assert(false);
return HUGE_VAL;
}

View File

@@ -0,0 +1,55 @@
/////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2011-2012 Statoil ASA, Ceetron AS
//
// ResInsight is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.
//
// See the GNU General Public License at <http://www.gnu.org/licenses/gpl.html>
// for more details.
//
/////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <vector>
#include <cmath>
#include <cstddef>
class RigStatisticsMath
{
public:
static void calculateBasicStatistics(const std::vector<double>& values, double* min, double* max, double* range, double* mean, double* dev);
static std::vector<double> calculateNearestRankPercentiles(const std::vector<double> & inputValues, const std::vector<double>& pValPositions);
static std::vector<double> calculateInterpolatedPercentiles(const std::vector<double> & inputValues, const std::vector<double>& pValPositions);
};
//==================================================================================================
/// Class to calculate a histogram, and histogram based p-value estimates
//==================================================================================================
class RigHistogramCalculator
{
public:
RigHistogramCalculator(double min, double max, size_t nBins, std::vector<size_t>* histogram);
void addData(const std::vector<double>& data);
/// Calculates the estimated percentile from the histogram.
/// the percentile is the domain value at which pVal of the observations are below it.
/// Will only consider observed values between min and max, as all other values are discarded from the histogram
double calculatePercentil(double pVal);
private:
size_t maxIndex;
double m_range;
double m_min;
size_t m_observationCount;
std::vector<size_t>* m_histogram;
};