#1959 Observed Data : Support custom ordering of header rows

This commit is contained in:
Magne Sjaastad 2017-10-03 11:45:31 +02:00
parent 4c61696f4e
commit f3f78995fa
3 changed files with 148 additions and 62 deletions

View File

@ -90,7 +90,7 @@ bool RifEclipseUserDataParserTools::isAComment(const std::string& word)
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
/// ///
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
std::vector<std::string> RifEclipseUserDataParserTools::splitLineAndRemoveComments(std::string line) std::vector<std::string> RifEclipseUserDataParserTools::splitLineAndRemoveComments(const std::string& line)
{ {
std::istringstream iss(line); std::istringstream iss(line);
std::vector<std::string> words{ std::istream_iterator<std::string>{iss}, std::vector<std::string> words{ std::istream_iterator<std::string>{iss},
@ -232,7 +232,10 @@ RifEclipseSummaryAddress RifEclipseUserDataParserTools::makeAndFillAddress(std::
cellK); cellK);
} }
bool RifEclipseUserDataParserTools::keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate) //--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
bool RifEclipseUserDataParserTools::keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate)
{ {
std::vector<std::string> words = splitLineAndRemoveComments(line); std::vector<std::string> words = splitLineAndRemoveComments(line);
if (words.size() < 2) return false; if (words.size() < 2) return false;
@ -278,21 +281,100 @@ std::vector<ColumnInfo> RifEclipseUserDataParserTools::columnInfoForTable(std::s
} }
std::vector<std::string> quantityNames = splitLineAndRemoveComments(line); std::vector<std::string> quantityNames = splitLineAndRemoveComments(line);
std::getline(streamData, line);
std::vector<std::string> unitNames = splitLineAndRemoveComments(line);
std::getline(streamData, line);
std::vector<std::string> scaleFactors = splitLineAndRemoveComments(line);
std::vector<RifEclipseSummaryAddress::SummaryVarCategory> categories;
size_t columnCount = quantityNames.size(); size_t columnCount = quantityNames.size();
if (unitNames.size() != columnCount) std::vector< std::vector< std::string > > allHeaderRows;
{ {
size_t diff = columnCount - unitNames.size(); std::stringstream::pos_type posAtStartOfLine = streamData.tellg();
unitNames.insert(unitNames.end(), diff, "");
std::string secondLine;
std::getline(streamData, line);
std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg();
std::getline(streamData, secondLine);
bool header = true;
while (header)
{
std::vector<std::string> words = splitLineAndRemoveComments(line);
std::vector<std::string> wordsSecondLine = splitLineAndRemoveComments(secondLine);
if (words.size() == columnCount &&
wordsSecondLine.size() == columnCount &&
hasOnlyValidDoubleValues(words) &&
hasOnlyValidDoubleValues(wordsSecondLine))
{
header = false;
break;
}
else if (words.size() > columnCount)
{
continue;
}
else
{
size_t diff = columnCount - words.size();
if (diff == columnCount)
{
std::vector< std::string > vectorOfEmptyStrings(columnCount, "");
allHeaderRows.push_back(vectorOfEmptyStrings);
}
else
{
words.insert(words.begin(), diff, "");
allHeaderRows.push_back(words);
}
}
posAtStartOfLine = posAtStartOfSecondLine;
line = secondLine;
posAtStartOfSecondLine = streamData.tellg();
std::getline(streamData, secondLine);
}
streamData.seekg(posAtStartOfLine);
} }
for (std::string unit : unitNames) std::vector<std::string> unitNames;
std::vector<double> scaleFactors;
std::vector< std::vector< std::string > > restOfHeaderRows;
for (const auto& wordsForRow : allHeaderRows)
{
bool excludeFromHeader = false;
if (unitNames.size() == 0)
{
for (const std::string& word : wordsForRow)
{
if (hasTimeUnit(word))
{
unitNames = wordsForRow;
excludeFromHeader = true;
}
}
}
if (scaleFactors.size() == 0)
{
std::vector<double> values;
if (hasOnlyValidDoubleValues(wordsForRow, &values))
{
scaleFactors = values;
excludeFromHeader = true;
}
}
if (!excludeFromHeader)
{
restOfHeaderRows.push_back(wordsForRow);
}
}
for (const std::string& unit : unitNames)
{ {
ColumnInfo columnInfo; ColumnInfo columnInfo;
columnInfo.unitName = unit; columnInfo.unitName = unit;
@ -302,62 +384,22 @@ std::vector<ColumnInfo> RifEclipseUserDataParserTools::columnInfoForTable(std::s
table.push_back(columnInfo); table.push_back(columnInfo);
} }
if (scaleFactors.size() < columnCount)
{
size_t diff = columnCount - scaleFactors.size();
scaleFactors.insert(scaleFactors.end(), diff, "1");
}
for (size_t i = 0; i < table.size(); i++) for (size_t i = 0; i < table.size(); i++)
{ {
table[i].scaleFactor = scaleFactors[i]; if (scaleFactors.size() == table.size())
}
std::vector< std::vector< std::string > > restOfHeader;
std::stringstream::pos_type posAtStartOfLine = streamData.tellg();
bool header = true;
while (header)
{
posAtStartOfLine = streamData.tellg();
std::getline(streamData, line);
std::vector<std::string> words = splitLineAndRemoveComments(line);
if (words.size() == columnCount)
{ {
header = false; table[i].scaleFactor = scaleFactors[i];
break;
}
else if (words.size() > columnCount)
{
continue;
} }
else else
{ {
size_t diff = columnCount - words.size(); table[i].scaleFactor = 1.0;
if (diff == columnCount)
{
std::vector< std::string > vectorOfEmptyStrings(columnCount, "");
restOfHeader.push_back(vectorOfEmptyStrings);
}
else
{
words.insert(words.begin(), diff, "");
restOfHeader.push_back(words);
}
} }
} }
streamData.seekg(posAtStartOfLine);
for (size_t i = 0; i < columnCount; i++) for (size_t i = 0; i < table.size(); i++)
{ {
std::vector< std::string > restOfHeaderColumn; std::vector< std::string > restOfHeaderColumn;
for (std::vector< std::string > restOfHeaderRow : restOfHeader) for (std::vector< std::string > restOfHeaderRow : restOfHeaderRows)
{ {
restOfHeaderColumn.push_back(restOfHeaderRow.at(i)); restOfHeaderColumn.push_back(restOfHeaderRow.at(i));
} }
@ -419,4 +461,44 @@ std::vector<std::string> RifEclipseUserDataParserTools::headerReader(std::string
std::getline(streamData, line); std::getline(streamData, line);
} }
return header; return header;
} }
//--------------------------------------------------------------------------------------------------
/// Returns true when the given text is exactly one of the recognized time unit keywords:
/// "DAYS", "DAY", "YEARS" or "YEAR". The comparison is case sensitive and matches the
/// complete string, so surrounding whitespace or a different case gives false.
//--------------------------------------------------------------------------------------------------
bool RifEclipseUserDataParserTools::hasTimeUnit(const std::string& line)
{
    // Keywords accepted as time units, compared verbatim against the input
    static const char* const timeUnitKeywords[] = { "DAYS", "DAY", "YEARS", "YEAR" };

    for (const char* keyword : timeUnitKeywords)
    {
        if (line == keyword)
        {
            return true;
        }
    }

    return false;
}
//--------------------------------------------------------------------------------------------------
/// Returns true when every entry in words can be converted to a double by strtod using the
/// complete text of the entry. An entry that is empty, not numeric, or numeric only in its
/// leading part (for instance "12abc") makes the function return false.
///
/// An empty list of words gives true.
///
/// If doubleValues is given, the converted values are appended to it in the same order as
/// words, but only when the function returns true. On false, doubleValues is left untouched.
//--------------------------------------------------------------------------------------------------
bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(const std::vector<std::string>& words, std::vector<double>* doubleValues)
{
    // Collect into a local vector so the caller never sees a partially filled result
    std::vector<double> parsedValues;
    parsedValues.reserve(words.size());

    for (const auto& word : words)
    {
        const char* begin = word.c_str();
        char*       end   = nullptr;

        const double doubleVal = strtod(begin, &end);

        // strtod stops at the first character it cannot use:
        //  - end == begin             : nothing was converted, the word is not a number
        //  - end != begin + size()    : only a prefix was converted, trailing text remains
        if (end == begin || end != begin + word.size())
        {
            return false;
        }

        parsedValues.push_back(doubleVal);
    }

    if (doubleValues)
    {
        doubleValues->insert(doubleValues->end(), parsedValues.begin(), parsedValues.end());
    }

    return true;
}

View File

@ -33,7 +33,7 @@ struct ColumnInfo
bool isAVector = false; bool isAVector = false;
RifEclipseSummaryAddress summaryAddress; RifEclipseSummaryAddress summaryAddress;
std::string unitName; std::string unitName;
std::string scaleFactor; double scaleFactor;
std::vector<double> values; std::vector<double> values;
std::string dateFormat; std::string dateFormat;
std::string startDate; std::string startDate;
@ -48,13 +48,16 @@ class RifEclipseUserDataParserTools
public: public:
static bool isLineSkippable(const std::string& line); static bool isLineSkippable(const std::string& line);
static bool isAComment(const std::string& word); static bool isAComment(const std::string& word);
static std::vector<std::string> splitLineAndRemoveComments(std::string line); static std::vector<std::string> splitLineAndRemoveComments(const std::string& line);
static RifEclipseSummaryAddress::SummaryVarCategory identifyCategory(const std::string& word); static RifEclipseSummaryAddress::SummaryVarCategory identifyCategory(const std::string& word);
static void splitLineToDoubles(const std::string& line, std::vector<double>& values); static void splitLineToDoubles(const std::string& line, std::vector<double>& values);
static size_t findFirstNonEmptyEntryIndex(std::vector<std::string>& list); static size_t findFirstNonEmptyEntryIndex(std::vector<std::string>& list);
static RifEclipseSummaryAddress makeAndFillAddress(std::string quantityName, std::vector< std::string > headerColumn); static RifEclipseSummaryAddress makeAndFillAddress(std::string quantityName, std::vector< std::string > headerColumn);
static bool keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate); static bool keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate);
static std::vector<ColumnInfo> columnInfoForTable(std::stringstream& data); static std::vector<ColumnInfo> columnInfoForTable(std::stringstream& data);
static bool isANumber(const std::string& line); static bool isANumber(const std::string& line);
static std::vector<std::string> headerReader(std::stringstream& streamData, std::string& line); static std::vector<std::string> headerReader(std::stringstream& streamData, std::string& line);
static bool hasTimeUnit(const std::string& line);
static bool hasOnlyValidDoubleValues(const std::vector<std::string>& words, std::vector<double>* doubleValues = nullptr);
}; };

View File

@ -299,7 +299,8 @@ TEST(RifColumnBasedRsmspecParserTest, TestKeywordsAndMissingUnitName)
EXPECT_EQ("OP-2_TR", tables[1].at(0).origin); EXPECT_EQ("OP-2_TR", tables[1].at(0).origin);
EXPECT_EQ("DD/MM/YY", tables[1].at(0).dateFormat); EXPECT_EQ("DD/MM/YY", tables[1].at(0).dateFormat);
EXPECT_EQ("", tables[0].at(7).unitName); // Assume missing units at start of row
EXPECT_EQ("", tables[0].at(0).unitName);
ASSERT_EQ(8, tables.at(0).size()); ASSERT_EQ(8, tables.at(0).size());
EXPECT_EQ(1.0E-12, tables.at(0).at(4).values[0]); EXPECT_EQ(1.0E-12, tables.at(0).at(4).values[0]);