diff --git a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp index a88d611037..e7c2632cd8 100644 --- a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp +++ b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp @@ -90,7 +90,7 @@ bool RifEclipseUserDataParserTools::isAComment(const std::string& word) //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- -std::vector RifEclipseUserDataParserTools::splitLineAndRemoveComments(std::string line) +std::vector RifEclipseUserDataParserTools::splitLineAndRemoveComments(const std::string& line) { std::istringstream iss(line); std::vector words{ std::istream_iterator{iss}, @@ -232,7 +232,10 @@ RifEclipseSummaryAddress RifEclipseUserDataParserTools::makeAndFillAddress(std:: cellK); } -bool RifEclipseUserDataParserTools::keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate) +//-------------------------------------------------------------------------------------------------- +/// +//-------------------------------------------------------------------------------------------------- +bool RifEclipseUserDataParserTools::keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate) { std::vector words = splitLineAndRemoveComments(line); if (words.size() < 2) return false; @@ -278,21 +281,100 @@ std::vector RifEclipseUserDataParserTools::columnInfoForTable(std::s } std::vector quantityNames = splitLineAndRemoveComments(line); - std::getline(streamData, line); - std::vector unitNames = splitLineAndRemoveComments(line); - std::getline(streamData, line); - std::vector scaleFactors = splitLineAndRemoveComments(line); - - std::vector categories; size_t columnCount = quantityNames.size(); - if 
(unitNames.size() != columnCount) + std::vector< std::vector< std::string > > allHeaderRows; + { - size_t diff = columnCount - unitNames.size(); - unitNames.insert(unitNames.end(), diff, ""); + std::stringstream::pos_type posAtStartOfLine = streamData.tellg(); + + std::string secondLine; + std::getline(streamData, line); + + std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg(); + std::getline(streamData, secondLine); + + bool header = true; + while (header) + { + std::vector words = splitLineAndRemoveComments(line); + std::vector wordsSecondLine = splitLineAndRemoveComments(secondLine); + + if (words.size() == columnCount && + wordsSecondLine.size() == columnCount && + hasOnlyValidDoubleValues(words) && + hasOnlyValidDoubleValues(wordsSecondLine)) + { + header = false; + break; + } + else if (words.size() > columnCount) + { + continue; + } + else + { + size_t diff = columnCount - words.size(); + + if (diff == columnCount) + { + std::vector< std::string > vectorOfEmptyStrings(columnCount, ""); + allHeaderRows.push_back(vectorOfEmptyStrings); + } + else + { + words.insert(words.begin(), diff, ""); + allHeaderRows.push_back(words); + } + } + + posAtStartOfLine = posAtStartOfSecondLine; + line = secondLine; + + posAtStartOfSecondLine = streamData.tellg(); + std::getline(streamData, secondLine); + } + + streamData.seekg(posAtStartOfLine); } - for (std::string unit : unitNames) + std::vector unitNames; + std::vector scaleFactors; + std::vector< std::vector< std::string > > restOfHeaderRows; + + for (const auto& wordsForRow : allHeaderRows) + { + bool excludeFromHeader = false; + if (unitNames.size() == 0) + { + for (const std::string& word : wordsForRow) + { + if (hasTimeUnit(word)) + { + unitNames = wordsForRow; + excludeFromHeader = true; + } + } + } + + if (scaleFactors.size() == 0) + { + std::vector values; + + if (hasOnlyValidDoubleValues(wordsForRow, &values)) + { + scaleFactors = values; + excludeFromHeader = true; + } + } + + if 
(!excludeFromHeader) + { + restOfHeaderRows.push_back(wordsForRow); + } + } + + for (const std::string& unit : unitNames) { ColumnInfo columnInfo; columnInfo.unitName = unit; @@ -302,62 +384,22 @@ std::vector RifEclipseUserDataParserTools::columnInfoForTable(std::s table.push_back(columnInfo); } - if (scaleFactors.size() < columnCount) - { - size_t diff = columnCount - scaleFactors.size(); - scaleFactors.insert(scaleFactors.end(), diff, "1"); - } - for (size_t i = 0; i < table.size(); i++) { - table[i].scaleFactor = scaleFactors[i]; - } - - std::vector< std::vector< std::string > > restOfHeader; - - std::stringstream::pos_type posAtStartOfLine = streamData.tellg(); - - bool header = true; - while (header) - { - posAtStartOfLine = streamData.tellg(); - - std::getline(streamData, line); - - std::vector words = splitLineAndRemoveComments(line); - - if (words.size() == columnCount) + if (scaleFactors.size() == table.size()) { - header = false; - break; - } - else if (words.size() > columnCount) - { - continue; + table[i].scaleFactor = scaleFactors[i]; } else { - size_t diff = columnCount - words.size(); - - if (diff == columnCount) - { - std::vector< std::string > vectorOfEmptyStrings(columnCount, ""); - restOfHeader.push_back(vectorOfEmptyStrings); - } - else - { - words.insert(words.begin(), diff, ""); - restOfHeader.push_back(words); - } + table[i].scaleFactor = 1.0; } } - - streamData.seekg(posAtStartOfLine); - for (size_t i = 0; i < columnCount; i++) + for (size_t i = 0; i < table.size(); i++) { std::vector< std::string > restOfHeaderColumn; - for (std::vector< std::string > restOfHeaderRow : restOfHeader) + for (std::vector< std::string > restOfHeaderRow : restOfHeaderRows) { restOfHeaderColumn.push_back(restOfHeaderRow.at(i)); } @@ -419,4 +461,44 @@ std::vector RifEclipseUserDataParserTools::headerReader(std::string std::getline(streamData, line); } return header; -} \ No newline at end of file +} + 
+//-------------------------------------------------------------------------------------------------- +/// Returns true only when the given word is exactly DAYS, DAY, YEARS or YEAR (case-sensitive comparison of the whole string) +//-------------------------------------------------------------------------------------------------- +bool RifEclipseUserDataParserTools::hasTimeUnit(const std::string& line) +{ + if (line == "DAYS" || + line == "DAY" || + line == "YEARS" || + line == "YEAR") + { + return true; + } + + return false; +} + +//-------------------------------------------------------------------------------------------------- +/// Returns true when strtod performs a conversion on every word (its end pointer moves past the start of the word); each converted value is appended to doubleValues when that pointer is non-null. NOTE(review): trailing characters after the number are not checked, an empty word list yields true, and values appended before a failing word stay in doubleValues +//-------------------------------------------------------------------------------------------------- +bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(const std::vector& words, std::vector* doubleValues) +{ + char* end; + + for (const auto& word : words) + { + double doubleVal = strtod(word.data(), &end); + if (end == word.data()) + { + return false; + } + + if (doubleValues) + { + doubleValues->push_back(doubleVal); + } + } + + return true; +} diff --git a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h index 9878fe0791..3288a1dc47 100644 --- a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h +++ b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h @@ -33,7 +33,7 @@ struct ColumnInfo bool isAVector = false; RifEclipseSummaryAddress summaryAddress; std::string unitName; - std::string scaleFactor; + double scaleFactor; std::vector values; std::string dateFormat; std::string startDate; @@ -48,13 +48,16 @@ class RifEclipseUserDataParserTools public: static bool isLineSkippable(const std::string& line); static bool isAComment(const std::string& word); - static std::vector splitLineAndRemoveComments(std::string line); + static std::vector splitLineAndRemoveComments(const std::string& line); static RifEclipseSummaryAddress::SummaryVarCategory identifyCategory(const std::string& word); static void splitLineToDoubles(const std::string& line, 
std::vector& values); static size_t findFirstNonEmptyEntryIndex(std::vector& list); static RifEclipseSummaryAddress makeAndFillAddress(std::string quantityName, std::vector< std::string > headerColumn); - static bool keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate); + static bool keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate); static std::vector columnInfoForTable(std::stringstream& data); static bool isANumber(const std::string& line); static std::vector headerReader(std::stringstream& streamData, std::string& line); + + static bool hasTimeUnit(const std::string& line); + static bool hasOnlyValidDoubleValues(const std::vector& words, std::vector* doubleValues = nullptr); }; diff --git a/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp b/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp index 7d41e488ae..f909a78268 100644 --- a/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp +++ b/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp @@ -299,7 +299,8 @@ TEST(RifColumnBasedRsmspecParserTest, TestKeywordsAndMissingUnitName) EXPECT_EQ("OP-2_TR", tables[1].at(0).origin); EXPECT_EQ("DD/MM/YY", tables[1].at(0).dateFormat); - EXPECT_EQ("", tables[0].at(7).unitName); + // Assume missing units at start of row + EXPECT_EQ("", tables[0].at(0).unitName); ASSERT_EQ(8, tables.at(0).size()); EXPECT_EQ(1.0E-12, tables.at(0).at(4).values[0]);