#1959 Observed Data : Support custom ordering of header rows

2025-02-25 18:55:39 -06:00 · 2017-10-03 11:45:31 +02:00 · 2017-10-03 11:45:31 +02:00 · f3f78995fa
commit f3f78995fa
parent 4c61696f4e
3 changed files with 148 additions and 62 deletions
--- a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp
+++ b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.cpp
@ -90,7 +90,7 @@ bool RifEclipseUserDataParserTools::isAComment(const std::string& word)
 //--------------------------------------------------------------------------------------------------
 /// 
 //--------------------------------------------------------------------------------------------------
-std::vector<std::string> RifEclipseUserDataParserTools::splitLineAndRemoveComments(std::string line)
+std::vector<std::string> RifEclipseUserDataParserTools::splitLineAndRemoveComments(const std::string& line)
 {
    std::istringstream iss(line);
    std::vector<std::string> words{ std::istream_iterator<std::string>{iss},
@ -232,7 +232,10 @@ RifEclipseSummaryAddress RifEclipseUserDataParserTools::makeAndFillAddress(std::
        cellK);
 }

-bool RifEclipseUserDataParserTools::keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate)
+//--------------------------------------------------------------------------------------------------
+/// 
+//--------------------------------------------------------------------------------------------------
+bool RifEclipseUserDataParserTools::keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate)
 {
    std::vector<std::string> words = splitLineAndRemoveComments(line);
    if (words.size() < 2) return false;
@ -278,21 +281,100 @@ std::vector<ColumnInfo> RifEclipseUserDataParserTools::columnInfoForTable(std::s
    }

    std::vector<std::string> quantityNames = splitLineAndRemoveComments(line);
-    std::getline(streamData, line);
-    std::vector<std::string> unitNames = splitLineAndRemoveComments(line);
-    std::getline(streamData, line);
-    std::vector<std::string> scaleFactors = splitLineAndRemoveComments(line);
-    
-    std::vector<RifEclipseSummaryAddress::SummaryVarCategory> categories;
    size_t columnCount = quantityNames.size();

-    if (unitNames.size() != columnCount)
+    std::vector< std::vector< std::string > > allHeaderRows;
+
    {
-        size_t diff = columnCount - unitNames.size();
-        unitNames.insert(unitNames.end(), diff, "");
+        std::stringstream::pos_type posAtStartOfLine = streamData.tellg();
+
+        std::string secondLine;
+        std::getline(streamData, line);
+    
+        std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg();
+        std::getline(streamData, secondLine);
+
+        bool header = true;
+        while (header)
+        {
+            std::vector<std::string> words = splitLineAndRemoveComments(line);
+            std::vector<std::string> wordsSecondLine = splitLineAndRemoveComments(secondLine);
+
+            if (words.size() == columnCount &&
+                wordsSecondLine.size() == columnCount &&
+                hasOnlyValidDoubleValues(words) &&
+                hasOnlyValidDoubleValues(wordsSecondLine))
+            {
+                header = false;
+                break;
+            }
+            else if (words.size() > columnCount)
+            {
+                continue;
+            }
+            else
+            {
+                size_t diff = columnCount - words.size();
+
+                if (diff == columnCount)
+                {
+                    std::vector< std::string > vectorOfEmptyStrings(columnCount, "");
+                    allHeaderRows.push_back(vectorOfEmptyStrings);
+                }
+                else
+                {
+                    words.insert(words.begin(), diff, "");
+                    allHeaderRows.push_back(words);
+                }
+            }
+
+            posAtStartOfLine = posAtStartOfSecondLine;
+            line = secondLine;
+
+            posAtStartOfSecondLine = streamData.tellg();
+            std::getline(streamData, secondLine);
+        }
+
+        streamData.seekg(posAtStartOfLine);
    }

-    for (std::string unit : unitNames)
+    std::vector<std::string> unitNames;
+    std::vector<double> scaleFactors;
+    std::vector< std::vector< std::string > > restOfHeaderRows;
+
+    for (const auto& wordsForRow : allHeaderRows)
+    {
+        bool excludeFromHeader = false;
+        if (unitNames.size() == 0)
+        {
+            for (const std::string& word : wordsForRow)
+            {
+                if (hasTimeUnit(word))
+                {
+                    unitNames = wordsForRow;
+                    excludeFromHeader = true;
+                }
+            }
+        }
+
+        if (scaleFactors.size() == 0)
+        {
+            std::vector<double> values;
+
+            if (hasOnlyValidDoubleValues(wordsForRow, &values))
+            {
+                scaleFactors = values;
+                excludeFromHeader = true;
+            }
+        }
+
+        if (!excludeFromHeader)
+        {
+            restOfHeaderRows.push_back(wordsForRow);
+        }
+    }
+
+    for (const std::string& unit : unitNames)
    {
        ColumnInfo columnInfo;
        columnInfo.unitName = unit;
@ -302,62 +384,22 @@ std::vector<ColumnInfo> RifEclipseUserDataParserTools::columnInfoForTable(std::s
        table.push_back(columnInfo);
    }

-    if (scaleFactors.size() < columnCount)
-    {
-        size_t diff = columnCount - scaleFactors.size();
-        scaleFactors.insert(scaleFactors.end(), diff, "1");
-    }
-
    for (size_t i = 0; i < table.size(); i++)
    {
-        table[i].scaleFactor = scaleFactors[i];
-    }
-
-    std::vector< std::vector< std::string > > restOfHeader;
-
-    std::stringstream::pos_type posAtStartOfLine = streamData.tellg();
-
-    bool header = true;
-    while (header)
-    {
-        posAtStartOfLine = streamData.tellg();
-
-        std::getline(streamData, line);
-
-        std::vector<std::string> words = splitLineAndRemoveComments(line);
-
-        if (words.size() == columnCount)
+        if (scaleFactors.size() == table.size())
        {
-            header = false;
-            break;
-        }
-        else if (words.size() > columnCount)
-        {
-            continue;
+            table[i].scaleFactor = scaleFactors[i];
        }
        else
        {
-            size_t diff = columnCount - words.size();
-            
-            if (diff == columnCount)
-            {
-                std::vector< std::string > vectorOfEmptyStrings(columnCount, "");
-                restOfHeader.push_back(vectorOfEmptyStrings);
-            }
-            else
-            {
-                words.insert(words.begin(), diff, "");
-                restOfHeader.push_back(words);
-            }
+            table[i].scaleFactor = 1.0;
        }
    }
-
-    streamData.seekg(posAtStartOfLine);
    
-    for (size_t i = 0; i < columnCount; i++)
+    for (size_t i = 0; i < table.size(); i++)
    {
        std::vector< std::string > restOfHeaderColumn;
-        for (std::vector< std::string > restOfHeaderRow : restOfHeader)
+        for (std::vector< std::string > restOfHeaderRow : restOfHeaderRows)
        {
            restOfHeaderColumn.push_back(restOfHeaderRow.at(i));
        }
@ -419,4 +461,44 @@ std::vector<std::string> RifEclipseUserDataParserTools::headerReader(std::string
        std::getline(streamData, line);
    }
    return header;
-}
+}
+
+//--------------------------------------------------------------------------------------------------
+/// Returns true when the given text is exactly "DAYS", "DAY", "YEARS" or "YEAR" (case-sensitive, whole-string comparison).
+//--------------------------------------------------------------------------------------------------
+bool RifEclipseUserDataParserTools::hasTimeUnit(const std::string& line)
+{
+    if (line == "DAYS" || // the parameter is named line, but columnInfoForTable passes one header word at a time
+        line == "DAY" ||
+        line == "YEARS" ||
+        line == "YEAR")
+    {
+        return true;
+    }
+
+    return false; // any other text, including lower-case variants, is not treated as a time unit
+}
+
+//--------------------------------------------------------------------------------------------------
+/// Returns true when strtod can read a leading number from every word; each parsed value is appended to doubleValues when that pointer is non-null.
+//--------------------------------------------------------------------------------------------------
+bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(const std::vector<std::string>& words, std::vector<double>* doubleValues)
+{
+    char* end; // set by strtod to the first character it did not consume
+
+    for (const auto& word : words)
+    {
+        double doubleVal = strtod(word.data(), &end); // NOTE(review): text following a leading number (e.g. "12abc") is not rejected by the check below
+        if (end == word.data()) // nothing was consumed: the word is empty or does not start with a number
+        {
+            return false; // values appended for earlier words remain in doubleValues
+        }
+
+        if (doubleValues)
+        {
+            doubleValues->push_back(doubleVal); // appended to existing content; the vector is never cleared here
+        }
+    }
+
+    return true; // also reached when words is empty
+}
--- a/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h
+++ b/ApplicationCode/FileInterface/RifEclipseUserDataParserTools.h
@ -33,7 +33,7 @@ struct ColumnInfo
    bool                                            isAVector = false;
    RifEclipseSummaryAddress                        summaryAddress;
    std::string                                     unitName;
-    std::string                                     scaleFactor;
+    double                                          scaleFactor;
    std::vector<double>                             values;
    std::string                                     dateFormat;
    std::string                                     startDate;
@ -48,13 +48,16 @@ class RifEclipseUserDataParserTools
 public:
    static bool                                         isLineSkippable(const std::string& line);
    static bool                                         isAComment(const std::string& word);
-    static std::vector<std::string>                     splitLineAndRemoveComments(std::string line);
+    static std::vector<std::string>                     splitLineAndRemoveComments(const std::string& line);
    static RifEclipseSummaryAddress::SummaryVarCategory identifyCategory(const std::string& word);
    static void                                         splitLineToDoubles(const std::string& line, std::vector<double>& values);
    static size_t                                       findFirstNonEmptyEntryIndex(std::vector<std::string>& list);
    static  RifEclipseSummaryAddress                    makeAndFillAddress(std::string quantityName, std::vector< std::string > headerColumn);
-    static bool                                         keywordParser(std::string line, std::string& origin, std::string& dateFormat, std::string& startDate);
+    static bool                                         keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate);
    static std::vector<ColumnInfo>                      columnInfoForTable(std::stringstream& data);
    static bool                                         isANumber(const std::string& line);
    static std::vector<std::string>                     headerReader(std::stringstream& streamData, std::string& line);
+
+    static bool                                         hasTimeUnit(const std::string& line);
+    static bool                                         hasOnlyValidDoubleValues(const std::vector<std::string>& words, std::vector<double>* doubleValues = nullptr);
 };
--- a/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp
+++ b/ApplicationCode/UnitTests/ObservedDataParser-Test.cpp
@ -299,7 +299,8 @@ TEST(RifColumnBasedRsmspecParserTest, TestKeywordsAndMissingUnitName)
    EXPECT_EQ("OP-2_TR", tables[1].at(0).origin);
    EXPECT_EQ("DD/MM/YY", tables[1].at(0).dateFormat);

-    EXPECT_EQ("", tables[0].at(7).unitName);
+    // Assume missing units at start of row
+    EXPECT_EQ("", tables[0].at(0).unitName);

    ASSERT_EQ(8, tables.at(0).size());
    EXPECT_EQ(1.0E-12, tables.at(0).at(4).values[0]);