#2066 Observed Data : Improve detection of required header lines

This commit is contained in:
Magne Sjaastad 2017-11-06 21:20:53 +01:00
parent 6444b9f32e
commit 5ab56bb2fb
4 changed files with 145 additions and 36 deletions

View File

@ -206,12 +206,11 @@ RifEclipseSummaryAddress RifEclipseUserDataKeywordTools::makeAndFillAddress(cons
} }
case RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION: case RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION:
{ {
if (columnHeaderText.size() > 3) if (columnHeaderText.size() > 1)
{ {
wellName = columnHeaderText[0]; wellName = columnHeaderText[0];
cellI = RiaStdStringTools::toInt(columnHeaderText[1]);
cellJ = RiaStdStringTools::toInt(columnHeaderText[2]); RifEclipseUserDataKeywordTools::extractThreeInts(&cellI, &cellJ, &cellK, columnHeaderText[1]);
cellK = RiaStdStringTools::toInt(columnHeaderText[3]);
} }
break; break;
} }
@ -220,17 +219,16 @@ RifEclipseSummaryAddress RifEclipseUserDataKeywordTools::makeAndFillAddress(cons
if (columnHeaderText.size() > 1) if (columnHeaderText.size() > 1)
{ {
wellName = columnHeaderText[0]; wellName = columnHeaderText[0];
lgrName = columnHeaderText[1]; lgrName = columnHeaderText[1];
} }
break; break;
case RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION_LGR: case RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION_LGR:
if (columnHeaderText.size() > 4) if (columnHeaderText.size() > 2)
{ {
wellName = columnHeaderText[0]; wellName = columnHeaderText[0];
lgrName = columnHeaderText[1]; lgrName = columnHeaderText[1];
cellI = RiaStdStringTools::toInt(columnHeaderText[2]);
cellJ = RiaStdStringTools::toInt(columnHeaderText[3]); RifEclipseUserDataKeywordTools::extractThreeInts(&cellI, &cellJ, &cellK, columnHeaderText[2]);
cellK = RiaStdStringTools::toInt(columnHeaderText[4]);
} }
break; break;
case RifEclipseSummaryAddress::SUMMARY_WELL_SEGMENT: case RifEclipseSummaryAddress::SUMMARY_WELL_SEGMENT:
@ -241,20 +239,17 @@ RifEclipseSummaryAddress RifEclipseUserDataKeywordTools::makeAndFillAddress(cons
} }
break; break;
case RifEclipseSummaryAddress::SUMMARY_BLOCK: case RifEclipseSummaryAddress::SUMMARY_BLOCK:
if (columnHeaderText.size() > 2) if (columnHeaderText.size() > 0)
{ {
cellI = RiaStdStringTools::toInt(columnHeaderText[0]); RifEclipseUserDataKeywordTools::extractThreeInts(&cellI, &cellJ, &cellK, columnHeaderText[0]);
cellJ = RiaStdStringTools::toInt(columnHeaderText[1]);
cellK = RiaStdStringTools::toInt(columnHeaderText[2]);
} }
break; break;
case RifEclipseSummaryAddress::SUMMARY_BLOCK_LGR: case RifEclipseSummaryAddress::SUMMARY_BLOCK_LGR:
if (columnHeaderText.size() > 3) if (columnHeaderText.size() > 1)
{ {
lgrName = columnHeaderText[0]; lgrName = columnHeaderText[0];
cellI = RiaStdStringTools::toInt(columnHeaderText[1]);
cellJ = RiaStdStringTools::toInt(columnHeaderText[2]); RifEclipseUserDataKeywordTools::extractThreeInts(&cellI, &cellJ, &cellK, columnHeaderText[1]);
cellK = RiaStdStringTools::toInt(columnHeaderText[3]);
} }
break; break;
case RifEclipseSummaryAddress::SUMMARY_CALCULATED: case RifEclipseSummaryAddress::SUMMARY_CALCULATED:
@ -285,3 +280,73 @@ bool RifEclipseUserDataKeywordTools::isStepType(const std::string& identifier)
return (identifier.find("STEPTYPE") != std::string::npos); return (identifier.find("STEPTYPE") != std::string::npos);
} }
//--------------------------------------------------------------------------------------------------
/// Computes how many header lines are required for a table containing the keywords in `words`.
///
/// Keywords accepted by knownKeywordsWithZeroRequiredHeaderLines() are ignored. For every other
/// keyword, the number of entries returned by requiredItemsPerLineForKeyword() is taken as the
/// number of additional header lines that keyword needs. The result is 2 plus the largest such
/// count (2 when no keyword contributes).
//--------------------------------------------------------------------------------------------------
size_t RifEclipseUserDataKeywordTools::computeRequiredHeaderLineCount(const std::vector<std::string>& words)
{
    size_t maxHeaderLinesFromKeywords = 0;

    // Bind by const reference so no keyword string is copied while iterating.
    for (const auto& w : words)
    {
        if (knownKeywordsWithZeroRequiredHeaderLines(w)) continue;

        const auto linesForKeyword = RifEclipseUserDataKeywordTools::requiredItemsPerLineForKeyword(w).size();
        if (linesForKeyword > maxHeaderLinesFromKeywords)
        {
            maxHeaderLinesFromKeywords = linesForKeyword;
        }
    }

    // Quantity and unit, scaling is optional
    return 2 + maxHeaderLinesFromKeywords;
}
//--------------------------------------------------------------------------------------------------
/// Returns true when `identifier` contains, as a substring, any of the keywords in the table
/// below. computeRequiredHeaderLineCount() skips such identifiers when counting header lines.
//--------------------------------------------------------------------------------------------------
bool RifEclipseUserDataKeywordTools::knownKeywordsWithZeroRequiredHeaderLines(const std::string& identifier)
{
    // Matching is by substring (std::string::find), tested in the order listed.
    static const char* const zeroHeaderLineKeywords[] =
    {
        "DAY",
        "MONTH",
        "YEAR",
        "DATE",
        "TIME",
        "ELAPSED",
        "NEWTON",
        "NLINSMIN",
        "NLINSMAX",
        "MLINEARS",
        "MSUMLINS",
        "MSUMNEWT",
        "TCPU",
        "TCPUTS",
        "TCPUDAY",
        "TELAPLIN",
        "STEPTYPE"
    };

    for (const char* keyword : zeroHeaderLineKeywords)
    {
        if (identifier.find(keyword) != std::string::npos)
        {
            return true;
        }
    }

    return false;
}
//--------------------------------------------------------------------------------------------------
/// Tokenizes `line` with RiaStdStringTools::splitStringBySpace() and, if at least three tokens
/// are produced, converts the first three with RiaStdStringTools::toInt() and writes them to
/// *cellI, *cellJ and *cellK. With fewer than three tokens the outputs are left unmodified.
//--------------------------------------------------------------------------------------------------
void RifEclipseUserDataKeywordTools::extractThreeInts(int* cellI, int* cellJ, int* cellK, const std::string& line)
{
    const std::vector<std::string> tokens = RiaStdStringTools::splitStringBySpace(line);

    // Not enough tokens for an I/J/K triplet: leave the caller's values as they are.
    if (tokens.size() < 3)
    {
        return;
    }

    *cellI = RiaStdStringTools::toInt(tokens[0]);
    *cellJ = RiaStdStringTools::toInt(tokens[1]);
    *cellK = RiaStdStringTools::toInt(tokens[2]);
}

View File

@ -44,5 +44,9 @@ public:
static RifEclipseSummaryAddress makeAndFillAddress(const std::string quantityName, const std::vector<std::string>& columnHeaderText); static RifEclipseSummaryAddress makeAndFillAddress(const std::string quantityName, const std::vector<std::string>& columnHeaderText);
static bool isStepType(const std::string& identifier); static bool isStepType(const std::string& identifier);
static size_t computeRequiredHeaderLineCount(const std::vector<std::string>& words);
static bool knownKeywordsWithZeroRequiredHeaderLines(const std::string& identifier);
static void extractThreeInts(int* i, int* j, int* k, const std::string& line);
}; };

View File

@ -584,6 +584,8 @@ std::vector<std::string> RifEclipseUserDataParserTools::findValidHeaderLines(std
std::string line; std::string line;
bool continueParsing = true; bool continueParsing = true;
bool hasStepType = false; bool hasStepType = false;
size_t minimunRequiredExtraHeaderLines = 0;
while (continueParsing) while (continueParsing)
{ {
posAtTableDataStart = streamData.tellg(); posAtTableDataStart = streamData.tellg();
@ -615,6 +617,12 @@ std::vector<std::string> RifEclipseUserDataParserTools::findValidHeaderLines(std
columnCount = words.size(); columnCount = words.size();
minimunRequiredExtraHeaderLines = RifEclipseUserDataKeywordTools::computeRequiredHeaderLineCount(words);
headerLines.push_back(line);
}
else if (headerLines.size() < minimunRequiredExtraHeaderLines)
{
headerLines.push_back(line); headerLines.push_back(line);
} }
else else

View File

@ -175,16 +175,6 @@ TEST(FixedWidthDataParser, VaryingTimeStepCount)
" 6-NOV-1997 0 0 0 6 11 1997 0 4.880000 9.720000 \n" " 6-NOV-1997 0 0 0 6 11 1997 0 4.880000 9.720000 \n"
" 7-NOV-1997 0.002738 3 28 7 11 1997 3 5.240000 10.11000 \n" " 7-NOV-1997 0.002738 3 28 7 11 1997 3 5.240000 10.11000 \n"
" 8-NOV-1997 0.006556 4 42 8 11 1997 4 5.730000 10.60000 \n" " 8-NOV-1997 0.006556 4 42 8 11 1997 4 5.730000 10.60000 \n"
" 9-NOV-1997 0.009231 3 28 9 11 1997 3 6.080000 10.95000 \n"
" 10-NOV-1997 0.011462 4 32 10 11 1997 4 6.500000 11.37000 \n"
" 11-NOV-1997 0.013710 4 35 11 11 1997 4 6.940001 11.81000 \n"
" 11-NOV-1997 0.015950 3 25 11 11 1997 3 7.270000 12.14000 \n"
" 12-NOV-1997 0.018477 4 35 12 11 1997 4 7.710001 12.58000 \n"
" 13-NOV-1997 0.020190 3 24 13 11 1997 3 8.040000 12.91000 \n"
" 14-NOV-1997 0.021903 3 27 14 11 1997 3 8.380000 13.25000 \n"
" 14-NOV-1997 0.024232 2 17 14 11 1997 2 8.640000 13.57000 \n"
" 17-NOV-1997 0.030838 3 26 17 11 1997 3 8.980000 13.91000 \n"
" 19-NOV-1997 0.037060 3 30 19 11 1997 3 9.350000 14.28000 \n"
"1 \n" "1 \n"
" -------------------------------------------------------------------------------------------------------------------------------\n" " -------------------------------------------------------------------------------------------------------------------------------\n"
" SUMMARY OF RUN NORNE_ATW2013_RFTPLT_V3 ECLIPSE 2016.2 DATESTAMP 13-DEC-2016 \n" " SUMMARY OF RUN NORNE_ATW2013_RFTPLT_V3 ECLIPSE 2016.2 DATESTAMP 13-DEC-2016 \n"
@ -199,17 +189,14 @@ TEST(FixedWidthDataParser, VaryingTimeStepCount)
" 7-NOV-1997 0 2396930. 5424424. 591498.1 0 2396.930 0 -21.6173 -21.6173 \n" " 7-NOV-1997 0 2396930. 5424424. 591498.1 0 2396.930 0 -21.6173 -21.6173 \n"
" 8-NOV-1997 0 -250487. 5423940. 591966.2 0 2047.598 0 -11124.0 -15535.3 \n" " 8-NOV-1997 0 -250487. 5423940. 591966.2 0 2047.598 0 -11124.0 -15535.3 \n"
" 9-NOV-1997 0 2829432. 5421400. 591952.3 0 4812.102 0 -17.8744 -15552.8 \n" " 9-NOV-1997 0 2829432. 5421400. 591952.3 0 4812.102 0 -17.8744 -15552.8 \n"
" 10-NOV-1997 0 -280634. 5421285. 592209.2 0 4583.461 0 -10959.9 -24482.1 \n"
" 11-NOV-1997 0 2830404. 5419148. 592196.6 0 6908.132 0 -10.3646 -24490.7 \n"
" 11-NOV-1997 0 -295420. 5419071. 592429.6 0 6666.499 0 -10081.2 -32736.4 \n"
" 12-NOV-1997 0 2653586. 5416835. 592407.6 0 9116.355 0 1.209827 -32735.3 \n"
" 13-NOV-1997 0 -360534. 5416802. 592585.4 0 8890.818 0 -9987.70 -38983.3 \n"
" 14-NOV-1997 0 2637960. 5415305. 592557.9 0 10541.03 0 14.09936 -38974.5 \n"
; ;
RifColumnBasedUserDataParser parser(data); RifColumnBasedUserDataParser parser(data);
auto tables = parser.tableData(); auto tables = parser.tableData();
EXPECT_EQ(size_t(2), tables.size()); EXPECT_EQ(size_t(2), tables.size());
EXPECT_EQ(size_t(3), tables[0].columnInfos()[0].itemCount());
EXPECT_EQ(size_t(4), tables[1].columnInfos()[0].itemCount());
} }
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
@ -281,3 +268,48 @@ TEST(FixedWidthDataParser, HandlingOfStepType)
EXPECT_EQ(size_t(19), tables[0].columnInfos().size()); EXPECT_EQ(size_t(19), tables[0].columnInfos().size());
} }
//--------------------------------------------------------------------------------------------------
/// Feeds findValidHeaderLines() a table whose header consists of four lines (the last one holding
/// three integers per data column) and checks that all four lines are returned as header lines
/// and that splitIntoColumnHeaders() yields ten column headers.
//--------------------------------------------------------------------------------------------------
TEST(FixedWidthDataParser, ParsingOfHeaderWithCompletions)
{
    QString data = R"(
1
-------------------------------------------------------------------------------------------------------------------------------
SUMMARY OF RUN NORNE_ATW2013_RFTPLT_V3 ECLIPSE 2016.2 DATESTAMP 13-DEC-2016 USER dtb MACHINE stj-lcb01-01-03
-------------------------------------------------------------------------------------------------------------------------------
DATE CWIR CWIR CWIR CWIR CWIR CWIR CWIR CWIR CWIR
SM3/DAY SM3/DAY SM3/DAY SM3/DAY SM3/DAY SM3/DAY SM3/DAY SM3/DAY SM3/DAY
F-1H F-1H F-2H F-2H F-2H F-2H F-2H F-2H F-2H
12 85 22 12 85 5 18 83 1 18 83 2 18 83 3 18 83 6 18 83 7 18 83 8 18 83 10
-------------------------------------------------------------------------------------------------------------------------------
6-NOV-1997 0 0 0 0 0 0 0 0 0
7-NOV-1997 9.231156 0 4.764449 0.281349 0.745133 0.265189 0.920954 0.836619 3.560058
8-NOV-1997 14.24625 0 -0.32936 -0.02394 -0.03676 -0.01107 -0.04118 -0.04043 -0.20764
9-NOV-1997 18.14752 0 12.64910 0.550845 1.412629 0.484177 1.673316 1.510174 6.306583
10-NOV-1997 22.47141 0 -0.43799 -0.08433 -0.22154 -0.06072 -0.22558 -0.22200 -1.14752
11-NOV-1997 26.39600 0 4.129149 0.249054 0.636686 0.220191 0.752644 0.669143 2.670208
11-NOV-1997 30.89960 0 -0.42792 -0.07257 -0.18697 -0.06075 -0.22589 -0.22287 -1.16095
12-NOV-1997 34.92273 0 2.787762 0.204226 0.520520 0.173727 0.588237 0.516083 1.973659
13-NOV-1997 37.70422 0 -0.45489 -0.08366 -0.21913 -0.07483 -0.27827 -0.27465 -1.43221
14-NOV-1997 40.17989 0 1.737699 0.167881 0.426118 0.136267 0.456785 0.394921 1.435897
14-NOV-1997 41.75888 0 -0.03586 0.081980 0.190420 0.058727 0.180976 0.136145 0.240186
17-NOV-1997 55.30696 0 -0.39500 -0.05166 -0.13519 -0.05874 -0.22546 -0.23077 -1.29799
19-NOV-1997 57.85853 0 -0.06687 0.071926 0.165751 0.048624 0.144723 0.101929 0.076216
21-NOV-1997 66.37193 0 -0.41220 -0.05978 -0.16607 -0.06713 -0.26004 -0.26881 -1.54050
23-NOV-1997 66.17105 0 -0.16977 0.031785 0.060331 0.015140 0.026817 -0.00778 -0.45987
25-NOV-1997 71.57468 0 -0.35382 -0.03761 -0.11790 -0.04534 -0.18728 -0.20627 -1.31764
)";

    std::stringstream streamData;
    streamData.str(data.toStdString());

    std::vector<std::string> tableHeaderLines = RifEclipseUserDataParserTools::findValidHeaderLines(streamData);

    // Compare against size_t so both operands of EXPECT_EQ are unsigned, as in the other tests of this file.
    EXPECT_EQ(size_t(4), tableHeaderLines.size());

    auto colHeaders = RifEclipseUserDataParserTools::splitIntoColumnHeaders(tableHeaderLines);
    EXPECT_EQ(size_t(10), colHeaders.size());
}