///////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2017- Statoil ASA // // ResInsight is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU General Public License at // for more details. // ///////////////////////////////////////////////////////////////////////////////// #include "RifEclipseUserDataParserTools.h" #include "RiaDateStringParser.h" #include "RiaLogging.h" #include "RifEclipseUserDataKeywordTools.h" #include "cvfAssert.h" #include #include #include #include #include //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isLineSkippable(const std::string& line) { std::size_t found = line.find_first_not_of(" "); if (found == std::string::npos) { // Line with only spaces return true; } if (line[found] == '-') { // Comments start with - return true; } if (line[0] == '1') { // Single 1 at start of file return true; } std::string str(line); if (str.find("SUMMARY") < str.size()) { return true; } if (str.find("PAGE") < str.size()) { return true; } if (str.find("NULL") < str.size()) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isAComment(const std::string& word) { if (word.find("--") != std::string::npos) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::splitLineAndRemoveComments(const std::string& line) { std::istringstream iss(line); std::vector words{ std::istream_iterator{iss}, std::istream_iterator{} }; for(auto wordsIterator = words.begin(); wordsIterator != words.end(); ++wordsIterator) { if (isAComment(*wordsIterator)) { words.erase(wordsIterator, words.end()); break; } } return words; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifEclipseSummaryAddress::SummaryVarCategory RifEclipseUserDataParserTools::identifyCategory(const std::string& word) { if (word.size() == 0) return RifEclipseSummaryAddress::SUMMARY_INVALID; char firstLetter = word.at(0); if (firstLetter == 'A') return RifEclipseSummaryAddress::SUMMARY_AQUIFER; if (firstLetter == 'B') return RifEclipseSummaryAddress::SUMMARY_BLOCK; if (firstLetter == 'C') return RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION; if (firstLetter == 'F') return RifEclipseSummaryAddress::SUMMARY_FIELD; if (firstLetter == 'G') return RifEclipseSummaryAddress::SUMMARY_WELL_GROUP; if (firstLetter == 'N') return RifEclipseSummaryAddress::SUMMARY_NETWORK; if (firstLetter == 'R') return RifEclipseSummaryAddress::SUMMARY_REGION; //TODO: CAN BE REGION2REGION OR MISC!! if (firstLetter == 'S') return RifEclipseSummaryAddress::SUMMARY_WELL_SEGMENT; if (firstLetter == 'W') return RifEclipseSummaryAddress::SUMMARY_WELL; if (word.size() < 2) return RifEclipseSummaryAddress::SUMMARY_INVALID; std::string firstTwoLetters; firstTwoLetters.push_back(word.at(0)); firstTwoLetters.push_back(word.at(1)); if (firstTwoLetters == "LB") return RifEclipseSummaryAddress::SUMMARY_BLOCK_LGR; if (firstTwoLetters == "LC") return RifEclipseSummaryAddress::SUMMARY_WELL_COMPLETION_LGR; if (firstTwoLetters == "LW") return RifEclipseSummaryAddress::SUMMARY_WELL_LGR; /* TODO return RifEclipseSummaryAddress::SUMMARY_MISC return RifEclipseSummaryAddress::SUMMARY_REGION_2_REGION */ return RifEclipseSummaryAddress::SUMMARY_INVALID; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- size_t RifEclipseUserDataParserTools::findFirstNonEmptyEntryIndex(std::vector& list) { for (size_t i = 0; i < list.size(); i++) { if (!list[i].empty()) { return i; } } return list.size(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate) { std::vector words = splitLineAndRemoveComments(line); if (words.size() < 2) return false; if (words[0] == "ORIGIN") { origin = words[1]; return true; } else if (words[0] == "STARTDATE") { words.erase(words.begin()); for (size_t i = 0; i < words.size(); i++) { std::string s = words[i]; startDate += s; if (i < words.size() - 1) { startDate += " "; } } return true; } else if (words[0] == "DATEFORMAT") { dateFormat = words[1]; return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifEclipseUserDataParserTools::splitLineToDoubles(const std::string& line, std::vector& values) { std::istringstream iss(line); values.clear(); double d; while (iss >> d) { values.push_back(d); } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isANumber(const std::string& line) { try { std::stod(line); } catch (...) { return false; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::headerReader(std::stringstream& streamData, std::string& line) { std::vector header; while (!isANumber(line) && !streamData.eof()) { header.push_back(line); std::getline(streamData, line); } return header; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasTimeUnit(const std::string& word) { if (word == "DAYS" || word == "DAY" || word == "YEARS" || word == "YEAR" || word == "DATE" || word == "DATES") { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(const std::vector& words, std::vector* doubleValues) { char* end; bool onlyValidValues = true; for (const auto& word : words) { if (word.find_first_not_of("0123456789.eE-") != std::string::npos) { onlyValidValues = false; } else { double doubleVal = strtod(word.data(), &end); doubleValues->push_back(doubleVal); } } return onlyValidValues; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasDateUnit(const std::string& word) { if (word.find("DATE") != std::string::npos) return true; return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isValidTableData(size_t columnCount, const std::string& line) { std::vector words = splitLineAndRemoveComments(line); if (words.size() != columnCount) return false; std::vector doubleValues; RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(words, &doubleValues); if (doubleValues.size() == columnCount) return true; size_t columnsWithDate = 0; for (auto w : words) { if (RiaDateStringParser::parseDateString(w).isValid()) { columnsWithDate++; } } if (columnsWithDate == 1 && doubleValues.size() == columnCount - 1) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- TableData RifEclipseUserDataParserTools::tableDataFromText(std::stringstream& streamData, std::vector* errorText) { TableData emptyTable; std::string origin = ""; std::string dateFormat = ""; std::string startDate = ""; std::string firstLine; std::getline(streamData, firstLine); while (isLineSkippable(firstLine) || keywordParser(firstLine, origin, dateFormat, startDate)) { if (!streamData.good()) { if (errorText) errorText->push_back("Failed to detect start of table header"); return emptyTable; } std::getline(streamData, firstLine); } std::vector quantityNames = splitLineAndRemoveComments(firstLine); size_t columnCount = quantityNames.size(); if (columnCount == 0) { if (errorText) errorText->push_back("No quantities detected in table"); return emptyTable; } std::vector< std::vector< std::string > > allHeaderRows; { std::stringstream::pos_type posAtStartOfFirstLine = streamData.tellg(); std::string secondLine; std::getline(streamData, firstLine); std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg(); std::getline(streamData, secondLine); bool header = true; while (header) { if (isValidTableData(columnCount, firstLine) && isValidTableData(columnCount, secondLine)) { header = false; break; } else { std::vector words = splitLineAndRemoveComments(firstLine); if (words.size() > 0) { allHeaderRows.push_back(words); } } posAtStartOfFirstLine = posAtStartOfSecondLine; firstLine = secondLine; posAtStartOfSecondLine = streamData.tellg(); std::getline(streamData, secondLine); if (!streamData.good()) { header = false; } } streamData.seekg(posAtStartOfFirstLine); } std::vector unitNames; std::vector scaleFactors; std::vector< std::vector< std::string > > headerRows; for (const auto& rowWords : allHeaderRows) { bool excludeFromHeader = false; if (rowWords.size() == columnCount) { if (unitNames.size() == 0) { for (const std::string& word : rowWords) { if (hasTimeUnit(word)) { unitNames = rowWords; excludeFromHeader = true; } } } if (scaleFactors.size() == 0) { std::vector values; if (hasOnlyValidDoubleValues(rowWords, &values)) { scaleFactors = values; excludeFromHeader = true; } } } if (!excludeFromHeader) { headerRows.push_back(rowWords); } } if (columnCount != unitNames.size()) { if (errorText) errorText->push_back("Number of quantities is different from number of units"); return emptyTable; } std::vector columnInfos; // Create string vectors for each column { std::vector parserErrors; std::vector> tableHeaderText = RifEclipseUserDataKeywordTools::buildColumnHeaderText(quantityNames, headerRows, &parserErrors); if (parserErrors.size() > 0) { if (errorText) errorText->insert(errorText->end(), parserErrors.begin(), parserErrors.end()); return emptyTable; } // For each column header, create rif adress and date time for (size_t i = 0; i < tableHeaderText.size(); i++) { auto columnText = tableHeaderText[i]; if (columnText.size() == 0) { if (errorText) errorText->push_back("Detected column with no content"); continue; } std::string quantity = columnText[0]; std::string unit = unitNames[i]; std::vector columnHeader; if (columnText.size() > 1) columnHeader.insert(columnHeader.begin(), columnText.begin() + 1, columnText.end()); RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress(quantity, columnHeader); ColumnInfo ci(adr, unit); if (quantity == "DATE") { ci.isStringData = true; } columnInfos.push_back(ci); } } return TableData(origin, dateFormat, startDate, columnInfos); }