///////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2017- Statoil ASA // // ResInsight is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU General Public License at // for more details. // ///////////////////////////////////////////////////////////////////////////////// #include "RifEclipseUserDataParserTools.h" #include "RiaDateStringParser.h" #include "RiaLogging.h" #include "RiaQDateTimeTools.h" #include "RiaStdStringTools.h" #include "RifEclipseUserDataKeywordTools.h" #include "cvfAssert.h" #include #include #include #include #include //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isLineSkippable(const std::string& line) { std::size_t found = line.find_first_not_of(" "); if (found == std::string::npos) { // Line with only spaces return true; } if (line[found] == '-') { // Comments start with - return true; } if (line[found] == '1' && found == 0 && line.find_first_not_of("1 ", 1) == std::string::npos) { // Single 1 at start of file return true; } std::string str(line); if (str.find("SUMMARY") < str.size()) { return true; } if (str.find("PAGE") < str.size()) { return true; } if (str.find("NULL") < str.size()) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isAComment(const std::string& word) { if (word.find("--") != std::string::npos) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::splitLineAndRemoveComments(const std::string& line) { std::istringstream iss(line); std::vector words{ std::istream_iterator{iss}, std::istream_iterator{} }; for(auto wordsIterator = words.begin(); wordsIterator != words.end(); ++wordsIterator) { if (isAComment(*wordsIterator)) { words.erase(wordsIterator, words.end()); break; } } return words; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::keywordParser(const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate) { std::vector words = splitLineAndRemoveComments(line); if (words.size() < 2) return false; if (words[0] == "ORIGIN") { origin = words[1]; return true; } else if (words[0] == "STARTDATE") { words.erase(words.begin()); for (size_t i = 0; i < words.size(); i++) { std::string s = words[i]; startDate += s; if (i < words.size() - 1) { startDate += " "; } } return true; } else if (words[0] == "DATEFORMAT") { dateFormat = words[1]; return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::splitLineToDoubles(const std::string& line) { std::vector values; QString s = QString::fromStdString(line); QStringList words = s.split(" "); bool ok = false; for (auto w : words) { double val = w.toDouble(&ok); if (ok) { values.push_back(val); } } return values; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isANumber(const std::string& line) { try { std::stod(line); } catch (...) { return false; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::headerReader(std::stringstream& streamData, std::string& line) { std::vector header; while (!isANumber(line) && !streamData.eof()) { header.push_back(line); std::getline(streamData, line); } return header; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasTimeUnit(const std::string& word) { if (word == "DAYS" || word == "DAY" || word == "YEARS" || word == "YEAR" || word == "DATE" || word == "DATES") { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(const std::vector& words, std::vector* doubleValues) { bool onlyValidValues = true; for (const auto& word : words) { if (word.find_first_not_of("0123456789.eE-+") != std::string::npos) { onlyValidValues = false; } else { double doubleVal = RiaStdStringTools::toDouble(word); doubleValues->push_back(doubleVal); } } return onlyValidValues; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isValidTableData(size_t columnCount, const std::string& line) { std::vector words = splitLineAndRemoveComments(line); if (words.size() != columnCount) return false; std::vector doubleValues; RifEclipseUserDataParserTools::hasOnlyValidDoubleValues(words, &doubleValues); if (doubleValues.size() == columnCount) return true; size_t columnsWithDate = 0; for (auto w : words) { if (RiaDateStringParser::parseDateString(w).isValid()) { columnsWithDate++; } } if (columnsWithDate == 1 && doubleValues.size() == columnCount - 1) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- TableData RifEclipseUserDataParserTools::tableDataFromText(std::stringstream& streamData, std::vector* errorText) { TableData emptyTable; std::string origin = ""; std::string dateFormat = ""; std::string startDate = ""; std::string firstLine; std::getline(streamData, firstLine); while (isLineSkippable(firstLine) || keywordParser(firstLine, origin, dateFormat, startDate)) { if (!streamData.good()) { // End of file return emptyTable; } std::getline(streamData, firstLine); } std::vector quantityNames = splitLineAndRemoveComments(firstLine); size_t columnCount = quantityNames.size(); if (columnCount == 0) { if (errorText) errorText->push_back("No quantities detected in table"); return emptyTable; } std::vector< std::vector< std::string > > allHeaderRows; { std::stringstream::pos_type posAtStartOfFirstLine = streamData.tellg(); std::string secondLine; std::getline(streamData, firstLine); std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg(); std::getline(streamData, secondLine); bool header = true; while (header) { if (isValidTableData(columnCount, firstLine) && isValidTableData(columnCount, secondLine)) { header = false; break; } else { std::vector words = splitLineAndRemoveComments(firstLine); if (words.size() > 0) { allHeaderRows.push_back(words); } } posAtStartOfFirstLine = posAtStartOfSecondLine; firstLine = secondLine; posAtStartOfSecondLine = streamData.tellg(); std::getline(streamData, secondLine); if (!streamData.good()) { header = false; } } streamData.seekg(posAtStartOfFirstLine); } std::vector unitNames; std::vector scaleFactors; std::vector< std::vector< std::string > > headerRows; for (const auto& rowWords : allHeaderRows) { bool excludeFromHeader = false; if (rowWords.size() == columnCount) { if (unitNames.size() == 0) { for (const std::string& word : rowWords) { if (hasTimeUnit(word)) { unitNames = rowWords; excludeFromHeader = true; } } } if (scaleFactors.size() == 0) { std::vector values; if (hasOnlyValidDoubleValues(rowWords, &values)) { scaleFactors = values; excludeFromHeader = true; } } } if (!excludeFromHeader) { headerRows.push_back(rowWords); } } if (columnCount != unitNames.size()) { if (errorText) errorText->push_back("Number of quantities is different from number of units"); return emptyTable; } std::vector columnInfos; // Create string vectors for each column { std::vector parserErrors; std::vector> tableHeaderText = RifEclipseUserDataKeywordTools::buildColumnHeaderText(quantityNames, headerRows, &parserErrors); if (parserErrors.size() > 0) { if (errorText) errorText->insert(errorText->end(), parserErrors.begin(), parserErrors.end()); return emptyTable; } // For each column header, create rif adress and date time for (size_t i = 0; i < tableHeaderText.size(); i++) { auto columnText = tableHeaderText[i]; if (columnText.size() == 0) { if (errorText) errorText->push_back("Detected column with no content"); continue; } std::string quantity = columnText[0]; std::string unit = unitNames[i]; std::vector columnHeader; if (columnText.size() > 1) columnHeader.insert(columnHeader.begin(), columnText.begin() + 1, columnText.end()); RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress(quantity, columnHeader); Column ci = Column::createColumnInfoFromRsmData(quantity, unit, adr); columnInfos.push_back(ci); } } return TableData(origin, startDate, columnInfos); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isFixedWidthHeader(const std::string& lines) { std::stringstream streamData(lines); std::vector headerLines = RifEclipseUserDataParserTools::findValidHeaderLines(streamData); if (headerLines.size() > 1) { std::vector firstLine = RifEclipseUserDataParserTools::columnIndexForWords(headerLines[0]); for (auto line : headerLines) { std::vector columnIndicesForLine = RifEclipseUserDataParserTools::columnIndexForWords(line); for (auto index : columnIndicesForLine) { if (std::find(firstLine.begin(), firstLine.end(), index) == firstLine.end()) { return false; } } } return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnInfoForFixedColumnWidth(std::stringstream& streamData) { auto headerLines = RifEclipseUserDataParserTools::findValidHeaderLines(streamData); auto columnHeaders = RifEclipseUserDataParserTools::splitIntoColumnHeaders(headerLines); return RifEclipseUserDataParserTools::columnInfoFromColumnHeaders(columnHeaders); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::findValidHeaderLines(std::stringstream& streamData) { std::vector headerLines; std::stringstream::pos_type posAtTableDataStart = streamData.tellg(); size_t columnCount = 0; std::string line; bool continueParsing = true; bool hasStepType = false; size_t minimunRequiredExtraHeaderLines = 0; while (continueParsing) { posAtTableDataStart = streamData.tellg(); if (!std::getline(streamData, line)) { continueParsing = false; } else { if (!RifEclipseUserDataParserTools::isLineSkippable(line)) { auto words = RifEclipseUserDataParserTools::splitLineAndRemoveComments(line); if (!hasStepType) { for (size_t i = 0; i < words.size(); i++) { if (RifEclipseUserDataKeywordTools::isStepType(words[i])) { hasStepType = true; } } } if (isUnitText(line)) { minimunRequiredExtraHeaderLines += 1; } if (isScalingText(line)) { minimunRequiredExtraHeaderLines += 1; } if (columnCount == 0) { // Fist line with valid header data defines the number of columns columnCount = words.size(); minimunRequiredExtraHeaderLines = RifEclipseUserDataKeywordTools::computeRequiredHeaderLineCount(words); headerLines.push_back(line); } else if (headerLines.size() < minimunRequiredExtraHeaderLines) { headerLines.push_back(line); } else { std::vector doubleValues = RifEclipseUserDataParserTools::splitLineToDoubles(line); if (doubleValues.size() < columnCount && words.size() < columnCount) { if (hasStepType && (words.size() + 1 == columnCount)) { continueParsing = false; } else { // Consider a line with double values less than column count as a table header headerLines.push_back(line); } } else { continueParsing = false; } } } } } streamData.seekg(posAtTableDataStart); return headerLines; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector> RifEclipseUserDataParserTools::splitIntoColumnHeaders(const std::vector& headerLines) { std::vector> headerLinesPerColumn; if (headerLines.size() > 0) { std::vector columnOffsets = RifEclipseUserDataParserTools::columnIndexForWords(headerLines[0]); if (columnOffsets.size() > 0) { headerLinesPerColumn.resize(columnOffsets.size()); for (auto headerLine : headerLines) { for (size_t i = 0; i < columnOffsets.size(); i++) { size_t colStart = columnOffsets[i]; size_t columnWidth = std::string::npos; if (i < columnOffsets.size() - 1) { columnWidth = columnOffsets[i + 1] - colStart; } else { if (headerLine.size() > colStart) { columnWidth = headerLine.size() - colStart; } } std::string subString; if (columnWidth != std::string::npos && colStart < headerLine.size() && colStart + columnWidth <= headerLine.size()) { subString = headerLine.substr(colStart, columnWidth); } subString = trimString(subString); headerLinesPerColumn[i].push_back(subString); } } } } return headerLinesPerColumn; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnInfoFromColumnHeaders(const std::vector>& columnData) { std::vector table; bool isUnitsDetected = false; bool isScalingDetected = false; for (auto columnLines : columnData) { if (columnLines.size() > 1 && isUnitText(columnLines[1])) { isUnitsDetected = true; } if (columnLines.size() > 2 && isScalingText(columnLines[2])) { isScalingDetected = true; } } for (auto columnLines : columnData) { if (columnLines.size() == 0) continue; std::string quantity = columnLines[0]; std::string unit; size_t startIndex = 1; if (isUnitsDetected) { unit = columnLines[1]; startIndex = 2; } if (isScalingDetected) { //std::string scaling = columnLines[2]; startIndex = 3; } std::vector restOfHeader; for (size_t i = startIndex; i < columnLines.size(); i++) { restOfHeader.push_back(columnLines[i]); } RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress(quantity, restOfHeader); Column ci = Column::createColumnInfoFromRsmData(quantity, unit, adr); table.push_back(ci); } return table; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnIndexForWords(const std::string& line) { std::vector columnOffsets; std::size_t offset = line.find_first_not_of(" "); while (offset != std::string::npos) { columnOffsets.push_back(offset); offset = line.find_first_of(" ", offset); offset = line.find_first_not_of(" ", offset); } return columnOffsets; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::mergeEqualTimeSteps(const std::vector& tables) { if (tables.size() < 2) { return tables; } if (tables[0].columnInfos().size() == 0) return tables; QDateTime firstTableStartTime; for (auto c : tables[0].columnInfos()) { if (c.summaryAddress.quantityName() == "DATE") { if (c.itemCount() > 0) { firstTableStartTime = RiaDateStringParser::parseDateString(c.textValues[0]); } } } if (!firstTableStartTime.isValid()) { return tables; } std::vector largeTables; largeTables.push_back(tables[0]); TableData& firstTable = largeTables[0]; size_t itemsInFirstTable = tables[0].columnInfos()[0].itemCount(); for (size_t i = 1; i < tables.size(); i++) { bool isDatesEqual = true; if (firstTableStartTime.isValid()) { QDateTime tableFirstTime; for (auto& c : tables[i].columnInfos()) { if (c.summaryAddress.quantityName() == "DATE") { if (c.itemCount() > 0) { tableFirstTime = RiaDateStringParser::parseDateString(c.textValues[0]); } } } if (firstTableStartTime != tableFirstTime) { isDatesEqual = false; } } if (tables[i].columnInfos().size() > 0 && tables[i].columnInfos()[0].itemCount() == itemsInFirstTable && isDatesEqual) { for (auto& c : tables[i].columnInfos()) { if (c.summaryAddress.quantityName() != "DATE") { firstTable.columnInfos().push_back(c); } } } else { largeTables.push_back(tables[i]); } } return largeTables; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::string RifEclipseUserDataParserTools::trimString(const std::string& s) { auto sCopy = s.substr(0, s.find_last_not_of(' ') + 1); if (sCopy.size() > 0) { sCopy = sCopy.substr(sCopy.find_first_not_of(' ')); } return sCopy; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isUnitText(const std::string& word) { if (hasTimeUnit(word)) return true; if (word.find("BARSA") != std::string::npos) return true; if (word.find("SM3") != std::string::npos) return true; if (word.find("RM3") != std::string::npos) return true; return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isScalingText(const std::string& word) { return word.find_first_of('*') != std::string::npos; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::string Column::columnName() const { return summaryAddress.uiText(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- size_t Column::itemCount() const { switch (dataType) { case NUMERIC: return values.size(); case TEXT: return textValues.size(); case DATETIME: return dateTimeValues.size(); default: return 0; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- Column Column::createColumnInfoFromRsmData(const std::string& quantity, const std::string& unit, const RifEclipseSummaryAddress& addr) { Column ci(addr, unit); if (RifEclipseUserDataKeywordTools::isDate(quantity)) { ci.dataType = TEXT; } else if (RifEclipseUserDataKeywordTools::isStepType(quantity)) { ci.dataType = TEXT; } else { ci.dataType = NUMERIC; } return ci; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- Column Column::createColumnInfoFromCsvData(const RifEclipseSummaryAddress& addr, const std::string& unit) { Column col(addr, unit); return col; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector Column::qDateTimeValues() const { std::vector output; for (auto t : dateTimeValues) output.push_back(RiaQDateTimeTools::fromTime_t(t)); return output; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- int TableData::dateTimeColumnIndex() const { return m_dateTimeColumnIndex; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QDateTime TableData::findFirstDate() const { QDateTime dt = RiaQDateTimeTools::epoch(); for (auto ci : m_columnInfos) { if (RifEclipseUserDataKeywordTools::isDate(ci.summaryAddress.quantityName())) { if (ci.itemCount() > 0) { std::string firstDateString = ci.textValues[0]; QDateTime candidate = RiaDateStringParser::parseDateString(firstDateString); if (candidate.isValid()) { dt = candidate; } } } } return dt; }