///////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2017- Statoil ASA // // ResInsight is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU General Public License at // for more details. // ///////////////////////////////////////////////////////////////////////////////// #include "RifEclipseUserDataParserTools.h" #include "RiaDateStringParser.h" #include "RiaLogging.h" #include "RiaQDateTimeTools.h" #include "RiaStdStringTools.h" #include "RifEclipseUserDataKeywordTools.h" #include "cvfAssert.h" #include #include #include #include #include #include //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isLineSkippable( const std::string& line ) { std::size_t found = line.find_first_not_of( " " ); if ( found == std::string::npos ) { // Line with only spaces return true; } if ( line[found] == '-' ) { // Comments start with - return true; } if ( line[found] == '1' && found == 0 && line.find_first_not_of( "1 ", 1 ) == std::string::npos ) { // Single 1 at start of file return true; } std::string str( line ); if ( str.find( "SUMMARY" ) < str.size() ) { return true; } if ( str.find( "PAGE" ) < str.size() ) { return true; } if ( str.find( "NULL" ) < str.size() ) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isAComment( const std::string& word ) { if ( word.find( "--" ) != std::string::npos ) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::splitLineAndRemoveComments( const std::string& line ) { std::istringstream iss( line ); std::vector words{ std::istream_iterator{ iss }, std::istream_iterator{} }; for ( auto wordsIterator = words.begin(); wordsIterator != words.end(); ++wordsIterator ) { if ( isAComment( *wordsIterator ) ) { words.erase( wordsIterator, words.end() ); break; } } return words; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::keywordParser( const std::string& line, std::string& origin, std::string& dateFormat, std::string& startDate ) { std::vector words = splitLineAndRemoveComments( line ); if ( words.size() < 2 ) return false; if ( words[0] == "ORIGIN" ) { origin = words[1]; return true; } else if ( words[0] == "STARTDATE" ) { words.erase( words.begin() ); for ( size_t i = 0; i < words.size(); i++ ) { std::string s = words[i]; startDate += s; if ( i < words.size() - 1 ) { startDate += " "; } } return true; } else if ( words[0] == "DATEFORMAT" ) { dateFormat = words[1]; return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::splitLineToDoubles( const std::string& line ) { std::vector values; QString s = QString::fromStdString( line ); QStringList words = s.split( " " ); bool ok = false; for ( auto w : words ) { double val = w.toDouble( &ok ); if ( ok ) { values.push_back( val ); } } return values; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isANumber( const std::string& line ) { try { auto value = std::stod( line ); if ( std::isinf( value ) || std::isnan( value ) ) return false; return true; } catch ( ... ) { return false; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::headerReader( std::stringstream& streamData, std::string& line ) { std::vector header; while ( !isANumber( line ) && !streamData.eof() ) { header.push_back( line ); std::getline( streamData, line ); } return header; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasTimeUnit( const std::string& word ) { if ( word == "DAYS" || word == "DAY" || word == "YEARS" || word == "YEAR" || word == "DATE" || word == "DATES" ) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::hasOnlyValidDoubleValues( const std::vector& words, std::vector* doubleValues ) { bool onlyValidValues = true; for ( const auto& word : words ) { if ( word.find_first_not_of( "0123456789.eE-+" ) != std::string::npos ) { onlyValidValues = false; } else { double doubleVal = RiaStdStringTools::toDouble( word ); doubleValues->push_back( doubleVal ); } } return onlyValidValues; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isValidTableData( size_t columnCount, const std::string& line ) { std::vector words = splitLineAndRemoveComments( line ); if ( words.size() != columnCount ) return false; std::vector doubleValues; RifEclipseUserDataParserTools::hasOnlyValidDoubleValues( words, &doubleValues ); if ( doubleValues.size() == columnCount ) return true; size_t columnsWithDate = 0; for ( auto w : words ) { if ( RiaDateStringParser::parseDateString( w ).isValid() ) { columnsWithDate++; } } if ( columnsWithDate == 1 && doubleValues.size() == columnCount - 1 ) { return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- TableData RifEclipseUserDataParserTools::tableDataFromText( std::stringstream& streamData, std::vector* errorText ) { TableData emptyTable; std::string origin = ""; std::string dateFormat = ""; std::string startDate = ""; std::string firstLine; std::getline( streamData, firstLine ); while ( isLineSkippable( firstLine ) || keywordParser( firstLine, origin, dateFormat, startDate ) ) { if ( !streamData.good() ) { // End of file return emptyTable; } std::getline( streamData, firstLine ); } std::vector quantityNames = splitLineAndRemoveComments( firstLine ); size_t columnCount = quantityNames.size(); if ( columnCount == 0 ) { if ( errorText ) errorText->push_back( "No quantities detected in table" ); return emptyTable; } std::vector> allHeaderRows; { std::stringstream::pos_type posAtStartOfFirstLine = streamData.tellg(); std::string secondLine; std::getline( streamData, firstLine ); std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg(); std::getline( streamData, secondLine ); bool header = true; while ( header ) { if ( isValidTableData( columnCount, firstLine ) && isValidTableData( columnCount, secondLine ) ) { header = false; break; } else { std::vector words = splitLineAndRemoveComments( firstLine ); if ( !words.empty() ) { allHeaderRows.push_back( words ); } } posAtStartOfFirstLine = posAtStartOfSecondLine; firstLine = secondLine; posAtStartOfSecondLine = streamData.tellg(); std::getline( streamData, secondLine ); if ( !streamData.good() ) { header = false; } } streamData.seekg( posAtStartOfFirstLine ); } std::vector unitNames; std::vector scaleFactors; std::vector> headerRows; for ( const auto& rowWords : allHeaderRows ) { bool excludeFromHeader = false; if ( rowWords.size() == columnCount ) { if ( unitNames.empty() ) { for ( const std::string& word : rowWords ) { if ( hasTimeUnit( word ) ) { unitNames = rowWords; excludeFromHeader = true; } } } if ( scaleFactors.empty() ) { std::vector values; if ( hasOnlyValidDoubleValues( rowWords, &values ) ) { scaleFactors = values; excludeFromHeader = true; } } } if ( !excludeFromHeader ) { headerRows.push_back( rowWords ); } } if ( columnCount != unitNames.size() ) { if ( errorText ) errorText->push_back( "Number of quantities is different from number of units" ); return emptyTable; } std::vector columnInfos; // Create string vectors for each column { std::vector parserErrors; std::vector> tableHeaderText = RifEclipseUserDataKeywordTools::buildColumnHeaderText( quantityNames, headerRows, &parserErrors ); if ( !parserErrors.empty() ) { if ( errorText ) errorText->insert( errorText->end(), parserErrors.begin(), parserErrors.end() ); return emptyTable; } // For each column header, create rif adress and date time for ( size_t i = 0; i < tableHeaderText.size(); i++ ) { auto columnText = tableHeaderText[i]; if ( columnText.empty() ) { if ( errorText ) errorText->push_back( "Detected column with no content" ); continue; } std::string vectorName = columnText[0]; std::string unit = unitNames[i]; std::vector columnHeader; if ( columnText.size() > 1 ) columnHeader.insert( columnHeader.begin(), columnText.begin() + 1, columnText.end() ); RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress( vectorName, columnHeader ); Column ci = Column::createColumnInfoFromRsmData( vectorName, unit, adr ); columnInfos.push_back( ci ); } } return TableData( origin, startDate, columnInfos ); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isFixedWidthHeader( const std::string& lines ) { std::stringstream streamData( lines ); std::vector headerLines = RifEclipseUserDataParserTools::findValidHeaderLines( streamData ); if ( headerLines.size() > 1 ) { std::vector firstLine = RifEclipseUserDataParserTools::columnIndexForWords( headerLines[0] ); for ( auto line : headerLines ) { std::vector columnIndicesForLine = RifEclipseUserDataParserTools::columnIndexForWords( line ); for ( auto index : columnIndicesForLine ) { if ( std::find( firstLine.begin(), firstLine.end(), index ) == firstLine.end() ) { return false; } } } return true; } return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnInfoForFixedColumnWidth( std::stringstream& streamData ) { auto headerLines = RifEclipseUserDataParserTools::findValidHeaderLines( streamData ); auto columnHeaders = RifEclipseUserDataParserTools::splitIntoColumnHeaders( headerLines ); return RifEclipseUserDataParserTools::columnInfoFromColumnHeaders( columnHeaders ); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::findValidHeaderLines( std::stringstream& streamData ) { std::vector headerLines; std::stringstream::pos_type posAtTableDataStart = streamData.tellg(); size_t columnCount = 0; std::string line; bool continueParsing = true; bool hasStepType = false; size_t minimunRequiredExtraHeaderLines = 0; while ( continueParsing ) { posAtTableDataStart = streamData.tellg(); if ( !std::getline( streamData, line ) ) { continueParsing = false; } else { if ( !RifEclipseUserDataParserTools::isLineSkippable( line ) ) { auto words = RifEclipseUserDataParserTools::splitLineAndRemoveComments( line ); if ( !hasStepType ) { for ( size_t i = 0; i < words.size(); i++ ) { if ( RifEclipseUserDataKeywordTools::isStepType( words[i] ) ) { hasStepType = true; } } } if ( isUnitText( line ) ) { minimunRequiredExtraHeaderLines += 1; } if ( isScalingText( line ) ) { minimunRequiredExtraHeaderLines += 1; } if ( columnCount == 0 ) { // Fist line with valid header data defines the number of columns columnCount = words.size(); minimunRequiredExtraHeaderLines = RifEclipseUserDataKeywordTools::computeRequiredHeaderLineCount( words ); headerLines.push_back( line ); } else if ( headerLines.size() < minimunRequiredExtraHeaderLines ) { headerLines.push_back( line ); } else { std::vector doubleValues = RifEclipseUserDataParserTools::splitLineToDoubles( line ); if ( doubleValues.size() < columnCount && words.size() < columnCount ) { if ( hasStepType && ( words.size() + 1 == columnCount ) ) { continueParsing = false; } else { // Consider a line with double values less than column count as a table header headerLines.push_back( line ); } } else { continueParsing = false; } } } } } streamData.seekg( posAtTableDataStart ); return headerLines; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector> RifEclipseUserDataParserTools::splitIntoColumnHeaders( const std::vector& headerLines ) { std::vector> headerLinesPerColumn; if ( !headerLines.empty() ) { std::vector columnOffsets = RifEclipseUserDataParserTools::columnIndexForWords( headerLines[0] ); if ( !columnOffsets.empty() ) { headerLinesPerColumn.resize( columnOffsets.size() ); for ( auto headerLine : headerLines ) { for ( size_t i = 0; i < columnOffsets.size(); i++ ) { size_t colStart = columnOffsets[i]; size_t columnWidth = std::string::npos; if ( i < columnOffsets.size() - 1 ) { columnWidth = columnOffsets[i + 1] - colStart; } else { if ( headerLine.size() > colStart ) { columnWidth = headerLine.size() - colStart; } } std::string subString; if ( columnWidth != std::string::npos && colStart < headerLine.size() && colStart + columnWidth <= headerLine.size() ) { subString = headerLine.substr( colStart, columnWidth ); } subString = trimString( subString ); headerLinesPerColumn[i].push_back( subString ); } } } } return headerLinesPerColumn; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnInfoFromColumnHeaders( const std::vector>& columnData ) { std::vector table; bool isUnitsDetected = false; bool isScalingDetected = false; for ( auto columnLines : columnData ) { if ( columnLines.size() > 1 && isUnitText( columnLines[1] ) ) { isUnitsDetected = true; } if ( columnLines.size() > 2 && isScalingText( columnLines[2] ) ) { isScalingDetected = true; } } for ( auto columnLines : columnData ) { if ( columnLines.empty() ) continue; std::string vectorName = columnLines[0]; std::string unit; size_t startIndex = 1; if ( isUnitsDetected ) { unit = columnLines[1]; startIndex = 2; } if ( isScalingDetected ) { // std::string scaling = columnLines[2]; startIndex = 3; } std::vector restOfHeader; for ( size_t i = startIndex; i < columnLines.size(); i++ ) { restOfHeader.push_back( columnLines[i] ); } RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress( vectorName, restOfHeader ); Column ci = Column::createColumnInfoFromRsmData( vectorName, unit, adr ); table.push_back( ci ); } return table; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::columnIndexForWords( const std::string& line ) { std::vector columnOffsets; std::size_t offset = line.find_first_not_of( " " ); while ( offset != std::string::npos ) { columnOffsets.push_back( offset ); offset = line.find_first_of( " ", offset ); offset = line.find_first_not_of( " ", offset ); } return columnOffsets; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifEclipseUserDataParserTools::mergeEqualTimeSteps( const std::vector& tables ) { if ( tables.size() < 2 ) { return tables; } if ( tables[0].columnInfos().empty() ) return tables; QDateTime firstTableStartTime; for ( auto c : tables[0].columnInfos() ) { if ( c.summaryAddress.vectorName() == "DATE" ) { if ( c.itemCount() > 0 ) { firstTableStartTime = RiaDateStringParser::parseDateString( c.textValues[0] ); } } } if ( !firstTableStartTime.isValid() ) { return tables; } std::vector largeTables; largeTables.push_back( tables[0] ); TableData& firstTable = largeTables[0]; size_t itemsInFirstTable = tables[0].columnInfos()[0].itemCount(); for ( size_t i = 1; i < tables.size(); i++ ) { bool isDatesEqual = true; if ( firstTableStartTime.isValid() ) { QDateTime tableFirstTime; for ( auto& c : tables[i].columnInfos() ) { if ( c.summaryAddress.vectorName() == "DATE" ) { if ( c.itemCount() > 0 ) { tableFirstTime = RiaDateStringParser::parseDateString( c.textValues[0] ); } } } if ( firstTableStartTime != tableFirstTime ) { isDatesEqual = false; } } if ( !tables[i].columnInfos().empty() && tables[i].columnInfos()[0].itemCount() == itemsInFirstTable && isDatesEqual ) { for ( auto& c : tables[i].columnInfos() ) { if ( c.summaryAddress.vectorName() != "DATE" ) { firstTable.columnInfos().push_back( c ); } } } else { largeTables.push_back( tables[i] ); } } return largeTables; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::string RifEclipseUserDataParserTools::trimString( const std::string& s ) { auto sCopy = s.substr( 0, s.find_last_not_of( ' ' ) + 1 ); if ( !sCopy.empty() ) { sCopy = sCopy.substr( sCopy.find_first_not_of( ' ' ) ); } return sCopy; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isUnitText( const std::string& word ) { if ( hasTimeUnit( word ) ) return true; if ( word.find( "BARSA" ) != std::string::npos ) return true; if ( word.find( "SM3" ) != std::string::npos ) return true; if ( word.find( "RM3" ) != std::string::npos ) return true; return false; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifEclipseUserDataParserTools::isScalingText( const std::string& word ) { return word.find_first_of( '*' ) != std::string::npos; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::string Column::columnName() const { return summaryAddress.vectorName(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- size_t Column::itemCount() const { switch ( dataType ) { case NUMERIC: return values.size(); case TEXT: return textValues.size(); case DATETIME: return dateTimeValues.size(); default: return 0; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- Column Column::createColumnInfoFromRsmData( const std::string& vectorName, const std::string& unit, const RifEclipseSummaryAddress& addr ) { Column ci( addr, unit ); if ( RifEclipseUserDataKeywordTools::isDate( vectorName ) ) { ci.dataType = TEXT; } else if ( RifEclipseUserDataKeywordTools::isStepType( vectorName ) ) { ci.dataType = TEXT; } else { ci.dataType = NUMERIC; } return ci; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- Column Column::createColumnInfoFromCsvData( const RifEclipseSummaryAddress& addr, const std::string& unit ) { Column col( addr, unit ); return col; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector Column::qDateTimeValues() const { std::vector output; for ( auto t : dateTimeValues ) output.push_back( RiaQDateTimeTools::fromTime_t( t ) ); return output; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- int TableData::dateTimeColumnIndex() const { return m_dateTimeColumnIndex; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QDateTime TableData::findFirstDate() const { QDateTime dt = RiaQDateTimeTools::epoch(); for ( auto ci : m_columnInfos ) { if ( RifEclipseUserDataKeywordTools::isDate( ci.summaryAddress.vectorName() ) ) { if ( ci.itemCount() > 0 ) { std::string firstDateString = ci.textValues[0]; QDateTime candidate = RiaDateStringParser::parseDateString( firstDateString ); if ( candidate.isValid() ) { dt = candidate; } } } } return dt; }