///////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2017- Statoil ASA // // ResInsight is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU General Public License at // for more details. // ///////////////////////////////////////////////////////////////////////////////// #include "RifCsvUserDataParser.h" #include "RifEclipseUserDataKeywordTools.h" #include "RifEclipseUserDataParserTools.h" #include "RifFileParseTools.h" #include "RiaDateStringParser.h" #include "RiaLogging.h" #include "RiaQDateTimeTools.h" #include "RiaStdStringTools.h" #include "RiaTextStringTools.h" #include "SummaryPlotCommands/RicPasteAsciiDataToSummaryPlotFeatureUi.h" #include "cvfAssert.h" #include #include #include #include #include #include #include //-------------------------------------------------------------------------------------------------- /// Internal constants //-------------------------------------------------------------------------------------------------- #define DOUBLE_INF std::numeric_limits::infinity() #define ISO_DATE_FORMAT "yyyy-MM-dd" #define TIME_FORMAT "hh:mm:ss" using Sample = std::pair; using SampleList = std::vector; //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- enum class CsvLineBasedColumnType { DATE, VECTOR, VALUE, ERROR_VALUE, COMMENTS }; const std::vector CSV_LINE_BASED_COL_NAMES = { "DATE", "VECTOR", "VALUE", "ERROR", "COMMENTS" }; //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::RifCsvUserDataParser( QString* errorText ) : m_errorText( errorText ) { } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::~RifCsvUserDataParser() { } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parse( const AsciiDataParseOptions& parseOptions ) { if ( determineCsvLayout() == LineBased ) return parseLineBasedData(); return parseColumnBasedData( parseOptions ); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const TableData& RifCsvUserDataParser::tableData() const { return m_tableData; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const Column* RifCsvUserDataParser::columnInfo( size_t columnIndex ) const { if ( columnIndex >= m_tableData.columnInfos().size() ) return nullptr; return &( m_tableData.columnInfos()[columnIndex] ); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const Column* RifCsvUserDataParser::dateTimeColumn() const { for ( const Column& col : m_tableData.columnInfos() ) { if ( col.dataType == Column::DATETIME ) { return &col; } } return nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifCsvUserDataParser::parseLineBasedHeader( QStringList headerCols ) { std::vector colIndexes; for ( int i = 0; i < (int)CSV_LINE_BASED_COL_NAMES.size(); i++ ) { for ( int j = 0; j < (int)headerCols.size(); j++ ) { if ( headerCols[j] == CSV_LINE_BASED_COL_NAMES[i] ) { colIndexes.push_back( j ); break; } } if ( i < 3 && (int)colIndexes.size() < i + 1 ) return {}; } return colIndexes; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnInfo( const AsciiDataParseOptions& parseOptions ) { QTextStream* dataStream = openDataStream(); std::vector columnInfoList; bool result = parseColumnInfo( dataStream, parseOptions, &columnInfoList ); if ( result ) { m_tableData = TableData( "", "", columnInfoList ); } closeDataStream(); return result; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::previewText( int lineCount, const AsciiDataParseOptions& parseOptions ) { QTextStream* stream = openDataStream(); if ( !stream ) return ""; QString preview; QTextStream outStream( &preview ); int iLine = 0; bool header = true; int timeColumnIndex = -1; outStream << ""; outStream << ""; while ( iLine < lineCount && !stream->atEnd() ) { QString line = stream->readLine(); if ( line.isEmpty() ) continue; outStream << ""; int iCol = 0; QStringList cols = RifFileParseTools::splitLineAndTrim( line, parseOptions.cellSeparator ); for ( const QString& cellData : cols ) { if ( cellData == parseOptions.timeSeriesColumnName && header ) { timeColumnIndex = iCol; } outStream << ( header ? ""; outStream << cellData; if ( iCol < cols.size() - 1 && ( parseOptions.cellSeparator == ";" || parseOptions.cellSeparator == "," ) ) { outStream << parseOptions.cellSeparator; } outStream << ( header ? "" : "" ); iCol++; } outStream << ""; header = false; iLine++; } outStream << "
"; closeDataStream(); return preview; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QStringList RifCsvUserDataParser::timeColumnPreviewData( int lineCount, const AsciiDataParseOptions& parseOptions ) { QStringList timeStrings; QTextStream* stream = openDataStream(); if ( stream ) { int timeColumnIndex = -1; int iLine = 0; while ( iLine < lineCount && !stream->atEnd() ) { QString line = stream->readLine(); if ( line.isEmpty() ) continue; int iCol = 0; QStringList cols = RifFileParseTools::splitLineAndTrim( line, parseOptions.cellSeparator ); for ( const QString& cellData : cols ) { if ( cellData == parseOptions.timeSeriesColumnName && iLine == 0 ) { timeColumnIndex = iCol; } if ( iLine > 0 && timeColumnIndex != -1 && timeColumnIndex == iCol ) { timeStrings.push_back( cellData ); } iCol++; } iLine++; } } closeDataStream(); return timeStrings; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::CsvLayout RifCsvUserDataParser::determineCsvLayout() { QTextStream* dataStream = openDataStream(); QString firstLine; QStringList headers; while ( !dataStream->atEnd() ) { firstLine = dataStream->readLine(); if ( firstLine.isEmpty() ) continue; headers = firstLine.split( ';' ); if ( headers.size() < 3 || headers.size() > 5 ) continue; break; } closeDataStream(); if ( headers.contains( CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::DATE] ) && headers.contains( CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::VECTOR] ) && headers.contains( CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::VALUE] ) ) return LineBased; return ColumnBased; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnInfo( QTextStream* dataStream, const AsciiDataParseOptions& parseOptions, std::vector* columnInfoList ) { bool headerFound = false; if ( !columnInfoList ) return false; columnInfoList->clear(); while ( !headerFound ) { QString line = dataStream->readLine(); if ( line.trimmed().isEmpty() ) continue; QStringList lineColumns = RifFileParseTools::splitLineAndTrim( line, parseOptions.cellSeparator ); int colCount = lineColumns.size(); for ( int iCol = 0; iCol < colCount; iCol++ ) { QString colName = RiaTextStringTools::trimAndRemoveDoubleSpaces( lineColumns[iCol] ); RifEclipseSummaryAddress addr = RifEclipseSummaryAddress::fromEclipseTextAddress( colName.toStdString() ); Column col = Column::createColumnInfoFromCsvData( addr, "" ); columnInfoList->push_back( col ); } headerFound = true; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnBasedData( const AsciiDataParseOptions& parseOptions ) { bool errors = false; enum { FIRST_DATA_ROW, DATA_ROW } parseState = FIRST_DATA_ROW; int colCount; std::vector columnInfoList; QTextStream* dataStream = openDataStream(); // Parse header if ( !parseColumnInfo( dataStream, parseOptions, &columnInfoList ) ) { if ( m_errorText ) m_errorText->append( "CSV import: Failed to parse header columns" ); return false; } colCount = (int)columnInfoList.size(); while ( !dataStream->atEnd() && !errors ) { QString line = dataStream->readLine(); if ( line.trimmed().isEmpty() ) continue; QStringList lineColumns = RifFileParseTools::splitLineAndTrim( line, parseOptions.cellSeparator ); if ( lineColumns.size() != colCount ) { if ( m_errorText ) m_errorText->append( "CSV import: Varying number of columns" ); errors = true; break; } else if ( parseState == FIRST_DATA_ROW ) { for ( int iCol = 0; iCol < colCount; iCol++ ) { std::string colData = lineColumns[iCol].toStdString(); Column& col = columnInfoList[iCol]; // Determine column data type if ( col.dataType == Column::NONE ) { if ( QString::fromStdString( col.summaryAddress.quantityName() ) == parseOptions.timeSeriesColumnName ) { col.dataType = Column::DATETIME; } else { if ( parseOptions.assumeNumericDataColumns || RiaStdStringTools::isNumber( colData, parseOptions.locale.decimalPoint().toLatin1() ) ) { col.dataType = Column::NUMERIC; } else { col.dataType = Column::TEXT; } } } } parseState = DATA_ROW; } if ( parseState == DATA_ROW ) { for ( int iCol = 0; iCol < colCount; iCol++ ) { QString& colData = lineColumns[iCol]; Column& col = columnInfoList[iCol]; try { if ( col.dataType == Column::NUMERIC ) { bool parseOk = true; double value = parseOptions.locale.toDouble( colData, &parseOk ); if ( !parseOk ) { // Find the error reason, wrong decimal sign or something else if ( RiaStdStringTools::isNumber( colData.toStdString(), '.' ) || RiaStdStringTools::isNumber( colData.toStdString(), ',' ) ) { if ( m_errorText ) m_errorText->append( QString( "CSV import: Failed to parse numeric value in column %1\n" ) .arg( QString::number( iCol + 1 ) ) ); throw 0; } // Add nullptr value value = HUGE_VAL; } col.values.push_back( value ); } else if ( col.dataType == Column::TEXT ) { col.textValues.push_back( colData.toStdString() ); } else if ( col.dataType == Column::DATETIME ) { QDateTime dt; dt = tryParseDateTime( colData.toStdString(), parseOptions.dateTimeFormat ); if ( !dt.isValid() && !parseOptions.useCustomDateTimeFormat ) { // Try to match date format only if ( parseOptions.dateFormat != parseOptions.dateTimeFormat ) { dt = tryParseDateTime( colData.toStdString(), parseOptions.dateFormat ); } if ( !dt.isValid() && !parseOptions.fallbackDateTimeFormat.isEmpty() ) { dt = tryParseDateTime( colData.toStdString(), parseOptions.fallbackDateTimeFormat ); } } if ( !dt.isValid() ) { if ( m_errorText ) m_errorText->append( "CSV import: Failed to parse date time value" ); throw 0; } col.dateTimeValues.push_back( dt.toSecsSinceEpoch() ); } } catch ( ... ) { errors = true; break; } } } } closeDataStream(); if ( !errors ) { TableData td( "", "", columnInfoList ); m_tableData = td; } return !errors; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseLineBasedData() { QTextStream* dataStream = openDataStream(); std::map>> addressesAndData; std::vector colIndexes; // Parse header int lineCount = 0; bool headerFound = false; bool expectErrorValue = false; while ( !dataStream->atEnd() ) { lineCount++; QString line = dataStream->readLine(); if ( line.trimmed().isEmpty() ) continue; QStringList dataItems = RifFileParseTools::splitLineAndTrim( line, ";" ); if ( dataItems.size() < 3 || dataItems.size() > 5 ) continue; if ( !headerFound ) { colIndexes = parseLineBasedHeader( dataItems ); if ( !colIndexes.empty() ) { headerFound = true; expectErrorValue = colIndexes.size() > (size_t)CsvLineBasedColumnType::ERROR_VALUE && colIndexes[(size_t)CsvLineBasedColumnType::ERROR_VALUE] >= 0; } continue; } if ( dataItems.size() != (int)colIndexes.size() ) continue; { auto textAddr = dataItems[colIndexes[(size_t)CsvLineBasedColumnType::VECTOR]]; auto addr = RifEclipseSummaryAddress::fromEclipseTextAddress( textAddr.toStdString() ); auto errAddr = addr; errAddr.setAsErrorResult(); if ( !addr.isValid() ) continue; // VECTOR { if ( addressesAndData.find( addr ) == addressesAndData.end() ) { addressesAndData.insert( std::make_pair( addr, std::vector() ) ); } // Create error address if error value is expected if ( expectErrorValue ) { if ( addressesAndData.find( errAddr ) == addressesAndData.end() ) { addressesAndData.insert( std::make_pair( errAddr, std::vector() ) ); } } } // DATE QDateTime dateTime; { auto dateText = dataItems[colIndexes[(size_t)CsvLineBasedColumnType::DATE]].toStdString(); dateTime = tryParseDateTime( dateText, ISO_DATE_FORMAT ); if ( !dateTime.isValid() ) { // Try to match date and time dateTime = tryParseDateTime( dateText, QString( ISO_DATE_FORMAT ) + " " + TIME_FORMAT ); } if ( !dateTime.isValid() ) { if ( m_errorText ) m_errorText->append( QString( "CSV import: Failed to parse date time value in line %1" ) .arg( QString::number( lineCount ) ) ); throw 0; } } // VALUE { bool parseOk = true; double value = QLocale::c().toDouble( dataItems[colIndexes[(size_t)CsvLineBasedColumnType::VALUE]], &parseOk ); if ( !parseOk ) { if ( m_errorText ) m_errorText->append( QString( "CSV import: Failed to parse numeric value in line %1\n" ) .arg( QString::number( lineCount ) ) ); throw 0; } auto& samples = addressesAndData[addr]; samples.push_back( std::make_pair( dateTime.toSecsSinceEpoch(), value ) ); } // ERROR VALUE if ( expectErrorValue ) { bool parseOk = true; double value = QLocale::c().toDouble( dataItems[colIndexes[(size_t)CsvLineBasedColumnType::ERROR_VALUE]], &parseOk ); if ( !parseOk ) value = DOUBLE_INF; auto& samples = addressesAndData[errAddr]; samples.push_back( std::make_pair( dateTime.toSecsSinceEpoch(), value ) ); } } } closeDataStream(); { std::vector columnInfoList; for ( const auto& item : addressesAndData ) { auto samples = item.second; // Sort samples by time std::sort( samples.begin(), samples.end(), []( const Sample& s1, const Sample& s2 ) { return s1.first < s2.first; } ); // Copy Column c = Column::createColumnInfoFromCsvData( item.first, "" ); c.dataType = Column::NUMERIC; for ( const auto& sample : samples ) { c.dateTimeValues.push_back( sample.first ); c.values.push_back( sample.second ); } columnInfoList.push_back( c ); } TableData td( "", "", columnInfoList ); m_tableData = td; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QDateTime RifCsvUserDataParser::tryParseDateTime( const std::string& colData, const QString& format ) { return RiaQDateTimeTools::fromString( QString::fromStdString( colData ), format ); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::tryDetermineCellSeparator() { QTextStream* dataStream = openDataStream(); if ( !dataStream ) { return ""; } std::vector lines; int iLine = 0; while ( iLine < 10 && !dataStream->atEnd() ) { QString line = dataStream->readLine(); if ( line.isEmpty() ) continue; lines.push_back( line ); iLine++; } closeDataStream(); // Try different cell separators int totColumnCountTab = 0; int totColumnCountSemicolon = 0; int totColumnCountComma = 0; for ( const QString& line : lines ) { totColumnCountTab += RifFileParseTools::splitLineAndTrim( line, "\t" ).size(); totColumnCountSemicolon += RifFileParseTools::splitLineAndTrim( line, ";" ).size(); totColumnCountComma += RifFileParseTools::splitLineAndTrim( line, "," ).size(); } double avgColumnCountTab = (double)totColumnCountTab / lines.size(); double avgColumnCountSemicolon = (double)totColumnCountSemicolon / lines.size(); double avgColumnCountComma = (double)totColumnCountComma / lines.size(); // Select the one having highest average double maxAvg = std::max( std::max( avgColumnCountTab, avgColumnCountSemicolon ), avgColumnCountComma ); if ( maxAvg == avgColumnCountTab ) return "\t"; if ( maxAvg == avgColumnCountSemicolon ) return ";"; if ( maxAvg == avgColumnCountComma ) return ","; return ""; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::tryDetermineDecimalSeparator( const QString& cellSeparator ) { QTextStream* dataStream = openDataStream(); int iLine = 0; int successfulParsesDot = 0; int successfulParsesComma = 0; while ( iLine < 10 && !dataStream->atEnd() ) { QString line = dataStream->readLine(); if ( line.isEmpty() ) continue; for ( const QString& cellData : RifFileParseTools::splitLineAndTrim( line, cellSeparator ) ) { bool parseOk; QLocale locale; locale = localeFromDecimalSeparator( "." ); locale.toDouble( cellData, &parseOk ); if ( parseOk ) successfulParsesDot++; locale = localeFromDecimalSeparator( "," ); locale.toDouble( cellData, &parseOk ); if ( parseOk ) successfulParsesComma++; } iLine++; } closeDataStream(); if ( successfulParsesComma > successfulParsesDot ) return ","; else return "."; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QLocale RifCsvUserDataParser::localeFromDecimalSeparator( const QString& decimalSeparator ) { if ( decimalSeparator == "," ) { return QLocale::Norwegian; } return QLocale::c(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataFileParser::RifCsvUserDataFileParser( const QString& fileName, QString* errorText ) : RifCsvUserDataParser( errorText ) { m_fileName = fileName; m_file = nullptr; m_textStream = nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataFileParser::~RifCsvUserDataFileParser() { if ( m_textStream ) { delete m_textStream; } closeFile(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QTextStream* RifCsvUserDataFileParser::openDataStream() { if ( !openFile() ) return nullptr; m_textStream = new QTextStream( m_file ); return m_textStream; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataFileParser::closeDataStream() { if ( m_textStream ) { delete m_textStream; m_textStream = nullptr; } closeFile(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataFileParser::openFile() { if ( !m_file ) { m_file = new QFile( m_fileName ); if ( !m_file->open( QIODevice::ReadOnly | QIODevice::Text ) ) { RiaLogging::error( QString( "Failed to open %1" ).arg( m_fileName ) ); delete m_file; m_file = nullptr; return false; } } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataFileParser::closeFile() { if ( m_file ) { m_file->close(); delete m_file; m_file = nullptr; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataPastedTextParser::RifCsvUserDataPastedTextParser( const QString& text, QString* errorText ) : RifCsvUserDataParser( errorText ) { m_text = text; m_textStream = nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataPastedTextParser::~RifCsvUserDataPastedTextParser() { if ( m_textStream ) { delete m_textStream; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QTextStream* RifCsvUserDataPastedTextParser::openDataStream() { if ( m_textStream ) { delete m_textStream; } m_textStream = new QTextStream( &m_text ); return m_textStream; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataPastedTextParser::closeDataStream() { if ( m_textStream ) { delete m_textStream; m_textStream = nullptr; } }