///////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2017- Statoil ASA // // ResInsight is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // ResInsight is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU General Public License at // for more details. // ///////////////////////////////////////////////////////////////////////////////// #include "RifCsvUserDataParser.h" #include "RifEclipseUserDataKeywordTools.h" #include "RifEclipseUserDataParserTools.h" #include "RifFileParseTools.h" #include "RiaDateStringParser.h" #include "RiaLogging.h" #include "RiaStdStringTools.h" #include "RiaTextStringTools.h" #include "RiaQDateTimeTools.h" #include "../Commands/SummaryPlotCommands/RicPasteAsciiDataToSummaryPlotFeatureUi.h" #include "cvfAssert.h" #include #include #include #include #include #include #include //-------------------------------------------------------------------------------------------------- /// Internal constants //-------------------------------------------------------------------------------------------------- #define DOUBLE_INF std::numeric_limits::infinity() #define ISO_DATE_FORMAT "yyyy-MM-dd" #define TIME_FORMAT "hh:mm:ss" using Sample = std::pair; using SampleList = std::vector; //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- enum class CsvLineBasedColumnType { DATE, VECTOR, VALUE, ERROR_VALUE, COMMENTS }; const std::vector CSV_LINE_BASED_COL_NAMES = { "DATE", "VECTOR", "VALUE", "ERROR", "COMMENTS" }; //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::RifCsvUserDataParser(QString* errorText) : m_errorText(errorText) { } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::~RifCsvUserDataParser() { } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parse(const AsciiDataParseOptions& parseOptions) { if (determineCsvLayout() == LineBased) return parseLineBasedData(); return parseColumnBasedData(parseOptions); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const TableData& RifCsvUserDataParser::tableData() const { return m_tableData; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const Column* RifCsvUserDataParser::columnInfo(size_t columnIndex) const { if (columnIndex >= m_tableData.columnInfos().size()) return nullptr; return &(m_tableData.columnInfos()[columnIndex]); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- const Column* RifCsvUserDataParser::dateTimeColumn() const { for (const Column& col : m_tableData.columnInfos()) { if (col.dataType == Column::DATETIME) { return &col; } } return nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- std::vector RifCsvUserDataParser::parseLineBasedHeader(QStringList headerCols) { std::vector colIndexes; for (int i = 0; i < (int)CSV_LINE_BASED_COL_NAMES.size(); i++) { for (int j = 0; j < (int)headerCols.size(); j++) { if (headerCols[j] == CSV_LINE_BASED_COL_NAMES[i]) { colIndexes.push_back(j); break; } } if (i < 3 && (int)colIndexes.size() < i + 1) return {}; } return colIndexes; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnInfo(const AsciiDataParseOptions& parseOptions) { QTextStream* dataStream = openDataStream(); std::vector columnInfoList; bool result = parseColumnInfo(dataStream, parseOptions, &columnInfoList); if (result) { m_tableData = TableData("", "", columnInfoList); } closeDataStream(); return result; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::previewText(int lineCount, const AsciiDataParseOptions& parseOptions) { QTextStream *stream = openDataStream(); if (!stream) return ""; QString preview; QTextStream outStream(&preview); int iLine = 0; bool header = true; int timeColumnIndex = -1; outStream << ""; outStream << ""; while (iLine < lineCount && !stream->atEnd()) { QString line = stream->readLine(); if (line.isEmpty()) continue; outStream << ""; int iCol = 0; QStringList cols = RifFileParseTools::splitLineAndTrim(line, parseOptions.cellSeparator); for (const QString& cellData : cols) { if (cellData == parseOptions.timeSeriesColumnName && header) { timeColumnIndex = iCol; } outStream << (header ? ""; outStream << cellData; if (iCol < cols.size() - 1 && (parseOptions.cellSeparator == ";" || parseOptions.cellSeparator == ",")) { outStream << parseOptions.cellSeparator; } outStream << (header ? "" : ""); iCol++; } outStream << ""; header = false; iLine++; } outStream << "
"; closeDataStream(); return preview; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataParser::CsvLayout RifCsvUserDataParser::determineCsvLayout() { QTextStream* dataStream = openDataStream(); QString firstLine; QStringList headers; while (!dataStream->atEnd()) { firstLine = dataStream->readLine(); if (firstLine.isEmpty()) continue; headers = firstLine.split(';'); if (headers.size() < 3 || headers.size() > 5) continue; break; } closeDataStream(); if (headers.contains(CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::DATE]) && headers.contains(CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::VECTOR]) && headers.contains(CSV_LINE_BASED_COL_NAMES[(size_t)CsvLineBasedColumnType::VALUE])) return LineBased; return ColumnBased; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnInfo(QTextStream* dataStream, const AsciiDataParseOptions& parseOptions, std::vector* columnInfoList) { bool headerFound = false; if (!columnInfoList) return false; columnInfoList->clear(); while (!headerFound) { QString line = dataStream->readLine(); if (line.trimmed().isEmpty()) continue; QStringList lineColumns = RifFileParseTools::splitLineAndTrim(line, parseOptions.cellSeparator); int colCount = lineColumns.size(); for (int iCol = 0; iCol < colCount; iCol++) { QString colName = RiaTextStringTools::trimAndRemoveDoubleSpaces(lineColumns[iCol]); RifEclipseSummaryAddress addr = RifEclipseSummaryAddress::fromEclipseTextAddress(colName.toStdString()); Column col = Column::createColumnInfoFromCsvData(addr, ""); columnInfoList->push_back(col); } headerFound = true; } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseColumnBasedData(const AsciiDataParseOptions& parseOptions) { bool errors = false; enum { FIRST_DATA_ROW, DATA_ROW } parseState = FIRST_DATA_ROW; int colCount; std::vector columnInfoList; QTextStream* dataStream = openDataStream(); // Parse header if (!parseColumnInfo(dataStream, parseOptions, &columnInfoList)) { if (m_errorText) m_errorText->append("CSV import: Failed to parse header columns"); return false; } colCount = (int)columnInfoList.size(); while (!dataStream->atEnd() && !errors) { QString line = dataStream->readLine(); if(line.trimmed().isEmpty()) continue; QStringList lineColumns = RifFileParseTools::splitLineAndTrim(line, parseOptions.cellSeparator); if(lineColumns.size() != colCount) { if (m_errorText) m_errorText->append("CSV import: Varying number of columns"); errors = true; break; } else if(parseState == FIRST_DATA_ROW) { for (int iCol = 0; iCol < colCount; iCol++) { std::string colData = lineColumns[iCol].toStdString(); Column& col = columnInfoList[iCol]; // Determine column data type if (col.dataType == Column::NONE) { if (QString::fromStdString(col.summaryAddress.quantityName()) == parseOptions.timeSeriesColumnName) { col.dataType = Column::DATETIME; } else { if (parseOptions.assumeNumericDataColumns || RiaStdStringTools::isNumber(colData, parseOptions.locale.decimalPoint().toAscii())) { col.dataType = Column::NUMERIC; } else { col.dataType = Column::TEXT; } } } } parseState = DATA_ROW; } if (parseState == DATA_ROW) { for (int iCol = 0; iCol < colCount; iCol++) { QString& colData = lineColumns[iCol]; Column& col = columnInfoList[iCol]; try { if (col.dataType == Column::NUMERIC) { bool parseOk = true; double value = parseOptions.locale.toDouble(colData, &parseOk); if (!parseOk) { // Find the error reason, wrong decimal sign or something else if (RiaStdStringTools::isNumber(colData.toStdString(), '.') || RiaStdStringTools::isNumber(colData.toStdString(), ',')) { if (m_errorText) m_errorText->append(QString("CSV import: Failed to parse numeric value in column %1\n").arg(QString::number(iCol + 1))); throw 0; } // Add nullptr value value = HUGE_VAL; } col.values.push_back(value); } else if (col.dataType == Column::TEXT) { col.textValues.push_back(colData.toStdString()); } else if (col.dataType == Column::DATETIME) { QDateTime dt; dt = tryParseDateTime(colData.toStdString(), parseOptions.dateTimeFormat); if (!dt.isValid() && !parseOptions.useCustomDateTimeFormat) { // Try to match date format only if (parseOptions.dateFormat != parseOptions.dateTimeFormat) { dt = tryParseDateTime(colData.toStdString(), parseOptions.dateFormat); } if (!dt.isValid() && !parseOptions.fallbackDateTimeFormat.isEmpty()) { dt = tryParseDateTime(colData.toStdString(), parseOptions.fallbackDateTimeFormat); } } if (!dt.isValid()) { if (m_errorText) m_errorText->append("CSV import: Failed to parse date time value"); throw 0; } col.dateTimeValues.push_back(dt.toTime_t()); } } catch (...) { errors = true; break; } } } } closeDataStream(); if (!errors) { TableData td("", "", columnInfoList); m_tableData = td; } return !errors; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataParser::parseLineBasedData() { bool errors = false; QTextStream* dataStream = openDataStream(); std::map>> addressesAndData; std::vector colIndexes; // Parse header int lineCount = 0; bool headerFound = false; bool expectErrorValue = false; while (!dataStream->atEnd() && !errors) { lineCount++; QString line = dataStream->readLine(); if (line.trimmed().isEmpty()) continue; QStringList dataItems = RifFileParseTools::splitLineAndTrim(line, ";"); if (dataItems.size() < 3 || dataItems.size() > 5) continue; if (!headerFound) { colIndexes = parseLineBasedHeader(dataItems); if (!colIndexes.empty()) { headerFound = true; expectErrorValue = colIndexes.size() > (size_t)CsvLineBasedColumnType::ERROR_VALUE && colIndexes[(size_t)CsvLineBasedColumnType::ERROR_VALUE] >= 0; } continue; } if(dataItems.size() != (int)colIndexes.size()) continue; { auto textAddr = dataItems[colIndexes[(size_t)CsvLineBasedColumnType::VECTOR]]; auto addr = RifEclipseSummaryAddress::fromEclipseTextAddress(textAddr.toStdString()); auto errAddr = addr; errAddr.setAsErrorResult(); if (!addr.isValid()) continue; // VECTOR { if (addressesAndData.find(addr) == addressesAndData.end()) { addressesAndData.insert(std::make_pair(addr, std::vector())); } // Create error address if error value is expected if (expectErrorValue) { if (addressesAndData.find(errAddr) == addressesAndData.end()) { addressesAndData.insert(std::make_pair(errAddr, std::vector())); } } } // DATE QDateTime dateTime; { auto dateText = dataItems[colIndexes[(size_t)CsvLineBasedColumnType::DATE]].toStdString(); dateTime = tryParseDateTime(dateText, ISO_DATE_FORMAT); if (!dateTime.isValid()) { // Try to match date and time dateTime = tryParseDateTime(dateText, QString(ISO_DATE_FORMAT) + " " + TIME_FORMAT); } if (!dateTime.isValid()) { if (m_errorText) m_errorText->append(QString("CSV import: Failed to parse date time value in line %1").arg(QString::number(lineCount))); throw 0; } } // VALUE { bool parseOk = true; double value = QLocale::c().toDouble(dataItems[colIndexes[(size_t)CsvLineBasedColumnType::VALUE]], &parseOk); if (!parseOk) { if (m_errorText) m_errorText->append(QString("CSV import: Failed to parse numeric value in line %1\n").arg(QString::number(lineCount))); throw 0; } auto& samples = addressesAndData[addr]; samples.push_back(std::make_pair(dateTime.toTime_t(), value)); } // ERROR VALUE if(expectErrorValue) { bool parseOk = true; double value = QLocale::c().toDouble(dataItems[colIndexes[(size_t)CsvLineBasedColumnType::ERROR_VALUE]], &parseOk); if (!parseOk) value = DOUBLE_INF; auto& samples = addressesAndData[errAddr]; samples.push_back(std::make_pair(dateTime.toTime_t(), value)); } } } closeDataStream(); if (!errors) { std::vector columnInfoList; for (const auto& item : addressesAndData) { auto samples = item.second; // Sort samples by time std::sort(samples.begin(), samples.end(), [](const Sample& s1, const Sample& s2) {return s1.first < s2.first; }); // Copy Column c = Column::createColumnInfoFromCsvData(item.first, ""); c.dataType = Column::NUMERIC; for (const auto& sample : samples) { c.dateTimeValues.push_back(sample.first); c.values.push_back(sample.second); } columnInfoList.push_back(c); } TableData td("", "", columnInfoList); m_tableData = td; } return !errors; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QDateTime RifCsvUserDataParser::tryParseDateTime(const std::string& colData, const QString& format) { return RiaQDateTimeTools::fromString(QString::fromStdString(colData), format); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::tryDetermineCellSeparator() { QTextStream* dataStream = openDataStream(); std::vector lines; int iLine = 0; while(iLine < 10 && !dataStream->atEnd()) { QString line = dataStream->readLine(); if(line.isEmpty()) continue; lines.push_back(line); iLine++; } closeDataStream(); // Try different cell separators int totColumnCountTab = 0; int totColumnCountSemicolon = 0; int totColumnCountComma = 0; for (const QString& line : lines) { totColumnCountTab += RifFileParseTools::splitLineAndTrim(line, "\t").size(); totColumnCountSemicolon += RifFileParseTools::splitLineAndTrim(line, ";").size(); totColumnCountComma += RifFileParseTools::splitLineAndTrim(line, ",").size(); } double avgColumnCountTab = (double)totColumnCountTab / lines.size(); double avgColumnCountSemicolon = (double)totColumnCountSemicolon / lines.size(); double avgColumnCountComma = (double)totColumnCountComma / lines.size(); // Select the one having highest average double maxAvg = std::max(std::max(avgColumnCountTab, avgColumnCountSemicolon), avgColumnCountComma); if (maxAvg == avgColumnCountTab) return "\t"; if (maxAvg == avgColumnCountSemicolon) return ";"; if (maxAvg == avgColumnCountComma) return ","; return ""; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QString RifCsvUserDataParser::tryDetermineDecimalSeparator(const QString& cellSeparator) { QTextStream* dataStream = openDataStream(); int iLine = 0; int successfulParsesDot = 0; int successfulParsesComma = 0; while (iLine < 10 && !dataStream->atEnd()) { QString line = dataStream->readLine(); if (line.isEmpty()) continue; for (const QString& cellData : RifFileParseTools::splitLineAndTrim(line, cellSeparator)) { bool parseOk; QLocale locale; locale = localeFromDecimalSeparator("."); locale.toDouble(cellData, &parseOk); if (parseOk) successfulParsesDot++; locale = localeFromDecimalSeparator(","); locale.toDouble(cellData, &parseOk); if (parseOk) successfulParsesComma++; } iLine++; } closeDataStream(); if (successfulParsesComma > successfulParsesDot) return ","; else return "."; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QLocale RifCsvUserDataParser::localeFromDecimalSeparator(const QString& decimalSeparator) { if (decimalSeparator == ",") { return QLocale::Norwegian; } return QLocale::c(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataFileParser::RifCsvUserDataFileParser(const QString& fileName, QString* errorText) : RifCsvUserDataParser(errorText) { m_fileName = fileName; m_file = nullptr; m_textStream = nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataFileParser::~RifCsvUserDataFileParser() { if (m_textStream) { delete m_textStream; } closeFile(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QTextStream* RifCsvUserDataFileParser::openDataStream() { if (!openFile()) return nullptr; m_textStream = new QTextStream(m_file); return m_textStream; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataFileParser::closeDataStream() { if (m_textStream) { delete m_textStream; m_textStream = nullptr; } closeFile(); } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- bool RifCsvUserDataFileParser::openFile() { if (!m_file) { m_file = new QFile(m_fileName); if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) { RiaLogging::error(QString("Failed to open %1").arg(m_fileName)); delete m_file; return false; } } return true; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataFileParser::closeFile() { if (m_file) { m_file->close(); delete m_file; m_file = nullptr; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataPastedTextParser::RifCsvUserDataPastedTextParser(const QString& text, QString* errorText): RifCsvUserDataParser(errorText) { m_text = text; m_textStream = nullptr; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- RifCsvUserDataPastedTextParser::~RifCsvUserDataPastedTextParser() { if (m_textStream) { delete m_textStream; } } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- QTextStream* RifCsvUserDataPastedTextParser::openDataStream() { if (m_textStream) { delete m_textStream; } m_textStream = new QTextStream(&m_text); return m_textStream; } //-------------------------------------------------------------------------------------------------- /// //-------------------------------------------------------------------------------------------------- void RifCsvUserDataPastedTextParser::closeDataStream() { if (m_textStream) { delete m_textStream; m_textStream = nullptr; } }