#3320 Observed data import. Line based CSV import

This commit is contained in:
Bjørn Erik Jensen 2018-09-19 13:02:49 +02:00
parent c1c87a0370
commit ce59e8a0d0
12 changed files with 311 additions and 66 deletions

View File

@ -184,7 +184,7 @@ std::vector<RimAsciiDataCurve*> RicPasteAsciiDataToSummaryPlotFeature::parseCurv
if (col->dataType != Column::NUMERIC) continue;
RimAsciiDataCurve* curve = new RimAsciiDataCurve();
curve->setTimeSteps(parser.dateTimeColumn()->dateTimeValues);
curve->setTimeSteps(parser.dateTimeColumn()->qDateTimeValues());
curve->setValues(parser.columnInfo(i)->values);
if (curvePrefix.isEmpty())
{

View File

@ -188,10 +188,17 @@ RicPasteAsciiDataToSummaryPlotFeatureUi::RicPasteAsciiDataToSummaryPlotFeatureUi
//--------------------------------------------------------------------------------------------------
void RicPasteAsciiDataToSummaryPlotFeatureUi::setUiModeImport(const QString& fileName)
{
    // Prepares this object for importing the CSV file `fileName`.
    // A file parser is created for the file and the UI mode is derived from the detected layout.
    m_parser = std::unique_ptr<RifCsvUserDataParser>(new RifCsvUserDataFileParser(fileName));

    // The mode is decided exactly once, after the layout is known. Setting UI_MODE_IMPORT and
    // calling initialize() up front as well would run initialize() twice for column based files
    // and once for line based files, which are meant to stay silent.
    if (m_parser->determineCsvLayout() != RifCsvUserDataParser::LineBased)
    {
        // Column based: initialize the UI fields from the parser
        m_uiMode = UI_MODE_IMPORT;
        initialize(m_parser.get());
    }
    else
    {
        // Line based: nothing to initialize, the mode is reported as silent
        m_uiMode = UI_MODE_SILENT;
    }
}
//--------------------------------------------------------------------------------------------------
@ -205,6 +212,14 @@ void RicPasteAsciiDataToSummaryPlotFeatureUi::setUiModePasteText(const QString&
initialize(m_parser.get());
}
//--------------------------------------------------------------------------------------------------
/// Returns the current UI mode (m_uiMode).
/// Despite the name, the result is not restricted to UI_MODE_IMPORT: setUiModeImport() leaves the
/// mode at UI_MODE_SILENT when the file is detected as line based.
//--------------------------------------------------------------------------------------------------
RicPasteAsciiDataToSummaryPlotFeatureUi::UiMode RicPasteAsciiDataToSummaryPlotFeatureUi::uiModeImport() const
{
return m_uiMode;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------

View File

@ -73,7 +73,8 @@ public:
{
UI_MODE_NONE,
UI_MODE_IMPORT,
UI_MODE_PASTE
UI_MODE_PASTE,
UI_MODE_SILENT
};
enum DecimalSeparator
@ -123,6 +124,7 @@ public:
void setUiModeImport(const QString& fileName);
void setUiModePasteText(const QString& text);
UiMode uiModeImport() const;
const AsciiDataParseOptions parseOptions() const;
void createNewPlot();

View File

@ -54,8 +54,6 @@ RifCsvUserData::~RifCsvUserData()
bool RifCsvUserData::parse(const QString& fileName, const AsciiDataParseOptions& parseOptions, QString* errorText)
{
m_allResultAddresses.clear();
m_timeSteps.clear();
m_mapFromAddressToTimeStepIndex.clear();
m_mapFromAddressToResultIndex.clear();
m_parser = std::unique_ptr<RifCsvUserDataFileParser>(new RifCsvUserDataFileParser(fileName, errorText));
@ -100,14 +98,25 @@ bool RifCsvUserData::values(const RifEclipseSummaryAddress& resultAddress, std::
//--------------------------------------------------------------------------------------------------
const std::vector<time_t>& RifCsvUserData::timeSteps(const RifEclipseSummaryAddress& resultAddress) const
{
    // Returns the time steps belonging to `resultAddress`.
    // Lookup order:
    //   1. time steps stored on the value column of the address itself
    //   2. the table's separate date time column
    //   3. an empty vector when neither exists

    // First, check whether date time values exist for the current address
    const auto search = m_mapFromAddressToResultIndex.find(resultAddress);
    if (search != m_mapFromAddressToResultIndex.end())
    {
        // Reuse the iterator instead of looking the address up a second time
        const size_t columnIndex = search->second;
        if (!m_parser->tableData().columnInfos()[columnIndex].dateTimeValues.empty())
        {
            return m_parser->tableData().columnInfos()[columnIndex].dateTimeValues;
        }
    }

    // Then check for a separate date time column (negative index means there is none)
    const int dateTimeIndex = m_parser->tableData().dateTimeColumnIndex();
    if (dateTimeIndex >= 0)
    {
        return m_parser->tableData().columnInfos()[dateTimeIndex].dateTimeValues;
    }

    // A reference is returned, so the fallback must outlive this call
    static const std::vector<time_t> emptyVector;
    return emptyVector;
}
@ -138,19 +147,6 @@ void RifCsvUserData::buildTimeStepsAndMappings()
{
auto tableData = m_parser->tableData();
std::vector<time_t> timeStepsForTable = createTimeSteps(tableData);
if (timeStepsForTable.empty())
{
RiaLogging::warning(QString("Failed to find time data for table in file"));
RiaLogging::warning(QString("No data for this table is imported"));
return;
}
m_timeSteps = timeStepsForTable;
for (size_t columnIndex = 0; columnIndex < tableData.columnInfos().size(); columnIndex++)
{
const Column& ci = tableData.columnInfos()[columnIndex];
@ -161,26 +157,7 @@ void RifCsvUserData::buildTimeStepsAndMappings()
m_allResultAddresses.insert(sumAddress);
if (sumAddress.isErrorResult()) m_allErrorAddresses.insert(sumAddress);
m_mapFromAddressToTimeStepIndex[sumAddress] = m_timeSteps.size() - 1;
m_mapFromAddressToResultIndex[sumAddress] = columnIndex;
}
}
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
std::vector<time_t> RifCsvUserData::createTimeSteps(const TableData& tableData)
{
std::vector<time_t> tsVector;
const Column& col = tableData.columnInfos()[0];
tsVector.reserve(col.dateTimeValues.size());
for (const QDateTime& qdt : col.dateTimeValues)
{
tsVector.push_back(qdt.toTime_t());
}
return tsVector;
}

View File

@ -54,12 +54,9 @@ public:
private:
void buildTimeStepsAndMappings();
static std::vector<time_t> createTimeSteps(const TableData& table);
private:
std::unique_ptr<RifCsvUserDataParser> m_parser;
std::vector<time_t> m_timeSteps;
std::map<RifEclipseSummaryAddress, size_t > m_mapFromAddressToTimeStepIndex;
std::map<RifEclipseSummaryAddress, size_t > m_mapFromAddressToResultIndex;
};

View File

@ -37,6 +37,26 @@
#include <QFile>
#include <cmath>
#include <limits>
#include <algorithm>
#include <utility>
//--------------------------------------------------------------------------------------------------
/// Internal constants
//--------------------------------------------------------------------------------------------------
// Stored as the error value when an ERROR cell of a line based file cannot be parsed as a number
#define DOUBLE_INF std::numeric_limits<double>::infinity()
// Qt date/time format strings handed to tryParseDateTime() for line based files.
// The date is tried alone first, then as "<date> <time>".
#define ISO_DATE_FORMAT "yyyy-MM-dd"
#define TIME_FORMAT "hh:mm:ss"
// One observation of a vector: (time step, value)
using Sample = std::pair<time_t, double>;
// NOTE(review): SampleList is not referenced in the code visible here - confirm it is needed
using SampleList = std::vector<Sample>;
//--------------------------------------------------------------------------------------------------
/// Columns recognized in a line based CSV file.
/// The enum values are used as indexes into CSV_LINE_BASED_COL_NAMES and into the index list
/// returned by parseLineBasedHeader(), so the enum and the name list must stay in the same order.
/// The ERROR column is called ERROR_VALUE in the enum; its header text is still "ERROR".
//--------------------------------------------------------------------------------------------------
enum CsvLineBasedColumnType { DATE, VECTOR, VALUE, ERROR_VALUE, COMMENTS };
const std::vector<QString> CSV_LINE_BASED_COL_NAMES = { "DATE", "VECTOR", "VALUE", "ERROR", "COMMENTS" };
//--------------------------------------------------------------------------------------------------
///
@ -59,7 +79,8 @@ RifCsvUserDataParser::~RifCsvUserDataParser()
//--------------------------------------------------------------------------------------------------
bool RifCsvUserDataParser::parse(const AsciiDataParseOptions& parseOptions)
{
    // Parses the data stream into the table data and returns true on success.
    // The layout is detected first. The line based parser takes no options (it reads ';' separated
    // cells and ISO dates), so `parseOptions` is only forwarded to the column based parser.
    if (determineCsvLayout() == LineBased) return parseLineBasedData();

    return parseColumnBasedData(parseOptions);
}
//--------------------------------------------------------------------------------------------------
@ -96,6 +117,29 @@ const Column* RifCsvUserDataParser::dateTimeColumn() const
return nullptr;
}
//--------------------------------------------------------------------------------------------------
/// Locates the line based column names (in the order of CSV_LINE_BASED_COL_NAMES) in a header line.
///
/// \param headerCols  cells of the candidate header line
/// \return            for every name that was found, the index of its first occurrence in
///                    headerCols. An empty vector if one of the first three names (DATE, VECTOR,
///                    VALUE) is missing.
///
/// Names that are not found add nothing to the result. The first three entries therefore always
/// correspond to DATE, VECTOR and VALUE, but a later entry is only guaranteed to match its
/// CsvLineBasedColumnType position when every optional column before it is present.
//--------------------------------------------------------------------------------------------------
std::vector<int> RifCsvUserDataParser::parseLineBasedHeader(QStringList headerCols)
{
    const int requiredNameCount = 3; // DATE, VECTOR and VALUE are mandatory

    std::vector<int> foundPositions;

    int nameIndex = 0;
    for (const QString& wantedName : CSV_LINE_BASED_COL_NAMES)
    {
        // First header cell that equals the wanted name, -1 if there is none
        const int headerIndex = headerCols.indexOf(wantedName);

        if (headerIndex >= 0)
        {
            foundPositions.push_back(headerIndex);
        }
        else if (nameIndex < requiredNameCount)
        {
            // A mandatory column is missing: this is not a line based header
            return {};
        }

        ++nameIndex;
    }

    return foundPositions;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
@ -174,6 +218,31 @@ QString RifCsvUserDataParser::previewText(int lineCount, const AsciiDataParseOpt
return preview;
}
//--------------------------------------------------------------------------------------------------
/// Detects whether the data stream holds line based or column based CSV data.
///
/// The first non-empty line that splits into 3 to 5 ';' separated cells is taken as the header
/// candidate. The layout is LineBased when that candidate contains the DATE, VECTOR and VALUE
/// column names, otherwise ColumnBased (also when no candidate line exists).
///
/// The data stream is opened and closed by this call.
//--------------------------------------------------------------------------------------------------
RifCsvUserDataParser::CsvLayout RifCsvUserDataParser::determineCsvLayout()
{
    // NOTE(review): the result of openDataStream() is dereferenced without a null check - confirm
    // that it cannot return null, e.g. for a file that cannot be opened.
    QTextStream* dataStream = openDataStream();

    // Stays empty unless a line with an acceptable cell count is found, so a rejected line can
    // never be classified after the loop.
    QStringList headers;

    while (!dataStream->atEnd())
    {
        const QString line = dataStream->readLine();
        if (line.trimmed().isEmpty()) continue;

        // Split and trim the same way parseLineBasedData() does, so detection and parsing agree
        // on headers written with spaces around the separator.
        const QStringList candidate = RifFileParseTools::splitLineAndTrim(line, ";");
        if (candidate.size() < 3 || candidate.size() > 5) continue;

        headers = candidate;
        break;
    }
    closeDataStream();

    const bool hasLineBasedColumns = headers.contains(CSV_LINE_BASED_COL_NAMES[DATE])
                                  && headers.contains(CSV_LINE_BASED_COL_NAMES[VECTOR])
                                  && headers.contains(CSV_LINE_BASED_COL_NAMES[VALUE]);

    return hasLineBasedColumns ? LineBased : ColumnBased;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
@ -210,7 +279,7 @@ bool RifCsvUserDataParser::parseColumnInfo(QTextStream* dataStream, const AsciiD
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
bool RifCsvUserDataParser::parseData(const AsciiDataParseOptions& parseOptions)
bool RifCsvUserDataParser::parseColumnBasedData(const AsciiDataParseOptions& parseOptions)
{
bool errors = false;
enum { FIRST_DATA_ROW, DATA_ROW } parseState = FIRST_DATA_ROW;
@ -326,7 +395,8 @@ bool RifCsvUserDataParser::parseData(const AsciiDataParseOptions& parseOptions)
if (m_errorText) m_errorText->append("CSV import: Failed to parse date time value");
throw 0;
}
col.dateTimeValues.push_back(dt);
col.dateTimeValues.push_back(dt.toTime_t());
}
}
catch (...)
@ -348,6 +418,148 @@ bool RifCsvUserDataParser::parseData(const AsciiDataParseOptions& parseOptions)
return !errors;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
bool RifCsvUserDataParser::parseLineBasedData()
{
bool errors = false;
QTextStream* dataStream = openDataStream();
std::vector<CsvLineBasedColumnType> columns;
std::map<RifEclipseSummaryAddress, std::vector<std::pair<time_t, double>>> addressesAndData;
std::vector<int> colIndexes;
// Parse header
int lineCount = 0;
bool headerFound = false;
bool expectErrorValue = false;
while (!dataStream->atEnd() && !errors)
{
lineCount++;
QString line = dataStream->readLine();
if (line.trimmed().isEmpty()) continue;
QStringList dataItems = RifFileParseTools::splitLineAndTrim(line, ";");
if (dataItems.size() < 3 || dataItems.size() > 5) continue;
if (!headerFound)
{
colIndexes = parseLineBasedHeader(dataItems);
if (!colIndexes.empty())
{
headerFound = true;
expectErrorValue = colIndexes.size() > ERROR_VALUE && colIndexes[ERROR_VALUE] >= 0;
}
continue;
}
if(dataItems.size() != (int)colIndexes.size()) continue;
{
auto textAddr = dataItems[colIndexes[VECTOR]];
auto addr = RifEclipseSummaryAddress::fromEclipseTextAddress(textAddr.toStdString());
auto errAddr = addr;
errAddr.setAsErrorResult();
if (!addr.isValid()) continue;
// VECTOR
{
if (addressesAndData.find(addr) == addressesAndData.end())
{
addressesAndData.insert(std::make_pair(addr, std::vector<Sample>()));
}
// Create error address if error value is expected
if (expectErrorValue)
{
if (addressesAndData.find(errAddr) == addressesAndData.end())
{
addressesAndData.insert(std::make_pair(errAddr, std::vector<Sample>()));
}
}
}
// DATE
QDateTime dateTime;
{
auto dateText = dataItems[colIndexes[DATE]].toStdString();
dateTime = tryParseDateTime(dateText, ISO_DATE_FORMAT);
if (!dateTime.isValid())
{
// Try to match date and time
dateTime = tryParseDateTime(dateText, QString(ISO_DATE_FORMAT) + " " + TIME_FORMAT);
}
if (!dateTime.isValid())
{
if (m_errorText) m_errorText->append(QString("CSV import: Failed to parse date time value in line %1").arg(QString::number(lineCount)));
throw 0;
}
}
// VALUE
{
bool parseOk = true;
double value = QLocale::c().toDouble(dataItems[colIndexes[VALUE]], &parseOk);
if (!parseOk)
{
if (m_errorText) m_errorText->append(QString("CSV import: Failed to parse numeric value in line %1\n").arg(QString::number(lineCount)));
throw 0;
}
auto& samples = addressesAndData[addr];
samples.push_back(std::make_pair(dateTime.toTime_t(), value));
}
// ERROR VALUE
if(expectErrorValue)
{
bool parseOk = true;
double value = QLocale::c().toDouble(dataItems[colIndexes[ERROR_VALUE]], &parseOk);
if (!parseOk) value = DOUBLE_INF;
auto& samples = addressesAndData[errAddr];
samples.push_back(std::make_pair(dateTime.toTime_t(), value));
}
}
}
closeDataStream();
if (!errors)
{
std::vector<Column> columnInfoList;
for (const auto& item : addressesAndData)
{
auto samples = item.second;
// Sort samples by time
std::sort(samples.begin(), samples.end(),
[](const Sample& s1, const Sample& s2) {return s1.first < s2.first; });
// Copy
Column c = Column::createColumnInfoFromCsvData(item.first, "");
c.dataType = Column::NUMERIC;
for (const auto& sample : samples)
{
c.dateTimeValues.push_back(sample.first);
c.values.push_back(sample.second);
}
columnInfoList.push_back(c);
}
TableData td("", "", columnInfoList);
m_tableData = td;
}
return !errors;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------

View File

@ -39,6 +39,9 @@ class AsciiDataParseOptions;
//==================================================================================================
class RifCsvUserDataParser
{
public:
enum CsvLayout { ColumnBased, LineBased };
public:
RifCsvUserDataParser(QString* errorText = nullptr);
virtual ~RifCsvUserDataParser();
@ -46,12 +49,14 @@ public:
bool parse(const AsciiDataParseOptions& parseOptions);
const TableData& tableData() const;
const Column* columnInfo(size_t columnIndex) const;
const Column* dateTimeColumn() const;
const Column* columnInfo(size_t columnIndex) const;
const Column* dateTimeColumn() const;
bool parseColumnInfo(const AsciiDataParseOptions& parseOptions);
QString previewText(int lineCount, const AsciiDataParseOptions& parseOptions);
CsvLayout determineCsvLayout();
QString tryDetermineCellSeparator();
QString tryDetermineDecimalSeparator(const QString& cellSeparator);
@ -59,13 +64,16 @@ public:
protected:
virtual QTextStream* openDataStream() = 0;
virtual void closeDataStream() = 0;
virtual void closeDataStream() = 0;
private:
std::vector<int> parseLineBasedHeader(QStringList headerCols);
bool parseColumnInfo(QTextStream* dataStream,
const AsciiDataParseOptions& parseOptions,
std::vector<Column>* columnInfoList);
bool parseData(const AsciiDataParseOptions& parseOptions);
bool parseColumnBasedData(const AsciiDataParseOptions& parseOptions);
bool parseLineBasedData();
static QDateTime tryParseDateTime(const std::string& colData, const QString& format);
private:

View File

@ -942,9 +942,9 @@ size_t Column::itemCount() const
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
Column Column::createColumnInfoFromRsmData(const std::string& quantity, const std::string& unit, const RifEclipseSummaryAddress& adr)
Column Column::createColumnInfoFromRsmData(const std::string& quantity, const std::string& unit, const RifEclipseSummaryAddress& addr)
{
Column ci(adr, unit);
Column ci(addr, unit);
if (RifEclipseUserDataKeywordTools::isDate(quantity))
{
@ -970,6 +970,24 @@ Column Column::createColumnInfoFromCsvData(const RifEclipseSummaryAddress& addr,
return col;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------
std::vector<QDateTime> Column::qDateTimeValues() const
{
std::vector<QDateTime> output;
for (auto t : dateTimeValues) output.push_back(RiaQDateTimeTools::fromTime_t(t));
return output;
}
//--------------------------------------------------------------------------------------------------
/// Returns the stored date time column index (m_dateTimeColumnIndex).
/// The TableData constructor visible in this change sets it to the index of the first column whose
/// dataType is Column::DATETIME, or to -1 when the table has no such column.
//--------------------------------------------------------------------------------------------------
int TableData::dateTimeColumnIndex() const
{
return m_dateTimeColumnIndex;
}
//--------------------------------------------------------------------------------------------------
///
//--------------------------------------------------------------------------------------------------

View File

@ -60,7 +60,7 @@ public:
size_t itemCount() const;
public:
static Column createColumnInfoFromRsmData(const std::string& quantity, const std::string& unit, const RifEclipseSummaryAddress& adr);
static Column createColumnInfoFromRsmData(const std::string& quantity, const std::string& unit, const RifEclipseSummaryAddress& addr);
static Column createColumnInfoFromCsvData(const RifEclipseSummaryAddress& addr, const std::string& unit);
RifEclipseSummaryAddress summaryAddress;
@ -71,7 +71,10 @@ public:
// Data containers
std::vector<double> values;
std::vector<std::string > textValues;
std::vector<QDateTime> dateTimeValues;
//std::vector<QDateTime> dateTimeValues;
std::vector<time_t> dateTimeValues;
std::vector<QDateTime> qDateTimeValues() const;
};
@ -89,8 +92,17 @@ public:
const std::vector<Column>& columnInfos)
: m_origin(origin),
m_startDate(startDate),
m_dateTimeColumnIndex(-1),
m_columnInfos(columnInfos)
{
for (size_t i = 0; i < columnInfos.size(); i++)
{
if (columnInfos[i].dataType == Column::DATETIME)
{
m_dateTimeColumnIndex = (int)i;
break;
}
}
}
std::string origin() const
@ -113,11 +125,13 @@ public:
return m_columnInfos;
}
int dateTimeColumnIndex() const;
QDateTime findFirstDate() const;
private:
std::string m_origin;
std::string m_startDate;
int m_dateTimeColumnIndex;
std::vector<Column> m_columnInfos;
};

View File

@ -70,10 +70,9 @@ bool RifReaderObservedData::open(const QString& headerFileName,
{
if (m_asciiParser && m_asciiParser->dateTimeColumn())
{
for (QDateTime timeStep : m_asciiParser->dateTimeColumn()->dateTimeValues)
for (time_t timeStep : m_asciiParser->dateTimeColumn()->dateTimeValues)
{
time_t t = timeStep.toTime_t();
m_timeSteps.push_back(t);
m_timeSteps.push_back(timeStep);
}
m_allResultAddresses.clear();

View File

@ -169,10 +169,13 @@ RimObservedData* RimObservedDataCollection::createAndAddCvsObservedDataFromFile(
}
parseOptions->setUiModeImport(fileName);
caf::PdmUiPropertyViewDialog propertyDialog(nullptr, parseOptions, "CSV Import Options", "");
if (propertyDialog.exec() != QDialog::Accepted)
if (parseOptions->uiModeImport() != RicPasteAsciiDataToSummaryPlotFeatureUi::UI_MODE_SILENT)
{
return nullptr;
caf::PdmUiPropertyViewDialog propertyDialog(nullptr, parseOptions, "CSV Import Options", "");
if (propertyDialog.exec() != QDialog::Accepted)
{
return nullptr;
}
}
caf::PdmSettings::writeFieldsToApplicationStore(parseOptions);

View File

@ -36,7 +36,7 @@ TEST(RifColumnBasedAsciiParserTest, TestDateFormatYyyymmddWithDash)
ASSERT_TRUE(parser.parse(parseOptions));
ASSERT_TRUE(parser.dateTimeColumn() != nullptr);
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->dateTimeValues;
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->qDateTimeValues();
ASSERT_EQ(size_t(4), timeSteps.size());
EXPECT_EQ("1993-02-23", timeSteps[0].toString(parseOptions.dateFormat).toStdString());
@ -69,7 +69,7 @@ TEST(RifColumnBasedAsciiParserTest, TestDateFormatYymmddWithDot)
ASSERT_TRUE(parser.parse(parseOptions));
ASSERT_TRUE(parser.dateTimeColumn() != nullptr);
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->dateTimeValues;
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->qDateTimeValues();
ASSERT_EQ(size_t(4), timeSteps.size());
EXPECT_EQ("93.02.23", timeSteps[0].toString(parseOptions.dateFormat).toStdString());
@ -98,7 +98,7 @@ TEST(RifColumnBasedAsciiParserTest, TestDateFormatDdmmyyWithDot)
ASSERT_TRUE(parser.parse(parseOptions));
ASSERT_TRUE(parser.dateTimeColumn() != nullptr);
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->dateTimeValues;
std::vector<QDateTime> timeSteps = parser.dateTimeColumn()->qDateTimeValues();
ASSERT_EQ(size_t(4), timeSteps.size());
EXPECT_EQ("23.02.93", timeSteps[0].toString(parseOptions.dateFormat).toStdString());