From e71f561236787f0ca7d1bb812c5959d0c83476a8 Mon Sep 17 00:00:00 2001 From: Geert Janssens Date: Thu, 8 Mar 2018 15:31:12 +0100 Subject: [PATCH] Bug 793467 - GnuCash crashes when trying to open a binary file instead of a CSV The cause was an uncaught exception from boost::tokenizer. Fix and add test case. --- .../csv-imp/assistant-csv-price-import.cpp | 2 +- .../csv-imp/assistant-csv-trans-import.cpp | 2 +- .../csv-imp/gnc-csv-tokenizer.cpp | 57 +++++++++++-------- .../import-export/csv-imp/gnc-tx-import.cpp | 2 +- .../csv-imp/test/test-tokenizer.cpp | 11 ++++ 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp b/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp index 53643eebc4..0db5c64b97 100644 --- a/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp +++ b/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp @@ -717,7 +717,7 @@ CsvImpPriceAssist::file_confirm_cb () catch (std::range_error &e) { /* Parsing failed ... */ - gnc_error_dialog (GTK_WINDOW(csv_imp_asst), "%s", e.what()); + gnc_error_dialog (GTK_WINDOW(csv_imp_asst), "%s", _(e.what())); return; } /* Get settings store and populate */ diff --git a/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp b/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp index 0d33e11051..c9100decdb 100644 --- a/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp +++ b/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp @@ -723,7 +723,7 @@ CsvImpTransAssist::file_confirm_cb () catch (std::range_error &e) { /* Parsing failed ... 
*/ - gnc_error_dialog (GTK_WINDOW (csv_imp_asst), "%s", e.what()); + gnc_error_dialog (GTK_WINDOW (csv_imp_asst), "%s", _(e.what())); return; } diff --git a/gnucash/import-export/csv-imp/gnc-csv-tokenizer.cpp b/gnucash/import-export/csv-imp/gnc-csv-tokenizer.cpp index 54532c77d0..4812511268 100644 --- a/gnucash/import-export/csv-imp/gnc-csv-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/gnc-csv-tokenizer.cpp @@ -11,6 +11,10 @@ #include <boost/locale.hpp> #include <boost/algorithm/string.hpp> +extern "C" { + #include <glib/gi18n.h> +} + void GncCsvTokenizer::set_separators(const std::string& separators) { @@ -34,33 +38,40 @@ int GncCsvTokenizer::tokenize() m_tokenized_contents.clear(); std::istringstream in_stream(m_utf8_contents); - while (std::getline (in_stream, buffer)) + try { - // --- deal with line breaks in quoted strings - buffer = boost::trim_copy (buffer); // Removes trailing newline and spaces - last_quote = buffer.find_first_of('"'); - while (last_quote != std::string::npos) + while (std::getline (in_stream, buffer)) { - if (last_quote == 0) // Test separately because last_quote - 1 would be out of range - inside_quotes = !inside_quotes; - else if (buffer[ last_quote - 1 ] != '\\') - inside_quotes = !inside_quotes; + // --- deal with line breaks in quoted strings + buffer = boost::trim_copy (buffer); // Removes trailing newline and spaces + last_quote = buffer.find_first_of('"'); + while (last_quote != std::string::npos) + { + if (last_quote == 0) // Test separately because last_quote - 1 would be out of range + inside_quotes = !inside_quotes; + else if (buffer[ last_quote - 1 ] != '\\') + inside_quotes = !inside_quotes; - last_quote = buffer.find_first_of('"',last_quote+1); + last_quote = buffer.find_first_of('"',last_quote+1); + } + + line.append(buffer); + if (inside_quotes) + { + line.append(" "); + continue; + } + // --- + + Tokenizer tok(line, sep); + vec.assign(tok.begin(),tok.end()); + m_tokenized_contents.push_back(vec); + line.clear(); } - - line.append(buffer); - if (inside_quotes) - { - line.append("
"); - continue; - } - // --- - - Tokenizer tok(line, sep); - vec.assign(tok.begin(),tok.end()); - m_tokenized_contents.push_back(vec); - line.clear(); + } + catch (boost::escaped_list_error &e) + { + throw (std::range_error N_("There was an error parsing the file.")); } return 0; diff --git a/gnucash/import-export/csv-imp/gnc-tx-import.cpp b/gnucash/import-export/csv-imp/gnc-tx-import.cpp index a43f29c543..3b65944cb4 100644 --- a/gnucash/import-export/csv-imp/gnc-tx-import.cpp +++ b/gnucash/import-export/csv-imp/gnc-tx-import.cpp @@ -404,7 +404,7 @@ void GncTxImport::tokenize (bool guessColTypes) /* If it failed, generate an error. */ if (m_parsed_lines.size() == 0) { - throw (std::range_error ("Tokenizing failed.")); + throw (std::range_error (N_("There was an error parsing the file."))); return; } diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp index a4cf1a9b32..977847553e 100644 --- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp @@ -138,6 +138,17 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file) * independently. */ +/* First test whether we're properly catching boost::tokenizer throws + * This happens when the input data has invalid escape sequences */ +TEST_F (GncTokenizerTest, tokenize_binary_data) +{ + GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get()); + csvtok->set_separators (","); + + set_utf8_contents (csv_tok, R"(\764Test,Something)"); + EXPECT_THROW (csv_tok->tokenize(), std::range_error); +} + /* This helper function will run the parse step on the given data * with the parser as configured by the calling test function. * This allows the same code to be used with different csv test strings