Bug 793467 - GnuCash crashes when trying to open a binary file instead of a CSV

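The cause was an uncaught boost::escaped_list_error exception thrown by boost::tokenizer.
Fix this by catching it in the CSV tokenizer and rethrowing it as a std::range_error, which the import assistants already handle, and add a test case.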
This commit is contained in:
Geert Janssens 2018-03-08 15:31:12 +01:00
parent 6fe7d88548
commit e71f561236
5 changed files with 48 additions and 26 deletions

View File

@ -717,7 +717,7 @@ CsvImpPriceAssist::file_confirm_cb ()
catch (std::range_error &e) catch (std::range_error &e)
{ {
/* Parsing failed ... */ /* Parsing failed ... */
gnc_error_dialog (GTK_WINDOW(csv_imp_asst), "%s", e.what()); gnc_error_dialog (GTK_WINDOW(csv_imp_asst), "%s", _(e.what()));
return; return;
} }
/* Get settings store and populate */ /* Get settings store and populate */

View File

@ -723,7 +723,7 @@ CsvImpTransAssist::file_confirm_cb ()
catch (std::range_error &e) catch (std::range_error &e)
{ {
/* Parsing failed ... */ /* Parsing failed ... */
gnc_error_dialog (GTK_WINDOW (csv_imp_asst), "%s", e.what()); gnc_error_dialog (GTK_WINDOW (csv_imp_asst), "%s", _(e.what()));
return; return;
} }

View File

@ -11,6 +11,10 @@
#include <boost/locale.hpp> #include <boost/locale.hpp>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
extern "C" {
#include <glib/gi18n.h>
}
void void
GncCsvTokenizer::set_separators(const std::string& separators) GncCsvTokenizer::set_separators(const std::string& separators)
{ {
@ -34,33 +38,40 @@ int GncCsvTokenizer::tokenize()
m_tokenized_contents.clear(); m_tokenized_contents.clear();
std::istringstream in_stream(m_utf8_contents); std::istringstream in_stream(m_utf8_contents);
while (std::getline (in_stream, buffer)) try
{ {
// --- deal with line breaks in quoted strings while (std::getline (in_stream, buffer))
buffer = boost::trim_copy (buffer); // Removes trailing newline and spaces
last_quote = buffer.find_first_of('"');
while (last_quote != std::string::npos)
{ {
if (last_quote == 0) // Test separately because last_quote - 1 would be out of range // --- deal with line breaks in quoted strings
inside_quotes = !inside_quotes; buffer = boost::trim_copy (buffer); // Removes trailing newline and spaces
else if (buffer[ last_quote - 1 ] != '\\') last_quote = buffer.find_first_of('"');
inside_quotes = !inside_quotes; while (last_quote != std::string::npos)
{
if (last_quote == 0) // Test separately because last_quote - 1 would be out of range
inside_quotes = !inside_quotes;
else if (buffer[ last_quote - 1 ] != '\\')
inside_quotes = !inside_quotes;
last_quote = buffer.find_first_of('"',last_quote+1); last_quote = buffer.find_first_of('"',last_quote+1);
}
line.append(buffer);
if (inside_quotes)
{
line.append(" ");
continue;
}
// ---
Tokenizer tok(line, sep);
vec.assign(tok.begin(),tok.end());
m_tokenized_contents.push_back(vec);
line.clear();
} }
}
line.append(buffer); catch (boost::escaped_list_error &e)
if (inside_quotes) {
{ throw (std::range_error N_("There was an error parsing the file."));
line.append(" ");
continue;
}
// ---
Tokenizer tok(line, sep);
vec.assign(tok.begin(),tok.end());
m_tokenized_contents.push_back(vec);
line.clear();
} }
return 0; return 0;

View File

@ -404,7 +404,7 @@ void GncTxImport::tokenize (bool guessColTypes)
/* If it failed, generate an error. */ /* If it failed, generate an error. */
if (m_parsed_lines.size() == 0) if (m_parsed_lines.size() == 0)
{ {
throw (std::range_error ("Tokenizing failed.")); throw (std::range_error (N_("There was an error parsing the file.")));
return; return;
} }

View File

@ -138,6 +138,17 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file)
* independently. * independently.
*/ */
/* First test whether we're properly catching exceptions thrown by boost::tokenizer.
* This happens when the input data has invalid escape sequences. */
/* Verify that tokenizing data the CSV tokenizer cannot parse is reported
 * to the caller as a std::range_error.
 */
TEST_F (GncTokenizerTest, tokenize_binary_data)
{
    /* set_separators() is called through a GncCsvTokenizer pointer, so the
     * fixture's tokenizer is downcast first. Fail this test with a clear
     * message rather than dereferencing a null pointer if the cast does
     * not succeed. */
    GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get());
    ASSERT_TRUE (csvtok != nullptr);
    csvtok->set_separators (",");

    /* Raw string literal: the backslash is passed to the tokenizer as-is
     * and is not interpreted by the compiler. The leading "\7" is meant to
     * act as an invalid escape sequence, as binary input might contain. */
    set_utf8_contents (csv_tok, R"(\764Test,Something)");
    EXPECT_THROW (csv_tok->tokenize(), std::range_error);
}
/* This helper function will run the parse step on the given data /* This helper function will run the parse step on the given data
* with the parser as configured by the calling test function. * with the parser as configured by the calling test function.
* This allows the same code to be used with different csv test strings * This allows the same code to be used with different csv test strings