Bug 795666 - Backslash "\" in Description field spoils CSV Import without helpful error message

We've configured boost::tokenizer to take the backslash as the escape character.
However, boost::tokenizer will throw if it encounters a sole backslash that's
not part of an escape (it expects two if a literal backslash is to be inserted).
Avoid this by replacing lone backslashes (not part of escapes) with double
backslashes before passing control to the tokenizer.
This commit is contained in:
Geert Janssens
2018-05-05 12:42:17 +02:00
parent 8b3a874418
commit 682b5cf581
2 changed files with 15 additions and 11 deletions

View File

@@ -63,6 +63,19 @@ int GncCsvTokenizer::tokenize()
}
// ---
// Deal with backslashes that are not meant to be escapes.
// The boost::tokenizer with escaped_list_separator as we use
// it would choke on these, so every backslash that does not start
// one of the known escapes \\, \" or \n is doubled up front.
// A backslash at the very end of the line has no following
// character at all and is therefore doubled as well.
auto bs_pos = line.find ('\\');
while (bs_pos != std::string::npos)
{
    // Only the single character right after the backslash decides
    // whether this is a known escape; no need to scan any further.
    const auto next_pos = bs_pos + 1;
    const bool starts_escape = (next_pos < line.size()) &&
        ((line[next_pos] == '\\') || (line[next_pos] == '"') || (line[next_pos] == 'n'));
    if (!starts_escape)
        line.insert (bs_pos, 1, '\\'); // lone backslash becomes an escaped backslash
    // Either way two characters have been dealt with (the escape pair
    // or the freshly doubled backslash): resume the search behind them.
    bs_pos = line.find ('\\', bs_pos + 2);
}
Tokenizer tok(line, sep);
vec.assign(tok.begin(),tok.end());
m_tokenized_contents.push_back(vec);

View File

@@ -138,17 +138,6 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file)
* independently.
*/
/* First test whether we're properly catching boost::tokenizer throws
 * This happens when the input data has invalid escape sequences.
 *
 * The input below starts with a backslash followed by a digit, which is
 * not one of the escapes \\, \" or \n, and the test expects tokenize()
 * to report that as a std::range_error.
 *
 * NOTE(review): GncCsvTokenizer::tokenize() doubles lone backslashes
 * before handing the line to boost::tokenizer, so this input presumably
 * no longer throws - confirm whether this test is still wanted. */
TEST_F (GncTokenizerTest, tokenize_binary_data)
{
    GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get());
    /* Bail out cleanly instead of dereferencing a null pointer should the
     * fixture's tokenizer not be a GncCsvTokenizer. */
    ASSERT_TRUE (csvtok != nullptr);
    csvtok->set_separators (",");

    set_utf8_contents (csv_tok, R"(\764Test,Something)");
    EXPECT_THROW (csv_tok->tokenize(), std::range_error);
}
/* This helper function will run the parse step on the given data
* with the parser as configured by the calling test function.
* This allows the same code to be used with different csv test strings
@@ -185,6 +174,8 @@ static tokenize_csv_test_data comma_separated [] = {
{ "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
{ "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
{ "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
};