mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
Bug 795666 - Backslash "\" in Description field spoils CSV Import without helpful error message
We've configured boost::tokenizer to take the backslash as the escape character. However, boost::tokenizer will throw if it encounters a lone backslash that is not part of an escape sequence (it expects two backslashes if a literal backslash is to be inserted). Avoid this by replacing lone backslashes (not part of escapes) with double backslashes before passing control to the tokenizer.
This commit is contained in:
@@ -63,6 +63,19 @@ int GncCsvTokenizer::tokenize()
|
||||
}
|
||||
// ---
|
||||
|
||||
// Deal with backslashes that are not meant to be escapes
|
||||
// The boost::tokenizer with escaped_list_separator as we use
|
||||
// it would choke on this.
|
||||
auto bs_pos = line.find ('\\');
|
||||
while (bs_pos != std::string::npos)
|
||||
{
|
||||
if ((bs_pos == line.size()) || // got trailing single backslash
|
||||
(line.find_first_of ("\"\\n", bs_pos + 1) != bs_pos + 1)) // backslash is not part of known escapes \\, \" or \n
|
||||
line = line.substr(0, bs_pos) + "\\\\" + line.substr(bs_pos + 1);
|
||||
bs_pos += 2;
|
||||
bs_pos = line.find ('\\', bs_pos);
|
||||
}
|
||||
|
||||
Tokenizer tok(line, sep);
|
||||
vec.assign(tok.begin(),tok.end());
|
||||
m_tokenized_contents.push_back(vec);
|
||||
|
||||
@@ -138,17 +138,6 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file)
|
||||
* independently.
|
||||
*/
|
||||
|
||||
/* First test whether we're properly catching boost::tokenizer throws
|
||||
* This happens when the input data has invalid escape sequences */
|
||||
TEST_F (GncTokenizerTest, tokenize_binary_data)
|
||||
{
|
||||
GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get());
|
||||
csvtok->set_separators (",");
|
||||
|
||||
set_utf8_contents (csv_tok, R"(\764Test,Something)");
|
||||
EXPECT_THROW (csv_tok->tokenize(), std::range_error);
|
||||
}
|
||||
|
||||
/* This helper function will run the parse step on the given data
|
||||
* with the parser as configured by the calling test function.
|
||||
* This allows the same code to be used with different csv test strings
|
||||
@@ -185,6 +174,8 @@ static tokenize_csv_test_data comma_separated [] = {
|
||||
{ "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
|
||||
{ "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
|
||||
{ "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
|
||||
{ "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
|
||||
{ "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
|
||||
{ NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user