Handle the common csv double quote escape variation (repeating the double quote)

This commit is contained in:
Geert Janssens 2018-05-05 13:42:13 +02:00
parent 682b5cf581
commit 27c1df30af
2 changed files with 11 additions and 0 deletions

View File

@ -76,6 +76,16 @@ int GncCsvTokenizer::tokenize()
bs_pos = line.find ('\\', bs_pos);
}
// Deal with repeated double quotes ("") in strings.
// This is commonly used as an escape mechanism for double quotes in CSV files.
// However, the boost tokenizer just drops them, so rewrite each pair as a backslash-escaped quote (\") first.
bs_pos = line.find ("\"\"");
while (bs_pos != std::string::npos)
{
line.replace (bs_pos, 2, "\\\"");
bs_pos = line.find ("\"\"");
}
Tokenizer tok(line, sep);
vec.assign(tok.begin(),tok.end());
m_tokenized_contents.push_back(vec);

View File

@ -176,6 +176,7 @@ static tokenize_csv_test_data comma_separated [] = {
{ "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ "Test with \"\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
};