Bug 795666 - Backslash "\" in Description field spoils CSV Import without helpful error message

We've configured boost::tokenizer to take the backslash as the escape character. However, boost::tokenizer will throw if it encounters a sole backslash that's not an escape (it would expect two if a pure backslash is to be inserted). Avoid this by replacing lone backslashes (not part of escapes) with double backslashes before passing control to the tokenizer.
2025-02-25 18:55:30 -06:00 · 2018-05-05 12:42:17 +02:00
parent 8b3a874418
commit 682b5cf581
2 changed files with 15 additions and 11 deletions
--- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
+++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
@@ -63,6 +63,19 @@ int GncCsvTokenizer::tokenize()
            }
            // ---

+            // Deal with backslashes that are not meant to be escapes
+            // The boost::tokenizer with escaped_list_separator as we use
+            // it would choke on this.
+            auto bs_pos = line.find ('\\');
+            while (bs_pos != std::string::npos)
+            {
+                if ((bs_pos == line.size()) ||                                 // got trailing single backslash
+                    (line.find_first_of ("\"\\n", bs_pos + 1) != bs_pos + 1))  // backslash is not part of known escapes \\, \" or \n
+                    line = line.substr(0, bs_pos) + "\\\\" + line.substr(bs_pos + 1);
+                bs_pos += 2;
+                bs_pos = line.find ('\\', bs_pos);
+            }
+
            Tokenizer tok(line, sep);
            vec.assign(tok.begin(),tok.end());
            m_tokenized_contents.push_back(vec);
--- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
+++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
@@ -138,17 +138,6 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file)
 * independently.
 */

-/* First test whether we're properly catching boost::tokenizer throws
- * This happens when the input data has invalid escape sequences */
-TEST_F (GncTokenizerTest, tokenize_binary_data)
-{
-    GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get());
-    csvtok->set_separators (",");
-
-    set_utf8_contents (csv_tok, R"(\764Test,Something)");
-    EXPECT_THROW (csv_tok->tokenize(), std::range_error);
-}
-
 /* This helper function will run the parse step on the given data
 * with the parser as configured by the calling test function.
 * This allows the same code to be used with different csv test strings
@@ -185,6 +174,8 @@ static tokenize_csv_test_data comma_separated [] = {
        { "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
        { "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
        { "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
+        { "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
+        { "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
        { NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
 };