Rework the assistant's CSV import regex code to eliminate the mandatory dummy field (#eol).

This consists of several heavily interdependent changes:
- read the CSV file entirely into one string
- tweak the regex so it can tell the end of a line apart from a newline inside one of the fields
- leverage the g_regex code to iterate over the full string directly
2025-02-25 18:55:30 -06:00 · 2014-09-26 16:48:06 +02:00
parent 595e359301
commit c652455ebb
3 changed files with 58 additions and 105 deletions
--- a/src/import-export/csv-exp/csv-tree-export.c
+++ b/src/import-export/csv-exp/csv-tree-export.c
@@ -155,8 +155,7 @@ void csv_tree_export (CsvExportInfo *info)
        header = g_strconcat (end_sep, _("type"), mid_sep, _("full_name"), mid_sep, _("name"), mid_sep,
                                _("code"), mid_sep, _("description"), mid_sep, _("color"), mid_sep,
                                _("notes"), mid_sep, _("commoditym"), mid_sep, _("commodityn"), mid_sep,
-                                _("hidden"), mid_sep, _("tax"), mid_sep, _("place_holder"), mid_sep, _("#eol"),
-                                 end_sep, EOLSTR, NULL);
+                                _("hidden"), mid_sep, _("tax"), mid_sep, _("place_holder"), end_sep, EOLSTR, NULL);
        DEBUG("Header String: %s", header);

        /* Write header line */
@@ -236,7 +235,7 @@ void csv_tree_export (CsvExportInfo *info)
            g_free (part2);
            /* Place Holder / end of line marker */
            currentSel = xaccAccountGetPlaceholder (acc) ? "T" : "F" ;
-            part2 = g_strconcat (part1, currentSel, mid_sep, _("#eol"), end_sep, EOLSTR, NULL);
+            part2 = g_strconcat (part1, currentSel, end_sep, EOLSTR, NULL);
            g_free (part1);

            DEBUG("Account String: %s", part2);
--- a/src/import-export/csv-imp/assistant-csv-account-import.c
+++ b/src/import-export/csv-imp/assistant-csv-account-import.c
@@ -113,17 +113,25 @@ static gchar *mnemonic_escape (const gchar *source)
 }

 static
-void create_regex (GString regex_str, const gchar *sep)
+void create_regex (GString *regex_str, const gchar *sep)
 {
    if (!sep) return;

    g_string_printf (regex_str,
-            "^(?<type>[^%s]*)%s?(?<full_name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s\
-             ?(?<code>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<description>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<color>[^%s]*)%s\
-             ?(?<notes>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<commoditym>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<commodityn>\"(?:[^\"]|\"\")*\"|[^%s]*)%s\
-             ?(?<hidden>[^%s]*)%s?(?<tax>[^%s]*)%s?(?<place_holder>[^%s]*)%s(?<endofline>[^%s]*)$",
-            sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep,
-            sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep);
+            "\\G(?<type>[^%s]*)%s"
+            "(?<full_name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<code>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?"
+            "(?<description>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<color>[^%s]*)%s"
+            "(?<notes>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<commoditym>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<commodityn>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
+            "(?<hidden>[^%s]*)%s"
+            "(?<tax>[^%s]*)%s"
+            "(?<place_holder>[^%s[:cntrl:]]*)(?:\\R*)",
+            sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep,
+            sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep);

 }

@@ -622,6 +630,7 @@ csv_import_assistant_create (CsvImportInfo *info)
    info->tree_view = GTK_WIDGET(gtk_builder_get_object (builder, "treeview"));

    /* Comma Separated file default */
+    info->regexp = g_string_new ("");
    create_regex (info->regexp, ",");

    /* create model and bind to view */
--- a/src/import-export/csv-imp/csv-account-import.c
+++ b/src/import-export/csv-imp/csv-account-import.c
@@ -81,30 +81,29 @@ csv_import_result
 csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
                      GtkListStore *store, guint max_rows)
 {
-    FILE       *f;
-    char       *line;
-    gchar      *line_utf8;
-    gchar      *end1, *end2;
-    GMatchInfo *match_info;
+    gchar      *locale_cont, *contents;
+    GMatchInfo *match_info = NULL;
+    GRegex     *regexpat = NULL;
    GError     *err;
-    GRegex     *regexpat;
-    int         row = 0;
-    gboolean match_found = FALSE;
+    gint       row = 0;
+    gboolean   match_found = FALSE;

    // model
    GtkTreeIter iter;

-    f = g_fopen (filename, "rt");
-    if (!f)
+    if (!g_file_get_contents (filename, &locale_cont, NULL, NULL))
    {
        //gnc_error_dialog( 0, _("File %s cannot be opened."), filename );
        return RESULT_OPEN_FAILED;
    }

+    contents = g_locale_to_utf8 (locale_cont, -1, NULL, NULL, NULL);
+    g_free (locale_cont);
+
    // compile the regular expression and check for errors
    err = NULL;
    regexpat =
-        g_regex_new (parser_regexp, G_REGEX_EXTENDED | G_REGEX_OPTIMIZE | G_REGEX_DUPNAMES, 0, &err);
+        g_regex_new (parser_regexp, G_REGEX_OPTIMIZE, 0, &err);
    if (err != NULL)
    {
        GtkWidget *dialog;
@@ -113,7 +112,6 @@ csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
        errmsg = g_strdup_printf (_("Error in regular expression '%s':\n%s"),
                                  parser_regexp, err->message);
        g_error_free (err);
-        err = NULL;

        dialog = gtk_message_dialog_new (NULL,
                                         GTK_DIALOG_MODAL,
@@ -122,100 +120,47 @@ csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
        gtk_dialog_run (GTK_DIALOG (dialog));
        gtk_widget_destroy (dialog);
        g_free (errmsg);
-        errmsg = 0;
+        g_free (contents);

-        fclose (f);
        return RESULT_ERROR_IN_REGEXP;
    }

-    /* Setup the two different line endings */
-#ifdef G_OS_WIN32
-    end1 = g_strconcat (_("#eol"),"\"\n", NULL);
-    end2 = g_strconcat (_("#eol"),"\n", NULL);
-#else
-    end1 = g_strconcat (_("#eol"),"\"\r\n", NULL);
-    end2 = g_strconcat (_("#eol"),"\r\n", NULL);
-#endif
-
-    // start the import
-#define buffer_size 1000
-    line = g_malloc0 (buffer_size);
-    while (!feof (f))
+    g_regex_match (regexpat, contents, 0, &match_info);
+    while (g_match_info_matches (match_info))
    {
-        gchar  *currentline = NULL;
-        int l;
+        match_found = TRUE;
+        // fill in the values
+        gtk_list_store_append (store, &iter);
+        fill_model_with_match (match_info, "type", store, &iter, TYPE);
+        fill_model_with_match (match_info, "full_name", store, &iter, FULL_NAME);
+        fill_model_with_match (match_info, "name", store, &iter, NAME);
+        fill_model_with_match (match_info, "code", store, &iter, CODE);
+        fill_model_with_match (match_info, "description", store, &iter, DESCRIPTION);
+        fill_model_with_match (match_info, "color", store, &iter, COLOR);
+        fill_model_with_match (match_info, "notes", store, &iter, NOTES);
+        fill_model_with_match (match_info, "commoditym", store, &iter, COMMODITYM);
+        fill_model_with_match (match_info, "commodityn", store, &iter, COMMODITYN);
+        fill_model_with_match (match_info, "hidden", store, &iter, HIDDEN);
+        fill_model_with_match (match_info, "tax", store, &iter, TAX);
+        fill_model_with_match (match_info, "place_holder", store, &iter, PLACE_HOLDER);
+        gtk_list_store_set (store, &iter, ROW_COLOR, NULL, -1);
+
        row++;
        if (row == max_rows)
            break;
-
-        // read one line
-        if (!fgets (line, buffer_size, f))
-            break; // eof
-
-        currentline = g_strdup (line);
-
-        while (!(g_str_has_suffix (line, end1) || g_str_has_suffix (line, end2)))
-        {
-            // read next line
-            if (fgets (line, buffer_size, f))
-            {
-                gchar *temp_str = NULL;
-                temp_str = g_strconcat (currentline, line, NULL);
-                g_free (currentline);
-                currentline = g_strdup (temp_str);
-                g_free (temp_str);
-            }
-            else
-                break; // eof
-         }
-
-        // now strip the '\r\n' from the end of the line
-        l = strlen (currentline);
-        if ((l > 0) && (currentline[l - 1] == '\n'))
-            currentline[l - 1] = 0;
-
-        if ((l > 0) && (currentline[l - 2] == '\r'))
-            currentline[l - 2] = 0;
-
-        // convert line from locale into utf8
-        line_utf8 = g_locale_to_utf8 (currentline, -1, NULL, NULL, NULL);
-
-        // parse the line
-        match_info = NULL;	// it seems, that in contrast to documentation, match_info is not always set -> g_match_info_free will segfault
-        if (g_regex_match (regexpat, line_utf8, 0, &match_info))
-        {
-            match_found = TRUE;
-            // fill in the values
-            gtk_list_store_append (store, &iter);
-            fill_model_with_match (match_info, "type", store, &iter, TYPE);
-            fill_model_with_match (match_info, "full_name", store, &iter, FULL_NAME);
-            fill_model_with_match (match_info, "name", store, &iter, NAME);
-            fill_model_with_match (match_info, "code", store, &iter, CODE);
-            fill_model_with_match (match_info, "description", store, &iter, DESCRIPTION);
-            fill_model_with_match (match_info, "color", store, &iter, COLOR);
-            fill_model_with_match (match_info, "notes", store, &iter, NOTES);
-            fill_model_with_match (match_info, "commoditym", store, &iter, COMMODITYM);
-            fill_model_with_match (match_info, "commodityn", store, &iter, COMMODITYN);
-            fill_model_with_match (match_info, "hidden", store, &iter, HIDDEN);
-            fill_model_with_match (match_info, "tax", store, &iter, TAX);
-            fill_model_with_match (match_info, "place_holder", store, &iter, PLACE_HOLDER);
-            gtk_list_store_set (store, &iter, ROW_COLOR, NULL, -1);
-        }
-
-        g_free (currentline);
-        g_match_info_free (match_info);
-        match_info = 0;
-        g_free (line_utf8);
-        line_utf8 = 0;
+        g_match_info_next (match_info, &err);
    }
-    g_free (end1);
-    g_free (end2);
-    g_free (line);
-    line = 0;

+    g_match_info_free (match_info);
    g_regex_unref (regexpat);
-    regexpat = 0;
-    fclose (f);
+    g_free (contents);
+
+    if (err != NULL)
+    {
+        g_printerr ("Error while matching: %s\n", err->message);
+        g_error_free (err);
+    }
+
    if (match_found == TRUE)
        return MATCH_FOUND;
    else