Rework assistant csv import regex code to eliminate the mandatory dummy field (#eol).

This consists of a couple of changes that are heavily interdependent:
- read the entire csv file into a single string
- tweak the regex so it can tell the end of a line (record) apart from a newline embedded in one of the fields
- leverage the g_regex code to iterate over the full string directly
This commit is contained in:
Geert Janssens
2014-09-26 16:48:06 +02:00
parent 595e359301
commit c652455ebb
3 changed files with 58 additions and 105 deletions

View File

@@ -155,8 +155,7 @@ void csv_tree_export (CsvExportInfo *info)
header = g_strconcat (end_sep, _("type"), mid_sep, _("full_name"), mid_sep, _("name"), mid_sep,
_("code"), mid_sep, _("description"), mid_sep, _("color"), mid_sep,
_("notes"), mid_sep, _("commoditym"), mid_sep, _("commodityn"), mid_sep,
_("hidden"), mid_sep, _("tax"), mid_sep, _("place_holder"), mid_sep, _("#eol"),
end_sep, EOLSTR, NULL);
_("hidden"), mid_sep, _("tax"), mid_sep, _("place_holder"), end_sep, EOLSTR, NULL);
DEBUG("Header String: %s", header);
/* Write header line */
@@ -236,7 +235,7 @@ void csv_tree_export (CsvExportInfo *info)
g_free (part2);
/* Place Holder / end of line marker */
currentSel = xaccAccountGetPlaceholder (acc) ? "T" : "F" ;
part2 = g_strconcat (part1, currentSel, mid_sep, _("#eol"), end_sep, EOLSTR, NULL);
part2 = g_strconcat (part1, currentSel, end_sep, EOLSTR, NULL);
g_free (part1);
DEBUG("Account String: %s", part2);

View File

@@ -113,17 +113,25 @@ static gchar *mnemonic_escape (const gchar *source)
}
static
void create_regex (GString regex_str, const gchar *sep)
void create_regex (GString *regex_str, const gchar *sep)
{
if (!sep) return;
g_string_printf (regex_str,
"^(?<type>[^%s]*)%s?(?<full_name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s\
?(?<code>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<description>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<color>[^%s]*)%s\
?(?<notes>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<commoditym>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?(?<commodityn>\"(?:[^\"]|\"\")*\"|[^%s]*)%s\
?(?<hidden>[^%s]*)%s?(?<tax>[^%s]*)%s?(?<place_holder>[^%s]*)%s(?<endofline>[^%s]*)$",
sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep,
sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep);
"\\G(?<type>[^%s]*)%s"
"(?<full_name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<name>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<code>\"(?:[^\"]|\"\")*\"|[^%s]*)%s?"
"(?<description>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<color>[^%s]*)%s"
"(?<notes>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<commoditym>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<commodityn>\"(?:[^\"]|\"\")*\"|[^%s]*)%s"
"(?<hidden>[^%s]*)%s"
"(?<tax>[^%s]*)%s"
"(?<place_holder>[^%s[:cntrl:]]*)(?:\\R*)",
sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep,
sep, sep, sep, sep, sep, sep, sep, sep, sep, sep, sep);
}
@@ -622,6 +630,7 @@ csv_import_assistant_create (CsvImportInfo *info)
info->tree_view = GTK_WIDGET(gtk_builder_get_object (builder, "treeview"));
/* Comma Separated file default */
info->regexp = g_string_new ("");
create_regex (info->regexp, ",");
/* create model and bind to view */

View File

@@ -81,30 +81,29 @@ csv_import_result
csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
GtkListStore *store, guint max_rows)
{
FILE *f;
char *line;
gchar *line_utf8;
gchar *end1, *end2;
GMatchInfo *match_info;
gchar *locale_cont, *contents;
GMatchInfo *match_info = NULL;
GRegex *regexpat = NULL;
GError *err;
GRegex *regexpat;
int row = 0;
gboolean match_found = FALSE;
gint row = 0;
gboolean match_found = FALSE;
// model
GtkTreeIter iter;
f = g_fopen (filename, "rt");
if (!f)
if (!g_file_get_contents (filename, &locale_cont, NULL, NULL))
{
//gnc_error_dialog( 0, _("File %s cannot be opened."), filename );
return RESULT_OPEN_FAILED;
}
contents = g_locale_to_utf8 (locale_cont, -1, NULL, NULL, NULL);
g_free (locale_cont);
// compile the regular expression and check for errors
err = NULL;
regexpat =
g_regex_new (parser_regexp, G_REGEX_EXTENDED | G_REGEX_OPTIMIZE | G_REGEX_DUPNAMES, 0, &err);
g_regex_new (parser_regexp, G_REGEX_OPTIMIZE, 0, &err);
if (err != NULL)
{
GtkWidget *dialog;
@@ -113,7 +112,6 @@ csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
errmsg = g_strdup_printf (_("Error in regular expression '%s':\n%s"),
parser_regexp, err->message);
g_error_free (err);
err = NULL;
dialog = gtk_message_dialog_new (NULL,
GTK_DIALOG_MODAL,
@@ -122,100 +120,47 @@ csv_import_read_file (const gchar *filename, const gchar *parser_regexp,
gtk_dialog_run (GTK_DIALOG (dialog));
gtk_widget_destroy (dialog);
g_free (errmsg);
errmsg = 0;
g_free (contents);
fclose (f);
return RESULT_ERROR_IN_REGEXP;
}
/* Setup the two different line endings */
#ifdef G_OS_WIN32
end1 = g_strconcat (_("#eol"),"\"\n", NULL);
end2 = g_strconcat (_("#eol"),"\n", NULL);
#else
end1 = g_strconcat (_("#eol"),"\"\r\n", NULL);
end2 = g_strconcat (_("#eol"),"\r\n", NULL);
#endif
// start the import
#define buffer_size 1000
line = g_malloc0 (buffer_size);
while (!feof (f))
g_regex_match (regexpat, contents, 0, &match_info);
while (g_match_info_matches (match_info))
{
gchar *currentline = NULL;
int l;
match_found = TRUE;
// fill in the values
gtk_list_store_append (store, &iter);
fill_model_with_match (match_info, "type", store, &iter, TYPE);
fill_model_with_match (match_info, "full_name", store, &iter, FULL_NAME);
fill_model_with_match (match_info, "name", store, &iter, NAME);
fill_model_with_match (match_info, "code", store, &iter, CODE);
fill_model_with_match (match_info, "description", store, &iter, DESCRIPTION);
fill_model_with_match (match_info, "color", store, &iter, COLOR);
fill_model_with_match (match_info, "notes", store, &iter, NOTES);
fill_model_with_match (match_info, "commoditym", store, &iter, COMMODITYM);
fill_model_with_match (match_info, "commodityn", store, &iter, COMMODITYN);
fill_model_with_match (match_info, "hidden", store, &iter, HIDDEN);
fill_model_with_match (match_info, "tax", store, &iter, TAX);
fill_model_with_match (match_info, "place_holder", store, &iter, PLACE_HOLDER);
gtk_list_store_set (store, &iter, ROW_COLOR, NULL, -1);
row++;
if (row == max_rows)
break;
// read one line
if (!fgets (line, buffer_size, f))
break; // eof
currentline = g_strdup (line);
while (!(g_str_has_suffix (line, end1) || g_str_has_suffix (line, end2)))
{
// read next line
if (fgets (line, buffer_size, f))
{
gchar *temp_str = NULL;
temp_str = g_strconcat (currentline, line, NULL);
g_free (currentline);
currentline = g_strdup (temp_str);
g_free (temp_str);
}
else
break; // eof
}
// now strip the '\r\n' from the end of the line
l = strlen (currentline);
if ((l > 0) && (currentline[l - 1] == '\n'))
currentline[l - 1] = 0;
if ((l > 0) && (currentline[l - 2] == '\r'))
currentline[l - 2] = 0;
// convert line from locale into utf8
line_utf8 = g_locale_to_utf8 (currentline, -1, NULL, NULL, NULL);
// parse the line
match_info = NULL; // it seems, that in contrast to documentation, match_info is not always set -> g_match_info_free will segfault
if (g_regex_match (regexpat, line_utf8, 0, &match_info))
{
match_found = TRUE;
// fill in the values
gtk_list_store_append (store, &iter);
fill_model_with_match (match_info, "type", store, &iter, TYPE);
fill_model_with_match (match_info, "full_name", store, &iter, FULL_NAME);
fill_model_with_match (match_info, "name", store, &iter, NAME);
fill_model_with_match (match_info, "code", store, &iter, CODE);
fill_model_with_match (match_info, "description", store, &iter, DESCRIPTION);
fill_model_with_match (match_info, "color", store, &iter, COLOR);
fill_model_with_match (match_info, "notes", store, &iter, NOTES);
fill_model_with_match (match_info, "commoditym", store, &iter, COMMODITYM);
fill_model_with_match (match_info, "commodityn", store, &iter, COMMODITYN);
fill_model_with_match (match_info, "hidden", store, &iter, HIDDEN);
fill_model_with_match (match_info, "tax", store, &iter, TAX);
fill_model_with_match (match_info, "place_holder", store, &iter, PLACE_HOLDER);
gtk_list_store_set (store, &iter, ROW_COLOR, NULL, -1);
}
g_free (currentline);
g_match_info_free (match_info);
match_info = 0;
g_free (line_utf8);
line_utf8 = 0;
g_match_info_next (match_info, &err);
}
g_free (end1);
g_free (end2);
g_free (line);
line = 0;
g_match_info_free (match_info);
g_regex_unref (regexpat);
regexpat = 0;
fclose (f);
g_free (contents);
if (err != NULL)
{
g_printerr ("Error while matching: %s\n", err->message);
g_error_free (err);
}
if (match_found == TRUE)
return MATCH_FOUND;
else