From 85b1b6febdc8d759a5df5642c41f45922a7293b9 Mon Sep 17 00:00:00 2001 From: David Hampton Date: Wed, 14 Jun 2006 03:42:07 +0000 Subject: [PATCH] Strip all invalid utf8 characters from imported QIF and OFX/QFX strings. This fixes bugs #106203 #338296 #344170 and #344219. git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@14361 57a11ea4-9604-0410-9ed3-97b8803252fd --- ChangeLog | 11 ++++++++ src/core-utils/gnc-glib-utils.c | 24 +++++++++++++++++ src/core-utils/gnc-glib-utils.h | 32 ++++++++++++++++++++++- src/core-utils/gw-core-utils-spec.scm | 17 ++++++++++++ src/import-export/ofx/gnc-ofx-import.c | 8 ++++++ src/import-export/qif-import/qif-file.scm | 3 +++ 6 files changed, 94 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b2e3f56c83..a4d630f81d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2006-06-13 David Hampton + + * src/core-utils/gnc-glib-utils.[ch]: + * src/core-utils/gw-core-utils-spec.scm: + * src/import-export/qif-import/qif-file.scm: + * src/import-export/ofx/gnc-ofx-import.c: Strip all invalid utf8 + characters from imported QIF and OFX/QFX strings. This fixes bugs + #106203 #338296 #344170 and #344219. Long term gnucash should be + enhanced to learn/remember the input encoding and automatically + convert to utf8. 
+ 2006-06-13 Derek Atkins * configure.in: force-enable hbci if the user enables mt940 diff --git a/src/core-utils/gnc-glib-utils.c b/src/core-utils/gnc-glib-utils.c index acff0987f1..3acb2fcaa0 100644 --- a/src/core-utils/gnc-glib-utils.c +++ b/src/core-utils/gnc-glib-utils.c @@ -22,6 +22,8 @@ \********************************************************************/ #include "config.h" +#include +#include #include "gnc-glib-utils.h" @@ -41,3 +43,25 @@ safe_utf8_collate (const char * da, const char * db) return -1; return 0; } + +gboolean +gnc_utf8_validate (const gchar *str) +{ + return g_utf8_validate(str, -1, NULL); +} + +void +gnc_utf8_strip_invalid (gchar *str) +{ + gchar *end; + gint len; + + if (g_utf8_validate(str, -1, (const gchar **)&end)) + return; + + g_warning("Invalid utf8 string: %s", str); + do { + len = strlen(end); + memmove(end, end+1, len); /* shuffle the remainder one byte */ + } while (!g_utf8_validate(str, -1, (const gchar **)&end)); +} diff --git a/src/core-utils/gnc-glib-utils.h b/src/core-utils/gnc-glib-utils.h index 2e956d3ed5..f52d25bc48 100644 --- a/src/core-utils/gnc-glib-utils.h +++ b/src/core-utils/gnc-glib-utils.h @@ -43,7 +43,37 @@ @{ */ -int safe_utf8_collate (const char * da, const char * db); +/** Collate two utf8 strings. This function performs basic argument + * checking before calling g_utf8_collate. + * + * @param str1 The first string. + * + * @param str2 The second string. + * + * @return Same return value as g_utf8_collate. The values are: < 0 + * if str1 compares before str2, 0 if they compare equal, > 0 if str1 + * compares after str2. */ +int safe_utf8_collate (const char *str1, const char *str2); + + +/** This is a helper function for guile. C code should call + * g_utf8_validate directly. + * + * @param str The string to be validated. + * + * @return TRUE if this string is valid utf8. */ +gboolean gnc_utf8_validate (const gchar *str); + + +/** Strip any non-utf8 characters from a string. 
This function + * rewrites the string "in place" instead of allocating and returning + * a new string. This allows it to operate on strings that are + * defined as character arrays in a larger data structure. + * + * @param str A pointer to the string to strip of invalid + * characters. */ +void gnc_utf8_strip_invalid (gchar *str); + /** @} */ diff --git a/src/core-utils/gw-core-utils-spec.scm b/src/core-utils/gw-core-utils-spec.scm index e6d7ce3d4e..221f4182d0 100644 --- a/src/core-utils/gw-core-utils-spec.scm +++ b/src/core-utils/gw-core-utils-spec.scm @@ -25,6 +25,7 @@ (lambda (wrapset client-wrapset) (list "#include \n" + "#include \n" "#include \n"))) (gw:wrap-function @@ -52,4 +53,20 @@ '((( caller-owned) program)) "Get a boolean value from gconf.") + (gw:wrap-function + ws + 'gnc:utf8-validate + ' + "gnc_utf8_validate" + '((( caller-owned) program)) + "Validate UTF8 encoded text.") + + (gw:wrap-function + ws + 'gnc:utf8-strip-invalid + ' + "gnc_utf8_strip_invalid" + '((( caller-owned) program)) + "Strip string of non-utf8 characters.") + ) diff --git a/src/import-export/ofx/gnc-ofx-import.c b/src/import-export/ofx/gnc-ofx-import.c index 40f0fdbca8..2611ec86ae 100644 --- a/src/import-export/ofx/gnc-ofx-import.c +++ b/src/import-export/ofx/gnc-ofx-import.c @@ -47,6 +47,7 @@ #include "gnc-book.h" #include "gnc-ui-util.h" #include "gnc-gconf-utils.h" +#include "gnc-glib-utils.h" #define GCONF_SECTION "dialogs/import/ofx" @@ -124,6 +125,12 @@ int ofx_proc_transaction_cb(struct OfxTransactionData data, void * transaction_u data.account_id, 0, NULL, NULL, NO_TYPE, NULL, NULL); if(account!=NULL) { + /********** Validate the input strings to ensure utf8 ********************/ + if (data.name_valid) + gnc_utf8_strip_invalid(data.name); + if (data.memo_valid) + gnc_utf8_strip_invalid(data.memo); + /********** Create the transaction and setup transaction data ************/ book = xaccAccountGetBook(account); transaction = xaccMallocTransaction(book); @@ -595,6 +602,7 @@ 
int ofx_proc_account_cb(struct OfxAccountData data, void * account_user_data) } } + gnc_utf8_strip_invalid(data.account_name); account_description = g_strdup_printf( /* This string is a default account name. It MUST NOT contain the character ':' anywhere in it or diff --git a/src/import-export/qif-import/qif-file.scm b/src/import-export/qif-import/qif-file.scm index 1cb37d5cce..7880c17bf2 100644 --- a/src/import-export/qif-import/qif-file.scm +++ b/src/import-export/qif-import/qif-file.scm @@ -11,6 +11,8 @@ ;; just store the fields "raw". ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(use-modules (g-wrapped gw-core-utils)) + (cond ((or (string=? "1.3.4" (version)) (string=? "1.4" (substring (version) 0 3))) #f) @@ -72,6 +74,7 @@ ;; pick the 1-char tag off from the remainder of the line (set! tag (string-ref line 0)) (set! value (substring line 1)) + (gnc:utf8-strip-invalid value) ;; now do something with the line (if