Strip all invalid utf8 characters from imported QIF and OFX/QFX

strings.  This fixes bugs #106203 #338296 #344170 and #344219.


git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@14361 57a11ea4-9604-0410-9ed3-97b8803252fd
This commit is contained in:
David Hampton 2006-06-14 03:42:07 +00:00
parent b3a7e34a1e
commit 85b1b6febd
6 changed files with 94 additions and 1 deletions

View File

@ -1,3 +1,14 @@
2006-06-13 David Hampton <hampton@employees.org>
* src/core-utils/gnc-glib-utils.[ch]:
* src/core-utils/gw-core-utils-spec.scm:
* src/import-export/qif-import/qif-file.scm:
* src/import-export/ofx/gnc-ofx-import.c: Strip all invalid utf8
characters from imported QIF and OFX/QFX strings. This fixes bugs
#106203 #338296 #344170 and #344219. Long term gnucash should be
enhanced to learn/remember the input encoding and automatically
convert to utf8.
2006-06-13 Derek Atkins <derek@ihtfp.com> 2006-06-13 Derek Atkins <derek@ihtfp.com>
* configure.in: force-enable hbci if the user enables mt940 * configure.in: force-enable hbci if the user enables mt940

View File

@ -22,6 +22,8 @@
\********************************************************************/ \********************************************************************/
#include "config.h" #include "config.h"
#include <stdio.h>
#include <string.h>
#include "gnc-glib-utils.h" #include "gnc-glib-utils.h"
@ -41,3 +43,25 @@ safe_utf8_collate (const char * da, const char * db)
return -1; return -1;
return 0; return 0;
} }
gboolean
gnc_utf8_validate (const gchar *str)
{
return g_utf8_validate(str, -1, NULL);
}
/* Remove, in place, every byte that keeps str from being valid UTF-8.
 *
 * str must be a writable, NUL-terminated buffer.  The string only ever
 * shrinks, so no allocation takes place.  A NULL str is rejected via
 * g_return_if_fail instead of being dereferenced.  If the string is
 * already valid it is left untouched and nothing is logged; otherwise a
 * single warning showing the original text is emitted before stripping. */
void
gnc_utf8_strip_invalid (gchar *str)
{
    const gchar *bad = NULL;
    gchar *cursor;

    g_return_if_fail(str != NULL);

    if (g_utf8_validate(str, -1, &bad))
        return;

    g_warning("Invalid utf8 string: %s", str);
    do
    {
        /* Writable alias of the offending byte reported by the validator. */
        cursor = str + (bad - str);

        /* Shuffle the remainder down one byte.  strlen(cursor) bytes
         * starting at cursor + 1 include the terminating NUL. */
        memmove(cursor, cursor + 1, strlen(cursor));

        /* Everything before cursor has already been validated, so resume
         * from cursor rather than rescanning from the start of str. */
    }
    while (!g_utf8_validate(cursor, -1, &bad));
}

View File

@ -43,7 +43,37 @@
@{ @{
*/ */
int safe_utf8_collate (const char * da, const char * db); /** Collate two utf8 strings. This function performs basic argument
* checking before calling g_utf8_collate.
*
* @param str1 The first string.
*
* @param str2 The second string.
*
* @return Same return value as g_utf8_collate. The values are: < 0
* if str1 compares before str2, 0 if they compare equal, > 0 if str1
* compares after str2. */
int safe_utf8_collate (const char *str1, const char *str2);
/** This is a helper function for guile. C code should call
* g_utf8_validate directly.
*
* @param str The string to be validated.
*
* @return TRUE if this string is valid utf8. */
gboolean gnc_utf8_validate (const gchar *str);
/** Strip any non-utf8 characters from a string. This function
* rewrites the string "in place" instead of allocating and returning
* a new string. This allows it to operate on strings that are
* defined as character arrays in a larger data structure.
*
* @param str A pointer to the string to strip of invalid
* characters. */
void gnc_utf8_strip_invalid (gchar *str);
/** @} */ /** @} */

View File

@ -25,6 +25,7 @@
(lambda (wrapset client-wrapset) (lambda (wrapset client-wrapset)
(list (list
"#include <gnc-gconf-utils.h>\n" "#include <gnc-gconf-utils.h>\n"
"#include <gnc-glib-utils.h>\n"
"#include <gnc-main.h>\n"))) "#include <gnc-main.h>\n")))
(gw:wrap-function (gw:wrap-function
@ -52,4 +53,20 @@
'(((<gw:mchars> caller-owned) program)) '(((<gw:mchars> caller-owned) program))
"Get a boolean value from gconf.") "Get a boolean value from gconf.")
;; Expose the C function gnc_utf8_validate to Scheme under the name
;; gnc:utf8-validate.  It takes a single caller-owned string argument
;; and returns a boolean.
;; NOTE(review): the argument is named `program`, the same name used by
;; the preceding gconf wrapper -- confirm whether a name such as `str`
;; was intended.
(gw:wrap-function
ws
'gnc:utf8-validate
'<gw:bool>
"gnc_utf8_validate"
'(((<gw:mchars> caller-owned) program))
"Validate UTF8 encoded text.")
;; Expose the C function gnc_utf8_strip_invalid to Scheme under the name
;; gnc:utf8-strip-invalid.  It takes a single caller-owned string
;; argument and is declared to return nothing.
;; NOTE(review): the C function edits its argument in place and returns
;; void.  Confirm that the <gw:mchars> caller-owned marshalling hands the
;; Scheme string's own storage to C; if it passes a copy, Scheme callers
;; will never observe the stripped text.
;; NOTE(review): the argument is named `program`, the same name used by
;; the gconf wrapper earlier in this spec -- confirm whether `str` was
;; intended.
(gw:wrap-function
ws
'gnc:utf8-strip-invalid
'<gw:void>
"gnc_utf8_strip_invalid"
'(((<gw:mchars> caller-owned) program))
"Strip string of non-utf8 characters.")
) )

View File

@ -47,6 +47,7 @@
#include "gnc-book.h" #include "gnc-book.h"
#include "gnc-ui-util.h" #include "gnc-ui-util.h"
#include "gnc-gconf-utils.h" #include "gnc-gconf-utils.h"
#include "gnc-glib-utils.h"
#define GCONF_SECTION "dialogs/import/ofx" #define GCONF_SECTION "dialogs/import/ofx"
@ -124,6 +125,12 @@ int ofx_proc_transaction_cb(struct OfxTransactionData data, void * transaction_u
data.account_id, 0, NULL, NULL, NO_TYPE, NULL, NULL); data.account_id, 0, NULL, NULL, NO_TYPE, NULL, NULL);
if(account!=NULL) if(account!=NULL)
{ {
/********** Validate the input strings to ensure utf8 ********************/
if (data.name_valid)
gnc_utf8_strip_invalid(data.name);
if (data.memo_valid)
gnc_utf8_strip_invalid(data.memo);
/********** Create the transaction and setup transaction data ************/ /********** Create the transaction and setup transaction data ************/
book = xaccAccountGetBook(account); book = xaccAccountGetBook(account);
transaction = xaccMallocTransaction(book); transaction = xaccMallocTransaction(book);
@ -595,6 +602,7 @@ int ofx_proc_account_cb(struct OfxAccountData data, void * account_user_data)
} }
} }
gnc_utf8_strip_invalid(data.account_name);
account_description = g_strdup_printf( /* This string is a default account account_description = g_strdup_printf( /* This string is a default account
name. It MUST NOT contain the name. It MUST NOT contain the
character ':' anywhere in it or character ':' anywhere in it or

View File

@ -11,6 +11,8 @@
;; just store the fields "raw". ;; just store the fields "raw".
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(use-modules (g-wrapped gw-core-utils))
(cond (cond
((or (string=? "1.3.4" (version)) ((or (string=? "1.3.4" (version))
(string=? "1.4" (substring (version) 0 3))) #f) (string=? "1.4" (substring (version) 0 3))) #f)
@ -72,6 +74,7 @@
;; pick the 1-char tag off from the remainder of the line ;; pick the 1-char tag off from the remainder of the line
(set! tag (string-ref line 0)) (set! tag (string-ref line 0))
(set! value (substring line 1)) (set! value (substring line 1))
(gnc:utf8-strip-invalid value)
;; now do something with the line ;; now do something with the line
(if (if