diff --git a/src/core-utils/core-utils.i b/src/core-utils/core-utils.i index 52809c5daa..744930255b 100644 --- a/src/core-utils/core-utils.i +++ b/src/core-utils/core-utils.i @@ -23,3 +23,14 @@ void gnc_scm_log_debug(const gchar *); gchar * gnc_utf8_strip_invalid_strdup(const gchar *); %newobject gnc_locale_from_utf8; gchar * gnc_locale_from_utf8(const gchar *); +%newobject gnc_locale_to_utf8; +gchar * gnc_locale_to_utf8(const gchar *); +%rename ("gnc-utf8?") wrap_gnc_utf8_validate; +%inline %{ + /* Predicate wrapper for gnc_utf8_validate(): checks str up to its nul terminator (max_len -1) and does not request the end-of-valid-data pointer. */ + gboolean wrap_gnc_utf8_validate(const gchar *); + gboolean wrap_gnc_utf8_validate(const gchar * str) + { + return gnc_utf8_validate(str, -1, 0); + } +%} diff --git a/src/core-utils/core-utils.scm b/src/core-utils/core-utils.scm index 0a7dd78697..760ff167b0 100644 --- a/src/core-utils/core-utils.scm +++ b/src/core-utils/core-utils.scm @@ -11,8 +11,10 @@ (re-export gnc-is-debugging) (re-export g-find-program-in-path) +(re-export gnc-utf8?) (re-export gnc-utf8-strip-invalid-strdup) (re-export gnc-locale-from-utf8) +(re-export gnc-locale-to-utf8) (re-export gnc-scm-log-warn) (re-export gnc-scm-log-error) (re-export gnc-scm-log-msg) diff --git a/src/core-utils/gnc-glib-utils.c b/src/core-utils/gnc-glib-utils.c index b328ebbd95..29f13fc7f3 100644 --- a/src/core-utils/gnc-glib-utils.c +++ b/src/core-utils/gnc-glib-utils.c @@ -119,35 +119,10 @@ safe_utf8_collate (const char * da, const char * db) ((Char) >= 0x20 || (Char) == 0x09 || (Char) == 0x0A || (Char) == 0x0D) && \ ((Char) & 0xFFFE) != 0xFFFE) -/** - * gnc_utf8_validate (copied from g_utf8_validate): - * @str: a pointer to character data - * @max_len: max bytes to validate, or -1 to go until nul - * @end: return location for end of valid data - * - * Validates UTF-8 encoded text. @str is the text to validate; - * if @str is nul-terminated, then @max_len can be -1, otherwise - * @max_len should be the number of bytes to validate. 
- * If @end is non-%NULL, then the end of the valid range - * will be stored there (i.e. the address of the first invalid byte - * if some bytes were invalid, or the end of the text being validated - * otherwise). - * - * This function looks validates the strict subset of UTF-8 that is - * valid XML text, as detailed in - * http://www.w3.org/TR/REC-xml/#NT-Char linked from bug #346535 - * - * Returns %TRUE if all of @str was valid. Many GLib and GTK+ - * routines require valid UTF-8 as input; - * so data read from a file or the network should be checked - * with g_utf8_validate() before doing anything else with it. - * - * Return value: %TRUE if the text was valid UTF-8 - **/ -static gboolean -gnc_utf8_validate (const gchar *str, - gssize max_len, - const gchar **end) +gboolean +gnc_utf8_validate(const gchar *str, + gssize max_len, + const gchar **end) { const gchar *p; @@ -244,6 +219,25 @@ gnc_locale_from_utf8(const gchar* str) return locale_str; } +gchar * +gnc_locale_to_utf8(const gchar* str) +{ + gchar * utf8_str; + gsize bytes_written = 0; + GError * err = NULL; + + /* Convert to UTF-8 from the encoding used in the current locale. */ + utf8_str = g_locale_to_utf8(str, -1, NULL, &bytes_written, &err); + if (err) + { + /* A set GError is owned by the caller: log it, then free it so it does not leak. */ + g_warning("g_locale_to_utf8 failed: %s", err->message); + g_error_free(err); + } + + return utf8_str; +} + GList* gnc_g_list_map(GList* list, GncGMapFunc fn, gpointer user_data) { diff --git a/src/core-utils/gnc-glib-utils.h b/src/core-utils/gnc-glib-utils.h index 3ef0682b98..f8fc5f20c3 100644 --- a/src/core-utils/gnc-glib-utils.h +++ b/src/core-utils/gnc-glib-utils.h @@ -55,6 +55,32 @@ * compares after str2. */ int safe_utf8_collate (const char *str1, const char *str2); +/** + * gnc_utf8_validate (copied from g_utf8_validate): + * @str: a pointer to character data + * @max_len: max bytes to validate, or -1 to go until nul + * @end: return location for end of valid data + * + * Validates UTF-8 encoded text. 
@str is the text to validate; + * if @str is nul-terminated, then @max_len can be -1, otherwise + * @max_len should be the number of bytes to validate. + * If @end is non-%NULL, then the end of the valid range + * will be stored there (i.e. the address of the first invalid byte + * if some bytes were invalid, or the end of the text being validated + * otherwise). + * + * This function validates the strict subset of UTF-8 that is + * valid XML text, as detailed in + * http://www.w3.org/TR/REC-xml/#NT-Char linked from bug #346535 + * + * Returns %TRUE if all of @str was valid. Many GLib and GTK+ + * routines require valid UTF-8 as input; + * so data read from a file or the network should be checked + * with g_utf8_validate() before doing anything else with it. + * + * Return value: %TRUE if the text was valid UTF-8 + **/ +gboolean gnc_utf8_validate(const gchar *str, gssize max_len, const gchar **end); /** Strip any non-utf8 characters from a string. This function * rewrites the string "in place" instead of allocating and returning @@ -91,9 +117,22 @@ gchar *gnc_utf8_strip_invalid_strdup (const gchar* str); * @param str A pointer to a UTF-8 encoded string to be converted. * * @return A newly allocated string that has to be g_free'd by the - * caller. */ + * caller. If an error occurs, NULL is returned. */ gchar *gnc_locale_from_utf8(const gchar* str); +/** Converts a string to UTF-8 from the encoding used for strings + * in the current locale. + * + * This essentially is a wrapper for g_locale_to_utf8 that can + * be swigified for use with Scheme to avoid adding a dependency + * on guile-glib. + * + * @param str A pointer to a string encoded according to locale. + * + * @return A newly allocated string that has to be g_free'd by the + * caller. If an error occurs, NULL is returned. 
*/ +gchar *gnc_locale_to_utf8(const gchar* str); + typedef gpointer (*GncGMapFunc)(gpointer data, gpointer user_data); /** diff --git a/src/import-export/qif-import/qif-file.scm b/src/import-export/qif-import/qif-file.scm index 10067cdf0a..3928885e4d 100644 --- a/src/import-export/qif-import/qif-file.scm +++ b/src/import-export/qif-import/qif-file.scm @@ -28,6 +28,9 @@ ;; ;; Suck in all the lines. Don't do any string interpretation, ;; just store the fields "raw". +;; +;; FIXME: This function really should be able to return multiple +;; errors and warnings rather than a single one. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (qif-file:read-file self path ticker-map window) @@ -87,10 +90,34 @@ (set! tag (string-ref line 0)) (set! value (substring line 1)) - ;; If the line doesn't conform to UTF-8, remove any invalid - ;; characters. This could be smarter, perhaps by trying a - ;; a default character set conversion based on the locale. - (set! value (gnc-utf8-strip-invalid-strdup value)) + ;; If the line doesn't conform to UTF-8, try a default + ;; character set conversion based on the locale. If that + ;; fails, remove any invalid characters. + (if (not (gnc-utf8? value)) + (let ((converted-value (gnc-locale-to-utf8 value))) + (if (or (string=? converted-value "") + (not (gnc-utf8? converted-value))) + (begin + (set! value (gnc-utf8-strip-invalid-strdup value)) + (set! return-val + (list #t (string-append + (_ "This file is not encoded in UTF-8 or ASCII.") + " " + (_ "Some characters have been discarded.")))) + (gnc:warn "qif-file:read-file:" + " stripping invalid characters." + "\nAfter: [" value "]")) + (begin + (set! return-val + (list #t (string-append + (_ "This file is not encoded in UTF-8 or ASCII.") + " " + (_ "Some characters have been converted according to your locale.")))) + (gnc:warn "qif-file:read-file:" + " converting characters by locale." + "\nBefore: [" value "]" + "\nAfter: [" converted-value "]") + (set! 
value converted-value))))) (if (eq? tag #\!) ;; The "!" tag has the highest precedence and is used