diff --git a/src/core-utils/core-utils.i b/src/core-utils/core-utils.i
index 52809c5daa..744930255b 100644
--- a/src/core-utils/core-utils.i
+++ b/src/core-utils/core-utils.i
@@ -23,3 +23,14 @@ void gnc_scm_log_debug(const gchar *);
gchar * gnc_utf8_strip_invalid_strdup(const gchar *);
%newobject gnc_locale_from_utf8;
gchar * gnc_locale_from_utf8(const gchar *);
+%newobject gnc_locale_to_utf8;
+gchar * gnc_locale_to_utf8(const gchar *);
+%rename ("gnc-utf8?") wrap_gnc_utf8_validate;
+%inline %{
+  /* Predicate wrapper: validates all of the nul-terminated str. */
+  gboolean wrap_gnc_utf8_validate(const gchar *);
+  gboolean wrap_gnc_utf8_validate(const gchar * str)
+  {
+    return gnc_utf8_validate(str, -1, NULL);
+  }
+%}
diff --git a/src/core-utils/core-utils.scm b/src/core-utils/core-utils.scm
index 0a7dd78697..760ff167b0 100644
--- a/src/core-utils/core-utils.scm
+++ b/src/core-utils/core-utils.scm
@@ -11,8 +11,10 @@
(re-export gnc-is-debugging)
(re-export g-find-program-in-path)
+(re-export gnc-utf8?)
(re-export gnc-utf8-strip-invalid-strdup)
(re-export gnc-locale-from-utf8)
+(re-export gnc-locale-to-utf8)
(re-export gnc-scm-log-warn)
(re-export gnc-scm-log-error)
(re-export gnc-scm-log-msg)
diff --git a/src/core-utils/gnc-glib-utils.c b/src/core-utils/gnc-glib-utils.c
index b328ebbd95..29f13fc7f3 100644
--- a/src/core-utils/gnc-glib-utils.c
+++ b/src/core-utils/gnc-glib-utils.c
@@ -119,35 +119,10 @@ safe_utf8_collate (const char * da, const char * db)
((Char) >= 0x20 || (Char) == 0x09 || (Char) == 0x0A || (Char) == 0x0D) && \
((Char) & 0xFFFE) != 0xFFFE)
-/**
- * gnc_utf8_validate (copied from g_utf8_validate):
- * @str: a pointer to character data
- * @max_len: max bytes to validate, or -1 to go until nul
- * @end: return location for end of valid data
- *
- * Validates UTF-8 encoded text. @str is the text to validate;
- * if @str is nul-terminated, then @max_len can be -1, otherwise
- * @max_len should be the number of bytes to validate.
- * If @end is non-%NULL, then the end of the valid range
- * will be stored there (i.e. the address of the first invalid byte
- * if some bytes were invalid, or the end of the text being validated
- * otherwise).
- *
- * This function looks validates the strict subset of UTF-8 that is
- * valid XML text, as detailed in
- * http://www.w3.org/TR/REC-xml/#NT-Char linked from bug #346535
- *
- * Returns %TRUE if all of @str was valid. Many GLib and GTK+
- * routines require valid UTF-8 as input;
- * so data read from a file or the network should be checked
- * with g_utf8_validate() before doing anything else with it.
- *
- * Return value: %TRUE if the text was valid UTF-8
- **/
-static gboolean
-gnc_utf8_validate (const gchar *str,
- gssize max_len,
- const gchar **end)
+gboolean
+gnc_utf8_validate(const gchar *str,
+ gssize max_len,
+ const gchar **end)
{
const gchar *p;
@@ -244,6 +219,21 @@ gnc_locale_from_utf8(const gchar* str)
return locale_str;
}
+gchar *
+gnc_locale_to_utf8(const gchar* str)
+{
+    GError * err = NULL;
+    /* Convert to UTF-8 from the encoding used in the current locale;
+     * the byte counts are not needed, so NULL is passed for both. */
+    gchar * utf8_str = g_locale_to_utf8(str, -1, NULL, NULL, &err);
+    if (err)
+    {
+        g_warning("g_locale_to_utf8 failed: %s", err->message);
+        g_error_free(err);   /* the caller of g_locale_to_utf8 owns err */
+    }
+    return utf8_str;
+}
+
GList*
gnc_g_list_map(GList* list, GncGMapFunc fn, gpointer user_data)
{
diff --git a/src/core-utils/gnc-glib-utils.h b/src/core-utils/gnc-glib-utils.h
index 3ef0682b98..f8fc5f20c3 100644
--- a/src/core-utils/gnc-glib-utils.h
+++ b/src/core-utils/gnc-glib-utils.h
@@ -55,6 +55,32 @@
* compares after str2. */
int safe_utf8_collate (const char *str1, const char *str2);
+/**
+ * gnc_utf8_validate (copied from g_utf8_validate):
+ * @str: a pointer to character data
+ * @max_len: max bytes to validate, or -1 to go until nul
+ * @end: return location for end of valid data
+ *
+ * Validates UTF-8 encoded text. @str is the text to validate;
+ * if @str is nul-terminated, then @max_len can be -1, otherwise
+ * @max_len should be the number of bytes to validate.
+ * If @end is non-%NULL, then the end of the valid range
+ * will be stored there (i.e. the address of the first invalid byte
+ * if some bytes were invalid, or the end of the text being validated
+ * otherwise).
+ *
+ * This function validates the strict subset of UTF-8 that is
+ * valid XML text, as detailed in
+ * http://www.w3.org/TR/REC-xml/#NT-Char linked from bug #346535
+ *
+ * Returns %TRUE if all of @str was valid. Many GLib and GTK+
+ * routines require valid UTF-8 as input;
+ * so data read from a file or the network should be checked
+ * with g_utf8_validate() before doing anything else with it.
+ *
+ * Return value: %TRUE if the text was valid UTF-8
+ **/
+gboolean gnc_utf8_validate(const gchar *str, gssize max_len, const gchar **end);
/** Strip any non-utf8 characters from a string. This function
* rewrites the string "in place" instead of allocating and returning
@@ -91,9 +117,22 @@ gchar *gnc_utf8_strip_invalid_strdup (const gchar* str);
* @param str A pointer to a UTF-8 encoded string to be converted.
*
* @return A newly allocated string that has to be g_free'd by the
- * caller. */
+ * caller. If an error occurs, NULL is returned. */
gchar *gnc_locale_from_utf8(const gchar* str);
+/** Converts a string to UTF-8 from the encoding used for strings
+ * in the current locale.
+ *
+ * This essentially is a wrapper for g_locale_to_utf8 that can
+ * be swigified for use with Scheme to avoid adding a dependency
+ * for guile-glib.
+ *
+ * @param str A pointer to a string encoded according to locale.
+ *
+ * @return A newly allocated string that has to be g_free'd by the
+ * caller. If an error occurs, NULL is returned. */
+gchar *gnc_locale_to_utf8(const gchar* str);
+
typedef gpointer (*GncGMapFunc)(gpointer data, gpointer user_data);
/**
diff --git a/src/import-export/qif-import/qif-file.scm b/src/import-export/qif-import/qif-file.scm
index 10067cdf0a..3928885e4d 100644
--- a/src/import-export/qif-import/qif-file.scm
+++ b/src/import-export/qif-import/qif-file.scm
@@ -28,6 +28,9 @@
;;
;; Suck in all the lines. Don't do any string interpretation,
;; just store the fields "raw".
+;;
+;; FIXME: This function really should be able to return multiple
+;; errors and warnings rather than a single one.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (qif-file:read-file self path ticker-map window)
@@ -87,10 +90,34 @@
(set! tag (string-ref line 0))
(set! value (substring line 1))
- ;; If the line doesn't conform to UTF-8, remove any invalid
- ;; characters. This could be smarter, perhaps by trying a
- ;; a default character set conversion based on the locale.
- (set! value (gnc-utf8-strip-invalid-strdup value))
+ ;; If the line doesn't conform to UTF-8, try a default
+ ;; character set conversion based on the locale. If that
+ ;; fails, remove any invalid characters.
+ (if (not (gnc-utf8? value))
+ (let ((converted-value (gnc-locale-to-utf8 value)))
+ (if (or (string=? converted-value "")
+ (not (gnc-utf8? converted-value)))
+ (begin
+ (set! value (gnc-utf8-strip-invalid-strdup value))
+ (set! return-val
+ (list #t (string-append
+ (_ "This file is not encoded in UTF-8 or ASCII.")
+ " "
+ (_ "Some characters have been discarded."))))
+ (gnc:warn "qif-file:read-file:"
+ " stripping invalid characters."
+ "\nAfter: [" value "]"))
+ (begin
+ (set! return-val
+ (list #t (string-append
+ (_ "This file is not encoded in UTF-8 or ASCII.")
+ " "
+ (_ "Some characters have been converted according to your locale."))))
+ (gnc:warn "qif-file:read-file:"
+ " converting characters by locale."
+ "\nBefore: [" value "]"
+ "\nAfter: [" converted-value "]")
+ (set! value converted-value)))))
(if (eq? tag #\!)
;; The "!" tag has the highest precedence and is used