Bug 710824 - GnuCash should sanitise UTF-8 before serialising files

to avoid writing broken unparseable XML.
This checks for both bad UTF8 and for invalid control characters
that libxml2 doesn't convert to entities.

git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@23598 57a11ea4-9604-0410-9ed3-97b8803252fd
This commit is contained in:
John Ralls 2013-12-22 22:32:04 +00:00
parent d9c33eb571
commit 4147d88e72
11 changed files with 155 additions and 39 deletions

View File

@ -39,6 +39,7 @@ libgnc_backend_xml_utils_la_SOURCES = \
gnc-tax-table-xml-v2.c \
gnc-transaction-xml-v2.c \
gnc-vendor-xml-v2.c \
gnc-xml-helper.c \
io-example-account.c \
io-gncxml-gen.c \
io-gncxml-v1.c \

View File

@ -136,7 +136,7 @@ recurrence_to_dom_tree(const gchar *tag, const Recurrence *r)
WeekendAdjust wadj;
n = xmlNewNode(NULL, BAD_CAST tag);
xmlSetProp(n, BAD_CAST "version", BAD_CAST recurrence_version_string );
xmlSetProp(n, BAD_CAST "version", BAD_CAST recurrence_version_string);
xmlAddChild(n, guint_to_dom_tree(recurrence_mult,
recurrenceGetMultiplier(r)));
pt = recurrenceGetPeriodType(r);

View File

@ -85,11 +85,12 @@ gnc_schedXaction_dom_tree_create(SchedXaction *sx)
gint instCount;
const GncGUID *templ_acc_guid;
gboolean allow_2_2_incompat = TRUE;
gchar *name = g_strdup (xaccSchedXactionGetName(sx));
templ_acc_guid = xaccAccountGetGUID(sx->template_acct);
/* FIXME: this should be the same as the def in io-gncxml-v2.c */
ret = xmlNewNode( NULL, BAD_CAST GNC_SCHEDXACTION_TAG );
ret = xmlNewNode (NULL, BAD_CAST GNC_SCHEDXACTION_TAG);
if (allow_2_2_incompat)
xmlSetProp(ret, BAD_CAST "version", BAD_CAST schedxaction_version2_string);
@ -100,7 +101,8 @@ gnc_schedXaction_dom_tree_create(SchedXaction *sx)
guid_to_dom_tree(SX_ID,
xaccSchedXactionGetGUID(sx)) );
xmlNewTextChild( ret, NULL, BAD_CAST SX_NAME, BAD_CAST xaccSchedXactionGetName(sx) );
xmlNewTextChild( ret, NULL, BAD_CAST SX_NAME, checked_char_cast (name));
g_free (name);
if (allow_2_2_incompat)
{

View File

@ -74,21 +74,25 @@ split_to_dom_tree(const gchar *tag, Split *spl)
xmlAddChild(ret, guid_to_dom_tree("split:id", xaccSplitGetGUID(spl)));
{
const char *memo = xaccSplitGetMemo(spl);
char *memo = g_strdup (xaccSplitGetMemo(spl));
if (memo && g_strcmp0(memo, "") != 0)
{
xmlNewTextChild(ret, NULL, BAD_CAST "split:memo", (xmlChar*)memo);
xmlNewTextChild(ret, NULL, BAD_CAST "split:memo",
checked_char_cast (memo));
}
g_free (memo);
}
{
const char *action = xaccSplitGetAction(spl);
char *action = g_strdup (xaccSplitGetAction(spl));
if (action && g_strcmp0(action, "") != 0)
{
xmlNewTextChild(ret, NULL, BAD_CAST "split:action", (xmlChar*)action);
xmlNewTextChild(ret, NULL, BAD_CAST "split:action",
checked_char_cast (action));
}
g_free (action);
}
{
@ -97,7 +101,8 @@ split_to_dom_tree(const gchar *tag, Split *spl)
tmp[0] = xaccSplitGetReconcile(spl);
tmp[1] = '\0';
xmlNewTextChild(ret, NULL, BAD_CAST "split:reconciled-state", (xmlChar*)tmp);
xmlNewTextChild(ret, NULL, BAD_CAST "split:reconciled-state",
BAD_CAST tmp);
}
add_timespec(ret, "split:reconcile-date",
@ -153,31 +158,37 @@ xmlNodePtr
gnc_transaction_dom_tree_create(Transaction *trn)
{
xmlNodePtr ret;
gchar *str = NULL;
ret = xmlNewNode(NULL, BAD_CAST "gnc:transaction");
xmlSetProp(ret, BAD_CAST "version", BAD_CAST transaction_version_string);
xmlSetProp(ret, BAD_CAST "version",
BAD_CAST transaction_version_string);
xmlAddChild(ret, guid_to_dom_tree("trn:id", xaccTransGetGUID(trn)));
xmlAddChild(ret, commodity_ref_to_dom_tree("trn:currency",
xaccTransGetCurrency(trn)));
if (xaccTransGetNum(trn) && (g_strcmp0(xaccTransGetNum(trn), "") != 0))
str = g_strdup (xaccTransGetNum(trn));
if (str && (g_strcmp0(str, "") != 0))
{
xmlNewTextChild(ret, NULL, BAD_CAST "trn:num", (xmlChar*)xaccTransGetNum(trn));
xmlNewTextChild(ret, NULL, BAD_CAST "trn:num",
checked_char_cast (str));
}
g_free (str);
add_timespec(ret, "trn:date-posted", xaccTransRetDatePostedTS(trn), TRUE);
add_timespec(ret, "trn:date-entered",
xaccTransRetDateEnteredTS(trn), TRUE);
if (xaccTransGetDescription(trn))
str = g_strdup (xaccTransGetDescription(trn));
if (str)
{
xmlNewTextChild(ret, NULL, BAD_CAST "trn:description",
(xmlChar*)xaccTransGetDescription(trn));
checked_char_cast (str));
}
g_free (str);
{
xmlNodePtr kvpnode = kvp_frame_to_dom_tree("trn:slots",

View File

@ -0,0 +1,50 @@
/********************************************************************\
* gnc-xml-helper.c -- implementation of xml helpers *
* *
* Copyright (C) 2001 James LewisMoss <dres@debian.org> *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License as *
* published by the Free Software Foundation; either version 2 of *
* the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License*
* along with this program; if not, contact: *
* *
* Free Software Foundation Voice: +1-617-542-5942 *
* 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
* Boston, MA 02110-1301, USA gnu@gnu.org *
* *
\********************************************************************/
#include <glib.h>
#include "gnc-xml-helper.h"
/**
 * Sanitise a NUL-terminated string in place so it can be handed to libxml2
 * as raw text, and return it cast to xmlChar*.
 *
 * Two passes are made over the buffer:
 *  1. every byte at which g_utf8_validate() reports invalid UTF-8 is
 *     overwritten with '?';
 *  2. every control character below 0x20 other than tab (0x09), line feed
 *     (0x0a) and carriage return (0x0d) is overwritten with '?'.
 *
 * @param val Writable, NUL-terminated buffer; may be NULL. The buffer is
 *            modified in place, so it must not be read-only storage.
 * @return val itself (nothing is allocated), or NULL when val is NULL.
 */
xmlChar*
checked_char_cast (gchar *val)
{
    const int length = -1; /* Assumes val is null-terminated */
    gchar *cursor;
    gchar *end;

    if (val == NULL) return NULL;

    /* Replace any invalid UTF-8 bytes with a sequence of '?'. Everything up
     * to and including a replaced byte is known to be valid, so validation
     * resumes just after it instead of rescanning from the start.
     */
    cursor = val;
    while (!g_utf8_validate (cursor, length, (const gchar**)(&end)))
    {
        *end = '?';
        cursor = end + 1;
    }

    /* Replace any invalid (for XML) control characters (everything < 0x20
     * except \n, \t, and \r) with '?'. Technically we should replace
     * these with a numeric entity, but that will blow up the libxml
     * functions that expect raw text. It seems unlikely that anyone
     * would intentionally use one of these characters anyway.
     */
    for (end = val; *end; ++end)
    {
        if (*end > 0 && *end < 0x20 && *end != 0x09 &&
            *end != 0x0a && *end != 0x0d)
        {
            *end = '?';
        }
    }
    return (xmlChar*)(val);
}

View File

@ -42,5 +42,6 @@
# define xmlAttrPropertyValue children
# endif /* ifndef xmlAttrPropertyValue */
xmlChar* checked_char_cast (gchar *val);
#endif /* _GNC_XML_HELPER_H_ */

View File

@ -887,7 +887,7 @@ write_counts(FILE* out, ...)
gboolean success = TRUE;
va_start(ap, out);
type = va_arg(ap, char *);
type = g_strdup (va_arg(ap, char *));
while (success && type)
{
@ -907,9 +907,10 @@ write_counts(FILE* out, ...)
* This is invalid xml because the namespace isn't
* declared in the tag itself. This should be changed to
* 'type' at some point. */
xmlSetProp(node, BAD_CAST "cd:type", BAD_CAST type);
xmlNodeAddContent(node, BAD_CAST val);
xmlSetProp(node, BAD_CAST "cd:type", checked_char_cast (type));
xmlNodeAddContent(node, checked_char_cast (val));
g_free(val);
g_free (type);
xmlElemDump(out, NULL, node);
xmlFreeNode(node);
@ -1141,7 +1142,7 @@ write_pricedb(FILE *out, QofBook *book, sixtp_gdv2 *gd)
increment the progress bar as we go. */
if (fprintf( out, "<%s version=\"%s\">\n", parent->name,
xmlGetProp(parent, (xmlChar*) "version")) < 0)
xmlGetProp(parent, BAD_CAST "version")) < 0)
return FALSE;
/* We create our own output buffer so we can call xmlNodeDumpOutput to get

View File

@ -44,12 +44,13 @@ xmlNodePtr
text_to_dom_tree(const char *tag, const char *str)
{
    /* Build a new element named `tag` whose text content is a sanitised
     * copy of `str`. Returns NULL (via g_return_val_if_fail) when tag or
     * str is NULL or the node cannot be created. The caller's string is
     * never modified.
     */
    xmlNodePtr result;
    gchar *newstr;

    g_return_val_if_fail(tag, NULL);
    g_return_val_if_fail(str, NULL);
    result = xmlNewNode(NULL, BAD_CAST tag);
    g_return_val_if_fail(result, NULL);

    /* Duplicate only after every guard has passed, so no early return can
     * leak the copy. checked_char_cast() rewrites the buffer in place,
     * hence the private copy of the const input.
     */
    newstr = g_strdup (str);
    xmlNodeAddContent(result, checked_char_cast (newstr));
    g_free (newstr);
    return result;
}
@ -105,6 +106,7 @@ xmlNodePtr
commodity_ref_to_dom_tree(const char *tag, const gnc_commodity *c)
{
xmlNodePtr ret;
gchar *namespace, *mnemonic;
g_return_val_if_fail(c, NULL);
@ -114,10 +116,14 @@ commodity_ref_to_dom_tree(const char *tag, const gnc_commodity *c)
{
return NULL;
}
xmlNewTextChild(ret, NULL, BAD_CAST "cmdty:space", BAD_CAST gnc_commodity_get_namespace_compat(c));
xmlNewTextChild(ret, NULL, BAD_CAST "cmdty:id", BAD_CAST gnc_commodity_get_mnemonic(c));
namespace = g_strdup (gnc_commodity_get_namespace_compat(c));
mnemonic = g_strdup (gnc_commodity_get_mnemonic(c));
xmlNewTextChild(ret, NULL, BAD_CAST "cmdty:space",
checked_char_cast (namespace));
xmlNewTextChild(ret, NULL, BAD_CAST "cmdty:id",
checked_char_cast (mnemonic));
g_free (namespace);
g_free (mnemonic);
return ret;
}
@ -162,14 +168,16 @@ timespec_to_dom_tree(const char *tag, const Timespec *spec)
ret = xmlNewNode(NULL, BAD_CAST tag);
xmlNewTextChild(ret, NULL, BAD_CAST "ts:date", BAD_CAST date_str);
xmlNewTextChild(ret, NULL, BAD_CAST "ts:date",
checked_char_cast (date_str));
if (spec->tv_nsec > 0)
{
ns_str = timespec_nsec_to_string(spec);
if (ns_str)
{
xmlNewTextChild(ret, NULL, BAD_CAST "ts:ns", BAD_CAST ns_str);
xmlNewTextChild(ret, NULL, BAD_CAST "ts:ns",
checked_char_cast (ns_str));
}
}
@ -195,7 +203,7 @@ gdate_to_dom_tree(const char *tag, const GDate *date)
ret = xmlNewNode(NULL, BAD_CAST tag);
xmlNewTextChild(ret, NULL, BAD_CAST "gdate", BAD_CAST date_str);
xmlNewTextChild(ret, NULL, BAD_CAST "gdate", checked_char_cast (date_str));
g_free(date_str);
@ -215,7 +223,7 @@ gnc_numeric_to_dom_tree(const char *tag, const gnc_numeric *num)
ret = xmlNewNode(NULL, BAD_CAST tag);
xmlNodeAddContent(ret, BAD_CAST numstr);
xmlNodeAddContent(ret, checked_char_cast (numstr));
g_free(numstr);
@ -242,9 +250,12 @@ double_to_string(double value)
/* Set the "type" attribute of `node` to `type` and its content to `val`,
 * sanitising both for XML first.
 *
 * `type` is not freed here, so it is sanitised on a private copy and the
 * caller's string is never written to. `val` is freed before returning,
 * so it is sanitised in place and released exactly once.
 */
static void
add_text_to_node(xmlNodePtr node, gchar *type, gchar *val)
{
    gchar *newtype = g_strdup (type);

    xmlSetProp(node, BAD_CAST "type", checked_char_cast (newtype));
    xmlNodeSetContent(node, checked_char_cast (val));
    g_free (newtype);
    g_free (val);
}
@ -256,13 +267,17 @@ static void
add_kvp_value_node(xmlNodePtr node, gchar *tag, kvp_value* val)
{
xmlNodePtr val_node;
gchar *tmp_str1;
kvp_value_t kvp_type;
kvp_type = kvp_value_get_type(val);
if (kvp_type == KVP_TYPE_STRING)
val_node = xmlNewTextChild(node, NULL, BAD_CAST tag, BAD_CAST kvp_value_get_string(val));
{
gchar *newstr = g_strdup (kvp_value_get_string(val));
val_node = xmlNewTextChild(node, NULL, BAD_CAST tag,
checked_char_cast (newstr));
g_free (newstr);
}
else if (kvp_type == KVP_TYPE_TIMESPEC)
val_node = NULL;
else if (kvp_type == KVP_TYPE_GDATE)
@ -312,11 +327,12 @@ add_kvp_value_node(xmlNodePtr node, gchar *tag, kvp_value* val)
case KVP_TYPE_BINARY:
{
guint64 size;
gchar *tmp_str1;
void *binary_data = kvp_value_get_binary(val, &size);
xmlSetProp(val_node, BAD_CAST "type", BAD_CAST "binary");
g_return_if_fail(binary_data);
tmp_str1 = binary_to_string(binary_data, size);
xmlNodeSetContent(val_node, BAD_CAST tmp_str1);
xmlNodeSetContent(val_node, checked_char_cast (tmp_str1));
g_free(tmp_str1);
}
break;
@ -356,11 +372,12 @@ add_kvp_slot(gpointer key, gpointer value, gpointer data)
{
xmlNodePtr slot_node;
xmlNodePtr node = (xmlNodePtr)data;
gchar *newkey = g_strdup ((gchar*)key);
slot_node = xmlNewChild(node, NULL, BAD_CAST "slot", NULL);
xmlNewTextChild(slot_node, NULL, BAD_CAST "slot:key", (xmlChar*)key);
xmlNewTextChild(slot_node, NULL, BAD_CAST "slot:key",
checked_char_cast (newkey));
g_free (newkey);
add_kvp_value_node(slot_node, "slot:value", (kvp_value*)value);
}

View File

@ -63,7 +63,12 @@ static gboolean dom_start_handler(
{
while (*atptr != 0)
{
xmlSetProp(thing, BAD_CAST atptr[0], BAD_CAST atptr[1]);
gchar *attr0 = g_strdup (atptr[0]);
gchar *attr1 = g_strdup (atptr[1]);
xmlSetProp(thing, checked_char_cast (attr0),
checked_char_cast (attr1));
g_free (attr0);
g_free (attr1);
atptr += 2;
}
}
@ -88,7 +93,10 @@ static gboolean dom_chars_handler(
{
if (length > 0)
{
xmlNodeAddContentLen((xmlNodePtr)parent_data, BAD_CAST text, length);
gchar *newtext = g_strdup (text);
xmlNodeAddContentLen((xmlNodePtr)parent_data,
checked_char_cast (newtext), length);
g_free (newtext);
}
return TRUE;
}

View File

@ -8,6 +8,7 @@ test_date_converting_SOURCES = \
${top_srcdir}/src/backend/xml/sixtp.c \
${top_srcdir}/src/backend/xml/sixtp-stack.c \
${top_srcdir}/src/backend/xml/sixtp-to-dom-parser.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-date-converting.c
test_dom_converters1_SOURCES = \
@ -17,6 +18,7 @@ test_dom_converters1_SOURCES = \
${top_srcdir}/src/backend/xml/sixtp.c \
${top_srcdir}/src/backend/xml/sixtp-stack.c \
${top_srcdir}/src/backend/xml/sixtp-to-dom-parser.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-dom-converters1.c
test_kvp_frames_SOURCES = \
@ -26,6 +28,7 @@ test_kvp_frames_SOURCES = \
${top_srcdir}/src/backend/xml/sixtp.c \
${top_srcdir}/src/backend/xml/sixtp-stack.c \
${top_srcdir}/src/backend/xml/sixtp-to-dom-parser.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-kvp-frames.c
# the xml backend is now a GModule - this test does
@ -53,6 +56,7 @@ test_load_example_account_SOURCES = \
${top_srcdir}/src/backend/xml/gnc-commodity-xml-v2.c \
${top_srcdir}/src/backend/xml/gnc-book-xml-v2.c \
${top_srcdir}/src/backend/xml/gnc-pricedb-xml-v2.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-load-example-account.c
test_string_converters_SOURCES = \
@ -62,6 +66,7 @@ test_string_converters_SOURCES = \
${top_srcdir}/src/backend/xml/sixtp.c \
${top_srcdir}/src/backend/xml/sixtp-stack.c \
${top_srcdir}/src/backend/xml/sixtp-to-dom-parser.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-string-converters.c
test_xml_account_SOURCES = \
@ -84,6 +89,7 @@ test_xml_account_SOURCES = \
${top_srcdir}/src/backend/xml/gnc-budget-xml-v2.c \
${top_srcdir}/src/backend/xml/io-gncxml-v2.c \
${top_srcdir}/src/backend/xml/io-utils.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-xml-account.c
test_xml_commodity_SOURCES = \
@ -106,6 +112,7 @@ test_xml_commodity_SOURCES = \
${top_srcdir}/src/backend/xml/gnc-budget-xml-v2.c \
${top_srcdir}/src/backend/xml/io-gncxml-v2.c \
${top_srcdir}/src/backend/xml/io-utils.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-xml-commodity.c
test_xml_pricedb_SOURCES = \
@ -128,6 +135,7 @@ test_xml_pricedb_SOURCES = \
${top_srcdir}/src/backend/xml/gnc-budget-xml-v2.c \
${top_srcdir}/src/backend/xml/io-gncxml-v2.c \
${top_srcdir}/src/backend/xml/io-utils.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-xml-pricedb.c
test_xml_transaction_SOURCES = \
@ -150,6 +158,7 @@ test_xml_transaction_SOURCES = \
${top_srcdir}/src/backend/xml/gnc-pricedb-xml-v2.c \
${top_srcdir}/src/backend/xml/io-gncxml-v2.c \
${top_srcdir}/src/backend/xml/io-utils.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-xml-transaction.c
test_xml2_is_file_SOURCES = \
@ -172,6 +181,7 @@ test_xml2_is_file_SOURCES = \
${top_srcdir}/src/backend/xml/io-gncxml-gen.c \
${top_srcdir}/src/backend/xml/io-gncxml-v2.c \
${top_srcdir}/src/backend/xml/io-utils.c \
${top_srcdir}/src/backend/xml/gnc-xml-helper.c \
test-xml2-is-file.c
TESTS = \

View File

@ -103,6 +103,20 @@ test_string_converters(void)
}
}
static void
test_bad_string (void)
{
gchar *badstr = "foo\abar";
gchar *sanitized = "foo?bar";
gchar *backout;
xmlNodePtr test_node = text_to_dom_tree ("test-string", badstr);
backout = dom_tree_to_text (test_node);
do_test_args (g_strcmp0 (backout, sanitized) == 0,
"string sanitizing", __FILE__, __LINE__,
"with string %s", badstr);
}
int
main(int argc, char **argv)
{
@ -110,6 +124,7 @@ main(int argc, char **argv)
test_binary();
fflush(stdout);
test_string_converters();
test_bad_string ();
fflush(stdout);
print_test_results();
exit(get_rv());