diff --git a/src/backend/xml/gnc-xml-helper.c b/src/backend/xml/gnc-xml-helper.c index d1f3f03257..1f22eabd9c 100644 --- a/src/backend/xml/gnc-xml-helper.c +++ b/src/backend/xml/gnc-xml-helper.c @@ -1,7 +1,7 @@ /********************************************************************\ * gnc-xml-helper.h -- api for xml helpers * * * - * Copyright (C) 2014 John Ralls * + * Copyright (C) 2001 James LewisMoss * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * @@ -25,102 +25,26 @@ #include #include "gnc-xml-helper.h" - -/* The following applies to the utf8 array and decode function: - * Copyright (c) 2008-2009 Bjoern Hoehrmann - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// Copyright (c) 2008-2009 Bjoern Hoehrmann -// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 
- -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 1 - -static const guint8 utf8d[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 10f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 12f - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 14f - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 16f - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 18f -}; - -static guint32 inline -decode(guint32* state, guint32* codep, guint32 byte) { - guint32 type = utf8d[(guint8)byte]; - - *codep = (*state != UTF8_ACCEPT) ? 
- (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); - - *state = utf8d[256 + *state*16 + type]; - return *state; -} - - xmlChar* checked_char_cast (gchar *val) { - gchar *p = val; - guint32 prev, curr; - guint8 count; - - for (prev = 0, curr = 0; *p; prev = curr, ++p) - { - guint32 codep; /* We don't care, it's a throwaway */ - if (*p && *p < 0x20 && *p != 0x09 && - *p != 0x0a && *p != 0x0d) - { - *p = '?'; - continue; - } - if (*(guint8*)p < 0x80) - continue; - switch (decode(&curr, &codep, *p)) - { - case UTF8_ACCEPT: - break; - case UTF8_REJECT: - curr = UTF8_ACCEPT; - *p = '?'; - if (prev != UTF8_ACCEPT) - *(p - 1) = '?'; - for (count = 0; count < prev / 3 + 1; ++count) - *(++p) = '?'; - break; - default: - break; - } - } + const int length = -1; /* Assumes val is null-terminated */ + gchar *end; + if (val == NULL) return NULL; + /* Replace any invalid UTF-8 characters with a sequence of '?' */ + while (!g_utf8_validate (val, length, (const gchar**)(&end))) + *end = '?'; + /* Replace any invalid (for XML) control characters (everything < 0x20 + * except \n, \t, and \r) with '?'. Technically we should replace + * these with a numeric entity, but that will blow up the libxml + * functions that expect raw text. It seems unlikely that anyone + * would intentionally use one of these characters anyway. + */ + for (end = val; *end; ++end) + if (*end > 0 && *end < 0x20 && *end != 0x09 && + *end != 0x0a && *end != 0x0d) + *end = '?'; return (xmlChar*)(val); }