mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
Bug 721822 - GnuCash 2.6.0 loads data file much slower than 2.4.x
Profiling shows that the main bottleneck is validating the strings in the file as both well-formed XML text and valid UTF-8. This commit performs both checks in a single pass, which cuts the load time in half, but loading is still far slower than it was in 2.4.x.
This commit is contained in:
parent
e421123ef7
commit
6c461b3e95
@ -1,7 +1,7 @@
|
|||||||
/********************************************************************\
|
/********************************************************************\
|
||||||
* gnc-xml-helper.h -- api for xml helpers *
|
* gnc-xml-helper.h -- api for xml helpers *
|
||||||
* *
|
* *
|
||||||
* Copyright (C) 2001 James LewisMoss <dres@debian.org> *
|
* Copyright (C) 2014 John Ralls <jralls@ceridwen.us> *
|
||||||
* *
|
* *
|
||||||
* This program is free software; you can redistribute it and/or *
|
* This program is free software; you can redistribute it and/or *
|
||||||
* modify it under the terms of the GNU General Public License as *
|
* modify it under the terms of the GNU General Public License as *
|
||||||
@ -25,26 +25,102 @@
|
|||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include "gnc-xml-helper.h"
|
#include "gnc-xml-helper.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* The following applies to the utf8 array and decode function:
|
||||||
|
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use, copy,
|
||||||
|
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||||
|
* of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
||||||
|
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
||||||
|
|
||||||
|
/* Decoder states reported by decode(): UTF8_ACCEPT is the start state (and
 * the state reached once a sequence has been consumed completely),
 * UTF8_REJECT is the error state.  Any other value means the decoder is in
 * the middle of a multi-byte sequence.
 */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/* Combined lookup table for the decoder automaton.
 *
 * Entries 0..255 map an input byte to a character class (0..11).
 * Entries 256..399 hold the transition table: 16 slots per state, indexed
 * by character class, giving the next state.
 */
static const uint8_t utf8d[] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
    0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
    0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
    0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 10f
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 12f
    1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 14f
    1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 16f
    1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 18f
};

/* Feed one byte to the UTF-8 decoder automaton.
 *
 * state  In/out: current decoder state; start with UTF8_ACCEPT.  Updated to
 *        the state reached after consuming the byte.
 * codep  In/out: code point accumulator.  Restarted when the incoming state
 *        is UTF8_ACCEPT, otherwise extended with the low six bits of byte.
 * byte   The next input byte.  Only the low eight bits are used.
 *
 * Returns the new value of *state.
 */
static inline uint32_t
decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
    uint32_t type;

    /* A caller passing a plain (possibly signed) char gets it sign-extended
     * to 0xFFFFFFxx; mask it so the class lookup can never leave the table.
     */
    byte &= 0xffu;
    type = utf8d[byte];

    *codep = (*state != UTF8_ACCEPT) ?
        (byte & 0x3fu) | (*codep << 6) :
        (0xff >> type) & (byte);

    *state = utf8d[256 + *state*16 + type];
    return *state;
}
|
||||||
|
|
||||||
|
|
||||||
xmlChar*
|
xmlChar*
|
||||||
checked_char_cast (gchar *val)
|
checked_char_cast (gchar *val)
|
||||||
{
|
{
|
||||||
const int length = -1; /* Assumes val is null-terminated */
|
gchar *p = val;
|
||||||
gchar *end;
|
uint32_t prev, curr;
|
||||||
if (val == NULL) return NULL;
|
uint8_t count;
|
||||||
/* Replace any invalid UTF-8 characters with a sequence of '?' */
|
|
||||||
while (!g_utf8_validate (val, length, (const gchar**)(&end)))
|
for (prev = 0, curr = 0; *p; prev = curr, ++p)
|
||||||
*end = '?';
|
{
|
||||||
/* Replace any invalid (for XML) control characters (everything < 0x20
|
uint32_t codep; /* We don't care, it's a throwaway */
|
||||||
* except \n, \t, and \r) with '?'. Technically we should replace
|
if (*p && *p < 0x20 && *p != 0x09 &&
|
||||||
* these with a numeric entity, but that will blow up the libxml
|
*p != 0x0a && *p != 0x0d)
|
||||||
* functions that expect raw text. It seems unlikely that anyone
|
{
|
||||||
* would use intentionally use one of these characters anyway.
|
*p = '?';
|
||||||
*/
|
continue;
|
||||||
|
}
|
||||||
|
if (*(uint8_t*)p < 0x80)
|
||||||
|
continue;
|
||||||
|
switch (decode(&curr, &codep, *p))
|
||||||
|
{
|
||||||
|
case UTF8_ACCEPT:
|
||||||
|
break;
|
||||||
|
case UTF8_REJECT:
|
||||||
|
curr = UTF8_ACCEPT;
|
||||||
|
*p = '?';
|
||||||
|
if (prev != UTF8_ACCEPT)
|
||||||
|
*(p - 1) = '?';
|
||||||
|
for (count = 0; count < prev / 3 + 1; ++count)
|
||||||
|
*(++p) = '?';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (end = val; *end; ++end)
|
|
||||||
if (*end > 0 && *end < 0x20 && *end != 0x09 &&
|
|
||||||
*end != 0x0a && *end != 0x0d)
|
|
||||||
*end = '?';
|
|
||||||
return (xmlChar*)(val);
|
return (xmlChar*)(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user