mirror of
https://github.com/Gnucash/gnucash.git
synced 2024-11-30 12:44:01 -06:00
Fix tokenize_string()
This fix prevents empty strings from being added as tokens and removes duplicate tokens. The function tokenize_string() is used for Bayesian import matching, where empty or duplicated token strings lead to incorrect probability calculations when matching a transaction to an account. Empty token strings can occur (see g_strsplit()) when: * two or more spaces appear directly after one another * the string begins or ends with a space
This commit is contained in:
parent
322f2d99de
commit
d07d4b962f
@ -387,8 +387,24 @@ tokenize_string(GList* existing_tokens, const char *string)
|
||||
/* add each token to the token GList */
|
||||
while (stringpos && *stringpos)
|
||||
{
|
||||
/* prepend the char* to the token GList */
|
||||
existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
|
||||
if (strlen(*stringpos) > 0)
|
||||
{
|
||||
/* check for duplicated tokens */
|
||||
gboolean duplicated = FALSE;
|
||||
for (GList* token = existing_tokens; token != NULL; token = token->next)
|
||||
{
|
||||
if (g_strcmp0(token->data, *stringpos) == 0)
|
||||
{
|
||||
duplicated = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (duplicated == FALSE)
|
||||
{
|
||||
/* prepend the char* to the token GList */
|
||||
existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
|
||||
}
|
||||
}
|
||||
|
||||
/* then move to the next string */
|
||||
stringpos++;
|
||||
|
Loading…
Reference in New Issue
Block a user