mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
Fix tokenize_string()
This fix prevents empty strings from being added as tokens and removes duplicated tokens. Function tokenize_string() is used for Bayesian import matching, where empty token strings or duplicated tokens lead to wrong results in the probability calculation that matches a transaction to an account. Empty token strings can occur (see function g_strsplit()) if: * two or more spaces occur directly after one another * the string begins or ends with spaces
This commit is contained in:
parent
322f2d99de
commit
d07d4b962f
@@ -387,8 +387,24 @@ tokenize_string(GList* existing_tokens, const char *string)
     /* add each token to the token GList */
     while (stringpos && *stringpos)
     {
-        /* prepend the char* to the token GList */
-        existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+        if (strlen(*stringpos) > 0)
+        {
+            /* check for duplicated tokens */
+            gboolean duplicated = FALSE;
+            for (GList* token = existing_tokens; token != NULL; token = token->next)
+            {
+                if (g_strcmp0(token->data, *stringpos) == 0)
+                {
+                    duplicated = TRUE;
+                    break;
+                }
+            }
+            if (duplicated == FALSE)
+            {
+                /* prepend the char* to the token GList */
+                existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+            }
+        }
 
         /* then move to the next string */
         stringpos++;
Loading…
Reference in New Issue
Block a user