mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
* src/import-export/import-backend.c:
* src/import-export/import-match-map.c: * src/import-export/import-match-map.h: Chris Morgan's Baysian Matching code, to match transactions based on Bayesian filtering of previously matched transactions. git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@8044 57a11ea4-9604-0410-9ed3-97b8803252fd
This commit is contained in:
parent
6e143835cb
commit
b2ccbf62cf
@ -1,3 +1,11 @@
|
||||
2003-03-08 Derek Atkins <derek@ihtfp.com>
|
||||
|
||||
* src/import-export/import-backend.c:
|
||||
* src/import-export/import-match-map.c:
|
||||
* src/import-export/import-match-map.h:
|
||||
Chris Morgan's Bayesian Matching code, to match transactions
|
||||
based on Bayesian filtering of previously matched transactions.
|
||||
|
||||
2003-03-06 Christian Stimming <stimming@tuhh.de>
|
||||
|
||||
* src/import-export/hbci/dialog-hbcitrans.c: Include a latest
|
||||
|
@ -44,6 +44,9 @@
|
||||
|
||||
#include "gnc-ui-util.h"
|
||||
|
||||
#define IMPORT_PAGE "Online Banking & Importing" /* from app-utils/prefs.scm */
|
||||
#define BAYES_OPTION "Use Bayesian Matching?"
|
||||
|
||||
/********************************************************************\
|
||||
* Constants *
|
||||
\********************************************************************/
|
||||
@ -90,6 +93,9 @@ struct _transactioninfo
|
||||
GNCImportAction action;
|
||||
GNCImportAction previous_action;
|
||||
|
||||
/* A list of tokenized strings to use for bayesian matching purposes */
|
||||
GList * match_tokens;
|
||||
|
||||
/* In case of a single destination account it is stored here. */
|
||||
Account *dest_acc;
|
||||
gboolean dest_acc_selected_manually;
|
||||
@ -241,6 +247,15 @@ void gnc_import_TransInfo_delete (GNCImportTransInfo *info)
|
||||
xaccTransDestroy(info->trans);
|
||||
xaccTransCommitEdit(info->trans);
|
||||
}
|
||||
if (info->match_tokens)
|
||||
{
|
||||
GList *node;
|
||||
|
||||
for (node = info->match_tokens; node; node = node->next)
|
||||
g_free (node->data);
|
||||
|
||||
g_list_free (info->match_tokens);
|
||||
}
|
||||
g_free(info);
|
||||
}
|
||||
}
|
||||
@ -343,28 +358,128 @@ GdkPixmap* gen_probability_pixmap(gint score_original, GNCImportSettings *settin
|
||||
* MatchMap- related functions (storing and retrieving)
|
||||
*/
|
||||
|
||||
/* searches using the GNCImportTransInfo through all existing transactions */
|
||||
/* if there is an exact match of the description and memo */
|
||||
/* Tokenize a string and append to an existing GList(or an empty GList)
|
||||
* the tokens
|
||||
*/
|
||||
static GList*
|
||||
tokenize_string(GList* existing_tokens, const char *string)
|
||||
{
|
||||
char **tokenized_strings; /* array of strings returned by g_strsplit() */
|
||||
char **stringpos;
|
||||
|
||||
tokenized_strings = g_strsplit(string, " ", 0);
|
||||
stringpos = tokenized_strings;
|
||||
|
||||
/* add each token to the token GList */
|
||||
while(stringpos && *stringpos)
|
||||
{
|
||||
/* prepend the char* to the token GList */
|
||||
existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
|
||||
|
||||
/* then move to the next string */
|
||||
stringpos++;
|
||||
}
|
||||
|
||||
/* free up the strings that g_strsplit() created */
|
||||
g_strfreev(tokenized_strings);
|
||||
|
||||
return existing_tokens;
|
||||
}
|
||||
|
||||
/* create and return a list of tokens for a given transaction info. */
|
||||
static GList*
|
||||
TransactionGetTokens(GNCImportTransInfo *info)
|
||||
{
|
||||
Transaction* transaction;
|
||||
GList* tokens;
|
||||
const char* text;
|
||||
time_t transtime;
|
||||
struct tm *tm_struct;
|
||||
char local_day_of_week[16];
|
||||
Split* split;
|
||||
int split_index;
|
||||
|
||||
g_return_val_if_fail (info, NULL);
|
||||
if (info->match_tokens) return info->match_tokens;
|
||||
|
||||
transaction = gnc_import_TransInfo_get_trans(info);
|
||||
g_assert(transaction);
|
||||
|
||||
tokens = 0; /* start off with an empty list */
|
||||
|
||||
/* make tokens from the transaction description */
|
||||
text = xaccTransGetDescription(transaction);
|
||||
tokens = tokenize_string(tokens, text);
|
||||
|
||||
/* the day of week the transaction occured is a good indicator of
|
||||
* what account this transaction belongs in get the date and covert
|
||||
* it to day of week as a token
|
||||
*/
|
||||
transtime = xaccTransGetDate(transaction);
|
||||
tm_struct = gmtime(&transtime);
|
||||
if(!strftime(local_day_of_week, sizeof(local_day_of_week), "%A", tm_struct))
|
||||
{
|
||||
PERR("TransactionGetTokens: error, strftime failed\n");
|
||||
}
|
||||
|
||||
/* we cannot add a locally allocated string to this array, dup it so
|
||||
* it frees the same way the rest do
|
||||
*/
|
||||
tokens = g_list_prepend(tokens, g_strdup(local_day_of_week));
|
||||
|
||||
/* make tokens from the memo of each split of this transaction */
|
||||
split_index = 0;
|
||||
while((split = xaccTransGetSplit(transaction, split_index)))
|
||||
{
|
||||
text = xaccSplitGetMemo(split);
|
||||
tokens = tokenize_string(tokens, text);
|
||||
split_index++; /* next split */
|
||||
}
|
||||
|
||||
/* remember the list of tokens for later.. */
|
||||
info->match_tokens = tokens;
|
||||
|
||||
/* return the pointer to the GList */
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/* searches using the GNCImportTransInfo through all existing transactions
|
||||
* if there is an exact match of the description and memo
|
||||
*/
|
||||
static Account *
|
||||
matchmap_find_destination (GncImportMatchMap *matchmap,
|
||||
GNCImportTransInfo *info)
|
||||
matchmap_find_destination (GncImportMatchMap *matchmap, GNCImportTransInfo *info)
|
||||
{
|
||||
GncImportMatchMap *tmp_map;
|
||||
Account *result;
|
||||
g_assert (info);
|
||||
GList* tokens;
|
||||
gboolean useBayes;
|
||||
|
||||
g_assert (info);
|
||||
tmp_map = ((matchmap != NULL) ? matchmap :
|
||||
gnc_imap_create_from_account
|
||||
(xaccSplitGetAccount
|
||||
(gnc_import_TransInfo_get_fsplit (info))));
|
||||
|
||||
result = gnc_imap_find_account
|
||||
(tmp_map, GNCIMPORT_DESC,
|
||||
xaccTransGetDescription (gnc_import_TransInfo_get_trans (info)));
|
||||
useBayes = gnc_lookup_boolean_option(IMPORT_PAGE, BAYES_OPTION, TRUE);
|
||||
if(useBayes)
|
||||
{
|
||||
/* get the tokens for this transaction* */
|
||||
tokens = TransactionGetTokens(info);
|
||||
|
||||
/* try to find the destination account for this transaction from its tokens */
|
||||
result = gnc_imap_find_account_bayes(tmp_map, tokens);
|
||||
|
||||
} else {
|
||||
/* old system of transaction to account matching */
|
||||
result = gnc_imap_find_account
|
||||
(tmp_map, GNCIMPORT_DESC,
|
||||
xaccTransGetDescription (gnc_import_TransInfo_get_trans (info)));
|
||||
}
|
||||
|
||||
/* Disable matching by memo, until bayesian filtering is implemented.
|
||||
It's currently unlikely to help, and has adverse effects, causing false positives,
|
||||
since very often the type of the transaction is stored there.
|
||||
* It's currently unlikely to help, and has adverse effects,
|
||||
* causing false positives, since very often the type of the
|
||||
* transaction is stored there.
|
||||
|
||||
if (result == NULL)
|
||||
result = gnc_imap_find_account
|
||||
@ -390,6 +505,9 @@ matchmap_store_destination (GncImportMatchMap *matchmap,
|
||||
GncImportMatchMap *tmp_matchmap = NULL;
|
||||
Account *dest;
|
||||
const char *descr, *memo;
|
||||
GList *tokens;
|
||||
gboolean useBayes;
|
||||
|
||||
g_assert (trans_info);
|
||||
|
||||
/* This will store the destination account of the selected match if
|
||||
@ -410,20 +528,33 @@ matchmap_store_destination (GncImportMatchMap *matchmap,
|
||||
(xaccSplitGetAccount
|
||||
(gnc_import_TransInfo_get_fsplit (trans_info))));
|
||||
|
||||
descr = xaccTransGetDescription
|
||||
(gnc_import_TransInfo_get_trans (trans_info));
|
||||
if (descr && (strlen (descr) > 0))
|
||||
gnc_imap_add_account (tmp_matchmap,
|
||||
/* see what matching system we are currently using */
|
||||
useBayes = gnc_lookup_boolean_option(IMPORT_PAGE, BAYES_OPTION, TRUE);
|
||||
if(useBayes)
|
||||
{
|
||||
/* tokenize this transaction */
|
||||
tokens = TransactionGetTokens(trans_info);
|
||||
|
||||
/* add the tokens to the imap with the given destination account */
|
||||
gnc_imap_add_account_bayes(tmp_matchmap, tokens, dest);
|
||||
|
||||
} else {
|
||||
/* old matching system */
|
||||
descr = xaccTransGetDescription
|
||||
(gnc_import_TransInfo_get_trans (trans_info));
|
||||
if (descr && (strlen (descr) > 0))
|
||||
gnc_imap_add_account (tmp_matchmap,
|
||||
GNCIMPORT_DESC,
|
||||
descr,
|
||||
dest);
|
||||
memo = xaccSplitGetMemo
|
||||
(gnc_import_TransInfo_get_fsplit (trans_info));
|
||||
if (memo && (strlen (memo) > 0))
|
||||
gnc_imap_add_account (tmp_matchmap,
|
||||
memo = xaccSplitGetMemo
|
||||
(gnc_import_TransInfo_get_fsplit (trans_info));
|
||||
if (memo && (strlen (memo) > 0))
|
||||
gnc_imap_add_account (tmp_matchmap,
|
||||
GNCIMPORT_MEMO,
|
||||
memo,
|
||||
dest);
|
||||
} /* if(useBayes) */
|
||||
|
||||
if (matchmap == NULL)
|
||||
gnc_imap_destroy (tmp_matchmap);
|
||||
@ -935,7 +1066,7 @@ gnc_import_TransInfo_refresh_destacc (GNCImportTransInfo *transaction_info,
|
||||
/* if we haven't manually selected a destination account for this transaction */
|
||||
if(gnc_import_TransInfo_get_destacc_selected_manually(transaction_info) == FALSE)
|
||||
{
|
||||
/* Try to find a previous selected destination account string match for the ADD action */
|
||||
/* Try to find the destination account for this transaction based on prior ones */
|
||||
new_destacc = matchmap_find_destination(matchmap, transaction_info);
|
||||
gnc_import_TransInfo_set_destacc(transaction_info, new_destacc, FALSE);
|
||||
} else
|
||||
|
@ -25,11 +25,22 @@
|
||||
An import mapper service that stores Account Maps for the
|
||||
generic importer. This allows importers to map various
|
||||
"strings" to Gnucash accounts in a generic manner.
|
||||
@author Copyright (C) 2002 Derek Atkins <derek@ihtfp.com>
|
||||
@author Copyright (C) 2002,2003 Derek Atkins <derek@ihtfp.com>
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <glib.h>
|
||||
#include "import-match-map.h"
|
||||
#include "kvp_frame.h"
|
||||
#include "Group.h"
|
||||
#include "gnc-ui-util.h"
|
||||
#include "gnc-engine-util.h"
|
||||
|
||||
/********************************************************************\
|
||||
* Constants *
|
||||
\********************************************************************/
|
||||
|
||||
static short module = MOD_IMPORT;
|
||||
|
||||
|
||||
struct _GncImportMatchMap {
|
||||
kvp_frame * frame;
|
||||
@ -37,7 +48,8 @@ struct _GncImportMatchMap {
|
||||
GNCBook * book;
|
||||
};
|
||||
|
||||
#define IMAP_FRAME "import-map"
|
||||
#define IMAP_FRAME "import-map"
|
||||
#define IMAP_FRAME_BAYES "import-map-bayes"
|
||||
|
||||
static GncImportMatchMap *
|
||||
gnc_imap_create_from_frame (kvp_frame *frame, Account *acc, GNCBook *book)
|
||||
@ -99,6 +111,9 @@ void gnc_imap_clear (GncImportMatchMap *imap)
|
||||
/* Clear the IMAP_FRAME kvp */
|
||||
kvp_frame_set_slot_path (imap->frame, NULL, IMAP_FRAME);
|
||||
|
||||
/* Clear the bayes kvp, IMAP_FRAME_BAYES */
|
||||
kvp_frame_set_slot_path (imap->frame, NULL, IMAP_FRAME_BAYES);
|
||||
|
||||
/* XXX: mark the account (or book) as dirty! */
|
||||
}
|
||||
|
||||
@ -143,4 +158,368 @@ void gnc_imap_add_account (GncImportMatchMap *imap, const char *category,
|
||||
/* XXX Mark the account (or book) as dirty! */
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Below here is the bayes transaction to account matching system */
|
||||
struct account_token_count
|
||||
{
|
||||
char* account_name;
|
||||
gint64 token_count; /* occurances of a given token for this account_name */
|
||||
};
|
||||
|
||||
/* total_count and the token_count for a given account let us calculate the
|
||||
* probability of a given account with any single token
|
||||
*/
|
||||
struct token_accounts_info
|
||||
{
|
||||
GList *accounts; /* array of struct account_token_count */
|
||||
gint64 total_count;
|
||||
};
|
||||
|
||||
/* gpointer is a pointer to a struct token_accounts_info
|
||||
* NOTE: can always assume that keys are unique, reduces code in this function
|
||||
*/
|
||||
static void buildTokenInfo(const char *key, kvp_value *value, gpointer data)
|
||||
{
|
||||
struct token_accounts_info *tokenInfo = (struct token_accounts_info*)data;
|
||||
struct account_token_count* this_account;
|
||||
|
||||
// PINFO("buildTokenInfo: account '%s', token_count: '%ld'\n", (char*)key,
|
||||
// (long)kvp_value_get_gint64(value));
|
||||
|
||||
/* add the count to the total_count */
|
||||
tokenInfo->total_count += kvp_value_get_gint64(value);
|
||||
|
||||
/* allocate a new structure for this account and it's token count */
|
||||
this_account = (struct account_token_count*)
|
||||
g_new0(struct account_token_count, 1);
|
||||
|
||||
/* fill in the account name and number of tokens found for this account name */
|
||||
this_account->account_name = (char*)key;
|
||||
this_account->token_count = kvp_value_get_gint64(value);
|
||||
|
||||
/* append onto the glist a pointer to the new account_token_count structure */
|
||||
tokenInfo->accounts = g_list_prepend(tokenInfo->accounts, this_account);
|
||||
}
|
||||
|
||||
/* Running values needed to combine per-token probabilities for one account
 * with p(AB) = (a*b) / [a*b + (1-a)*(1-b)]:
 *   product            accumulates a*b*...
 *   product_difference accumulates (1-a)*(1-b)*...
 */
struct account_probability
{
  double product;             /* product of probabilities */
  double product_difference;  /* product of (1 - probability) */
};
|
||||
|
||||
/* convert a hash table of account names and (struct account_probability*)
|
||||
* into a hash table of 100000x the percentage match value, ie. 10% would be
|
||||
* 0.10 * 100000 = 10000
|
||||
*/
|
||||
#define PROBABILITY_FACTOR 100000
|
||||
static void buildProbabilities(gpointer key, gpointer value, gpointer data)
|
||||
{
|
||||
GHashTable *final_probabilities = (GHashTable*)data;
|
||||
struct account_probability *account_p = (struct account_probability*)value;
|
||||
|
||||
/* P(AB) = A*B / [A*B + (1-A)*(1-B)]
|
||||
* NOTE: so we only keep track of a running product(A*B*C...)
|
||||
* and product difference ((1-A)(1-B)...)
|
||||
*/
|
||||
gint32 probability =
|
||||
(account_p->product /
|
||||
(account_p->product + account_p->product_difference))
|
||||
* PROBABILITY_FACTOR;
|
||||
|
||||
PINFO("P('%s') = '%d'\n", (char*)key, probability);
|
||||
|
||||
g_hash_table_insert(final_probabilities, key, (gpointer)probability);
|
||||
}
|
||||
|
||||
/* Frees an array of the same time that buildProperties built */
|
||||
static void freeProbabilities(gpointer key, gpointer value, gpointer data)
|
||||
{
|
||||
/* free up the struct account_probability that was allocated
|
||||
* in gnc_imap_find_account_bayes()
|
||||
*/
|
||||
g_free(value);
|
||||
}
|
||||
|
||||
/* holds an account name and its corresponding integer probability
|
||||
* the integer probability is some factor of 10
|
||||
*/
|
||||
struct account_info
|
||||
{
|
||||
char* account_name;
|
||||
gint32 probability;
|
||||
};
|
||||
|
||||
/* Find the highest probability and the corresponding account name
|
||||
* store in data, a (struct account_info*)
|
||||
* NOTE: this is a g_hash_table_foreach() function for a hash table of entries
|
||||
* key is a pointer to the account name, value is a gint32, 100000x
|
||||
* the probability for this account
|
||||
*/
|
||||
static void highestProbability(gpointer key, gpointer value, gpointer data)
|
||||
{
|
||||
struct account_info *account_i = (struct account_info*)data;
|
||||
|
||||
/* if the current probability is greater than the stored, store the current */
|
||||
if((gint32)value > account_i->probability)
|
||||
{
|
||||
/* Save the new highest probability and the assoaciated account name */
|
||||
account_i->probability = (gint32)value;
|
||||
account_i->account_name = key;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define threshold (.90 * PROBABILITY_FACTOR) /* 90% */
|
||||
|
||||
/* Look up an Account in the map */
|
||||
Account* gnc_imap_find_account_bayes(GncImportMatchMap *imap, GList *tokens)
|
||||
{
|
||||
struct token_accounts_info tokenInfo; /* holds the accounts and total
|
||||
* token count for a single token */
|
||||
GList *current_token; /* pointer to the current token from the
|
||||
* input GList *tokens */
|
||||
GList *current_account_token; /* pointer to the struct
|
||||
* account_token_count */
|
||||
struct account_token_count *account_c; /* an account name and the number
|
||||
* of times a token has appeared
|
||||
* for the account */
|
||||
struct account_probability *account_p; /* intermediate storage of values
|
||||
* to compute the bayes probability
|
||||
* of an account */
|
||||
GHashTable *running_probabilities = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
GHashTable *final_probabilities = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
struct account_info account_i;
|
||||
kvp_value* value;
|
||||
kvp_frame* token_frame;
|
||||
|
||||
ENTER(" ");
|
||||
|
||||
/* check to see if the imap is NULL */
|
||||
if(!imap)
|
||||
{
|
||||
PINFO("imap is null, returning null");
|
||||
LEAVE(" ");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* find the probability for each account that contains any of the tokens
|
||||
* in the input tokens list
|
||||
*/
|
||||
for(current_token = tokens; current_token; current_token = current_token->next)
|
||||
{
|
||||
/* zero out the token_accounts_info structure */
|
||||
memset(&tokenInfo, 0, sizeof(struct token_accounts_info));
|
||||
|
||||
PINFO("token: '%s'", (char*)current_token->data);
|
||||
|
||||
/* find the slot for the given token off of the source account
|
||||
* for these tokens, search off of the IMAP_FRAME_BAYES path so
|
||||
* we aren't looking from the parent of the entire kvp tree
|
||||
*/
|
||||
value = kvp_frame_get_slot_path(imap->frame, IMAP_FRAME_BAYES,
|
||||
(char*)current_token->data, NULL);
|
||||
|
||||
/* if value is null we should skip over this token */
|
||||
if(!value)
|
||||
continue;
|
||||
|
||||
/* convert the slot(value) into a the frame that contains the
|
||||
* list of accounts
|
||||
*/
|
||||
token_frame = kvp_value_get_frame(value);
|
||||
|
||||
/* token_frame should NEVER be null */
|
||||
if(!token_frame)
|
||||
{
|
||||
PERR("token '%s' has no accounts", (char*)current_token->data);
|
||||
continue; /* skip over this token */
|
||||
}
|
||||
|
||||
/* process the accounts for this token, adding the account if it
|
||||
* doesn't already exist or adding to the existing accounts token
|
||||
* count if it does
|
||||
*/
|
||||
kvp_frame_for_each_slot(token_frame, buildTokenInfo, &tokenInfo);
|
||||
|
||||
/* for each account we have just found, see if the account already exists
|
||||
* in the list of account probabilities, if not add it
|
||||
*/
|
||||
for(current_account_token = tokenInfo.accounts; current_account_token;
|
||||
current_account_token = current_account_token->next)
|
||||
{
|
||||
/* get the account name and corresponding token count */
|
||||
account_c = (struct account_token_count*)current_account_token->data;
|
||||
|
||||
PINFO("account_c->account_name('%s'), "
|
||||
"account_c->token_count('%ld')/total_count('%ld')",
|
||||
account_c->account_name, (long)account_c->token_count,
|
||||
(long)tokenInfo.total_count);
|
||||
|
||||
account_p = g_hash_table_lookup(running_probabilities,
|
||||
account_c->account_name);
|
||||
|
||||
/* if the account exists in the list then continue
|
||||
* the running probablities
|
||||
*/
|
||||
if(account_p)
|
||||
{
|
||||
account_p->product =
|
||||
((double)account_c->token_count / (double)tokenInfo.total_count)
|
||||
* account_p->product;
|
||||
account_p->product_difference =
|
||||
((double)1 - ((double)account_c->token_count /
|
||||
(double)tokenInfo.total_count))
|
||||
* account_p->product_difference;
|
||||
PINFO("product == %f, product_difference == %f",
|
||||
account_p->product, account_p->product_difference);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* add a new entry */
|
||||
PINFO("adding a new entry for this account");
|
||||
account_p = (struct account_probability*)
|
||||
g_new0(struct account_probability, 1);
|
||||
|
||||
/* set the product and product difference values */
|
||||
account_p->product = ((double)account_c->token_count /
|
||||
(double)tokenInfo.total_count);
|
||||
account_p->product_difference =
|
||||
(double)1 - ((double)account_c->token_count /
|
||||
(double)tokenInfo.total_count);
|
||||
|
||||
PINFO("product == %f, product_difference == %f",
|
||||
account_p->product, account_p->product_difference);
|
||||
|
||||
/* add the account name and (struct account_probability*)
|
||||
* to the hash table */
|
||||
g_hash_table_insert(running_probabilities,
|
||||
account_c->account_name, account_p);
|
||||
}
|
||||
} /* for all accounts in tokenInfo */
|
||||
|
||||
/* free the data in tokenInfo */
|
||||
for(current_account_token = tokenInfo.accounts; current_account_token;
|
||||
current_account_token = current_account_token->next)
|
||||
{
|
||||
/* free up each struct account_token_count we allocated */
|
||||
g_free((struct account_token_count*)current_account_token->data);
|
||||
}
|
||||
|
||||
g_list_free(tokenInfo.accounts); /* free the accounts GList */
|
||||
}
|
||||
|
||||
/* build a hash table of account names and their final probabilities
|
||||
* from each entry in the running_probabilties hash table
|
||||
*/
|
||||
g_hash_table_foreach(running_probabilities, buildProbabilities,
|
||||
final_probabilities);
|
||||
|
||||
/* find the highest probabilty and the corresponding account */
|
||||
memset(&account_i, 0, sizeof(struct account_info));
|
||||
g_hash_table_foreach(final_probabilities, highestProbability, &account_i);
|
||||
|
||||
/* free each element of the running_probabilities hash */
|
||||
g_hash_table_foreach(running_probabilities, freeProbabilities, NULL);
|
||||
|
||||
/* free the hash tables */
|
||||
g_hash_table_destroy(running_probabilities);
|
||||
g_hash_table_destroy(final_probabilities);
|
||||
|
||||
PINFO("highest P('%s') = '%d'", account_i.account_name, account_i.probability);
|
||||
|
||||
/* has this probability met our threshold? */
|
||||
if(account_i.probability >= threshold)
|
||||
{
|
||||
PINFO("found match");
|
||||
LEAVE(" ");
|
||||
return xaccGetAccountFromFullName(gnc_book_get_group(imap->book),
|
||||
account_i.account_name,
|
||||
gnc_get_account_separator());
|
||||
}
|
||||
|
||||
PINFO("no match");
|
||||
LEAVE(" ");
|
||||
|
||||
return NULL; /* we didn't meet our threshold, return NULL for an account */
|
||||
}
|
||||
|
||||
|
||||
/* Updates the imap for a given account using a list of tokens */
|
||||
void gnc_imap_add_account_bayes(GncImportMatchMap *imap, GList *tokens, Account *acc)
|
||||
{
|
||||
GList *current_token;
|
||||
kvp_value *value;
|
||||
gint64 token_count;
|
||||
char* account_fullname;
|
||||
kvp_value *new_value; /* the value that will be added back into the kvp tree */
|
||||
|
||||
ENTER(" ");
|
||||
|
||||
/* if imap is null return */
|
||||
if(!imap)
|
||||
{
|
||||
LEAVE(" ");
|
||||
return;
|
||||
}
|
||||
|
||||
account_fullname = xaccAccountGetFullName(acc, gnc_get_account_separator());
|
||||
|
||||
PINFO("account name: '%s'\n", account_fullname);
|
||||
|
||||
/* process each token in the list */
|
||||
for(current_token = g_list_first(tokens); current_token;
|
||||
current_token = current_token->next)
|
||||
{
|
||||
/* start off with no tokens for this account */
|
||||
token_count = 0;
|
||||
|
||||
PINFO("adding token '%s'\n", (char*)current_token->data);
|
||||
|
||||
/* is this token/account_name already in the kvp tree? */
|
||||
value = kvp_frame_get_slot_path(imap->frame, IMAP_FRAME_BAYES,
|
||||
(char*)current_token->data, account_fullname,
|
||||
NULL);
|
||||
|
||||
/* if the token/account is already in the tree, read the current
|
||||
* value from the tree and use this for the basis of the value we
|
||||
* are putting back
|
||||
*/
|
||||
if(value)
|
||||
{
|
||||
PINFO("found existing value of '%ld'\n",
|
||||
(long)kvp_value_get_gint64(value));
|
||||
|
||||
/* convert this value back into an integer */
|
||||
token_count+=kvp_value_get_gint64(value);
|
||||
}
|
||||
|
||||
/* increment the token count */
|
||||
token_count++;
|
||||
|
||||
/* create a new value */
|
||||
new_value = kvp_value_new_gint64(token_count);
|
||||
|
||||
/* insert the value into the kvp tree at
|
||||
* /imap->frame/IMAP_FRAME/token_string/account_name_string
|
||||
*/
|
||||
kvp_frame_set_slot_path(imap->frame, new_value, IMAP_FRAME_BAYES,
|
||||
(char*)current_token->data, account_fullname, NULL);
|
||||
|
||||
/* kvp_frame_set_slot_path() copied the value so we
|
||||
* need to delete this one ;-) */
|
||||
kvp_value_delete(new_value);
|
||||
}
|
||||
|
||||
/* free up the account fullname string */
|
||||
g_free(account_fullname);
|
||||
|
||||
LEAVE(" ");
|
||||
}
|
||||
|
||||
/** @} */
|
||||
|
@ -24,7 +24,7 @@
|
||||
An import mapper service that stores Account Maps for the
|
||||
generic importer. This allows importers to map various
|
||||
"strings" to Gnucash accounts in a generic manner.
|
||||
@author Copyright (C) 2002 Derek Atkins <derek@ihtfp.com>
|
||||
@author Copyright (C) 2002,2003 Derek Atkins <derek@ihtfp.com>
|
||||
*/
|
||||
#ifndef GNC_IMPORT_MATCH_MAP_H
|
||||
#define GNC_IMPORT_MATCH_MAP_H
|
||||
@ -48,8 +48,8 @@ void gnc_imap_destroy (GncImportMatchMap *imap);
|
||||
void gnc_imap_clear (GncImportMatchMap *imap);
|
||||
|
||||
/** Look up an Account in the map */
|
||||
Account * gnc_imap_find_account (GncImportMatchMap *imap, const char *category,
|
||||
const char *key);
|
||||
Account* gnc_imap_find_account(GncImportMatchMap *imap, const char* category,
|
||||
const char *key);
|
||||
|
||||
/** Store an Account in the map. This mapping is immediately stored in
|
||||
the underlying kvp frame, regardless of whether the MatchMap is
|
||||
@ -57,6 +57,16 @@ Account * gnc_imap_find_account (GncImportMatchMap *imap, const char *category,
|
||||
void gnc_imap_add_account (GncImportMatchMap *imap, const char *category,
|
||||
const char *key, Account *acc);
|
||||
|
||||
/** Look up an Account in the map from a GList* of pointers to strings(tokens)
|
||||
from the current transaction */
|
||||
Account* gnc_imap_find_account_bayes (GncImportMatchMap *imap, GList* tokens);
|
||||
|
||||
/** Store an Account in the map. This mapping is immediately stored in
|
||||
the underlying kvp frame, regardless of whether the MatchMap is
|
||||
destroyed later or not. */
|
||||
void gnc_imap_add_account_bayes (GncImportMatchMap *imap, GList* tokens,
|
||||
Account *acc);
|
||||
|
||||
|
||||
/** @name Some well-known categories
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user