Changed bayes import map design

This commit introduces a new feature flag:
GNC_FEATURE_GUID_FLAT_BAYESIAN. It signifies that the bayes import map
data are stored flat and by guid. Any time bayes import map data are
accessed, they are converted if necessary.
This commit is contained in:
lmat
2017-12-09 19:49:03 -08:00
parent f782be1a51
commit fbf4843f31
8 changed files with 209 additions and 193 deletions

View File

@@ -1022,12 +1022,6 @@ RESTART:
gnc_warning_dialog(NULL, "%s", message);
g_free ( message );
}
// Convert imap mappings from account full name to guid strings
qof_event_suspend();
gnc_account_imap_convert_bayes (gnc_get_current_book());
qof_event_resume();
return TRUE;
}

View File

@@ -5270,6 +5270,160 @@ get_first_pass_probabilities(GncImportMatchMap * imap, GList * tokens)
return ret;
}
static std::string
look_for_old_separator_descendants (Account *root, std::string const & full_name, const gchar *separator)
{
GList *top_accounts, *ptr;
gint found_len = 0;
gchar found_sep;
top_accounts = gnc_account_get_descendants (root);
PINFO("Incoming full_name is '%s', current separator is '%s'", full_name.c_str (), separator);
/* Go through list of top level accounts */
for (ptr = top_accounts; ptr; ptr = g_list_next (ptr))
{
const gchar *name = xaccAccountGetName (static_cast <Account const *> (ptr->data));
// we are looking for the longest top level account that matches
if (g_str_has_prefix (full_name.c_str (), name))
{
gint name_len = strlen (name);
const gchar old_sep = full_name[name_len];
if (!g_ascii_isalnum (old_sep)) // test for non alpha numeric
{
if (name_len > found_len)
{
found_sep = full_name[name_len];
found_len = name_len;
}
}
}
}
g_list_free (top_accounts); // Free the List
std::string new_name {full_name};
if (found_len > 1)
std::replace (new_name.begin (), new_name.end (), found_sep, *separator);
PINFO ("Return full_name is '%s'", new_name.c_str ());
return new_name;
}
static std::string
get_guid_from_account_name (Account * root, std::string const & name)
{
auto map_account = gnc_account_lookup_by_full_name (root, name.c_str ());
if (!map_account)
{
auto temp_account_name = look_for_old_separator_descendants (root, name,
gnc_get_account_separator_string ());
map_account = gnc_account_lookup_by_full_name (root, temp_account_name.c_str ());
}
auto temp_guid = gnc::GUID {*xaccAccountGetGUID (map_account)};
return temp_guid.to_string ();
}
static FlatKvpEntry
convert_entry (KvpEntry entry, Account* root)
{
/*We need to make a copy here.*/
auto account_name = entry.first.back();
if (!gnc::GUID::is_valid_guid (account_name))
{
/* Earlier version stored the account name in the import map, and
* there were early beta versions of 2.7 that stored a GUID.
* If there is no GUID, we assume it's an account name. */
/* Take off the account name and replace it with the GUID */
entry.first.pop_back();
auto guid_str = get_guid_from_account_name (root, account_name);
entry.first.emplace_back (guid_str);
}
std::string new_key {std::accumulate (entry.first.begin(), entry.first.end(), std::string {})};
new_key = IMAP_FRAME_BAYES + new_key;
return {new_key, entry.second};
}
static std::vector<FlatKvpEntry>
get_new_flat_imap (Account * acc)
{
auto frame = qof_instance_get_slots (QOF_INSTANCE (acc));
auto slot = frame->get_slot ({IMAP_FRAME_BAYES});
if (!slot)
return {};
auto imap_frame = slot->get<KvpFrame*> ();
auto flat_kvp = imap_frame->flatten_kvp ();
auto root = gnc_account_get_root (acc);
std::vector <FlatKvpEntry> ret;
for (auto const & flat_entry : flat_kvp)
{
auto converted_entry = convert_entry (flat_entry, root);
/*If the entry was invalid, we don't perpetuate it.*/
if (converted_entry.first.size())
ret.emplace_back (converted_entry);
}
return ret;
}
static bool
convert_imap_account_bayes_to_flat (Account *acc)
{
auto frame = qof_instance_get_slots (QOF_INSTANCE (acc));
if (!frame->get_keys().size())
return false;
auto new_imap = get_new_flat_imap(acc);
xaccAccountBeginEdit(acc);
frame->set({IMAP_FRAME_BAYES}, nullptr);
if (!new_imap.size ())
{
xaccAccountCommitEdit(acc);
return false;
}
std::for_each(new_imap.begin(), new_imap.end(), [&frame] (FlatKvpEntry const & entry) {
frame->set({entry.first.c_str()}, entry.second);
});
qof_instance_set_dirty (QOF_INSTANCE (acc));
xaccAccountCommitEdit(acc);
return true;
}
/*
* Checks for import map data and converts them when found.
*/
static bool
imap_convert_bayes_to_flat (QofBook * book)
{
auto root = gnc_book_get_root_account (book);
auto accts = gnc_account_get_descendants_sorted (root);
bool ret = false;
for (auto ptr = accts; ptr; ptr = g_list_next (ptr))
{
Account *acc = static_cast <Account*> (ptr->data);
if (convert_imap_account_bayes_to_flat (acc))
{
ret = true;
gnc_features_set_used (book, GNC_FEATURE_GUID_FLAT_BAYESIAN);
}
}
g_list_free (accts);
return ret;
}
/*
* Here we check to see the state of import map data.
*
* If the GUID_FLAT_BAYESIAN feature flag is set, everything
* should be fine.
*
* If it is not set, there are two possibilities: import data
* are present from a previous version or not. If they are,
* they are converted, and the feature flag set. If there are
* no previous data, nothing is done.
*/
static void
check_import_map_data (QofBook *book)
{
if (gnc_features_check_used (book, GNC_FEATURE_GUID_FLAT_BAYESIAN))
return;
/* This function will set GNC_FEATURE_GUID_FLAT_BAYESIAN if necessary.*/
imap_convert_bayes_to_flat (book);
}
static constexpr double threshold = .90 * probability_factor; /* 90% */
/** Look up an Account in the map */
@@ -5278,6 +5432,7 @@ gnc_account_imap_find_account_bayes (GncImportMatchMap *imap, GList *tokens)
{
if (!imap)
return nullptr;
check_import_map_data (imap->book);
auto first_pass = get_first_pass_probabilities(imap, tokens);
if (!first_pass.size())
return nullptr;
@@ -5330,7 +5485,7 @@ change_imap_entry (GncImportMatchMap *imap, std::string const & path, int64_t to
// Add or Update the entry based on guid
qof_instance_set_path_kvp (QOF_INSTANCE (imap->acc), &value, {path});
gnc_features_set_used (imap->book, GNC_FEATURE_GUID_BAYESIAN);
gnc_features_set_used (imap->book, GNC_FEATURE_GUID_FLAT_BAYESIAN);
}
/** Updates the imap for a given account using a list of tokens */
@@ -5350,6 +5505,7 @@ gnc_account_imap_add_account_bayes (GncImportMatchMap *imap,
LEAVE(" ");
return;
}
check_import_map_data (imap->book);
g_return_if_fail (acc != NULL);
account_fullname = gnc_account_get_full_name(acc);
@@ -5462,6 +5618,7 @@ build_bayes (const char *key, KvpValue * value, GncImapInfo & imapInfo)
GList *
gnc_account_imap_get_info_bayes (Account *acc)
{
check_import_map_data (gnc_account_get_book (acc));
/* A dummy object which is used to hold the specified account, and the list
* of data about which we care. */
GncImapInfo imapInfo {acc, nullptr};
@@ -5533,151 +5690,6 @@ gnc_account_delete_map_entry (Account *acc, char *full_category, gboolean empty)
g_free (full_category);
}
/*******************************************************************************/
static std::string
look_for_old_separator_descendants (Account *root, std::string const & full_name, const gchar *separator)
{
GList *top_accounts, *ptr;
gint found_len = 0;
gchar found_sep;
top_accounts = gnc_account_get_descendants (root);
PINFO("Incoming full_name is '%s', current separator is '%s'", full_name.c_str (), separator);
/* Go through list of top level accounts */
for (ptr = top_accounts; ptr; ptr = g_list_next (ptr))
{
const gchar *name = xaccAccountGetName (static_cast <Account const *> (ptr->data));
// we are looking for the longest top level account that matches
if (g_str_has_prefix (full_name.c_str (), name))
{
gint name_len = strlen (name);
const gchar old_sep = full_name[name_len];
if (!g_ascii_isalnum (old_sep)) // test for non alpha numeric
{
if (name_len > found_len)
{
found_sep = full_name[name_len];
found_len = name_len;
}
}
}
}
g_list_free (top_accounts); // Free the List
std::string new_name {full_name};
if (found_len > 1)
std::replace (new_name.begin (), new_name.end (), found_sep, *separator);
PINFO("Return full_name is '%s'", new_name);
return new_name;
}
static std::string
get_guid_from_account_name (Account * root, std::string const & name)
{
auto map_account = gnc_account_lookup_by_full_name (root, name.c_str ());
if (!map_account)
{
auto temp_account_name = look_for_old_separator_descendants (root, name,
gnc_get_account_separator_string ());
map_account = gnc_account_lookup_by_full_name (root, temp_account_name.c_str ());
}
auto temp_guid = gnc::GUID {*xaccAccountGetGUID (map_account)};
return temp_guid.to_string ();
}
static FlatKvpEntry
convert_entry (KvpEntry entry, Account* root)
{
/*We need to make a copy here.*/
auto account_name = entry.first.back();
entry.first.pop_back();
auto guid_str = get_guid_from_account_name (root, account_name);
entry.first.emplace_back ("/");
entry.first.emplace_back (guid_str);
std::string new_key {std::accumulate (entry.first.begin(), entry.first.end(), std::string {})};
new_key = IMAP_FRAME_BAYES + new_key;
return {new_key, entry.second};
}
static std::vector<FlatKvpEntry>
get_new_guid_imap (Account * acc)
{
auto frame = qof_instance_get_slots (QOF_INSTANCE (acc));
auto slot = frame->get_slot ({IMAP_FRAME_BAYES});
if (!slot)
return {};
auto imap_frame = slot->get<KvpFrame*> ();
auto flat_kvp = imap_frame->flatten_kvp ();
auto root = gnc_account_get_root (acc);
std::vector <FlatKvpEntry> ret;
for (auto const & flat_entry : flat_kvp)
ret.emplace_back (convert_entry (flat_entry, root));
return ret;
}
static bool
convert_imap_account_bayes_to_guid (Account *acc)
{
auto frame = qof_instance_get_slots (QOF_INSTANCE (acc));
if (!frame->get_keys().size())
return false;
auto new_imap = get_new_guid_imap(acc);
xaccAccountBeginEdit(acc);
frame->set({IMAP_FRAME_BAYES}, nullptr);
if (!new_imap.size ())
{
xaccAccountCommitEdit(acc);
return false;
}
std::for_each(new_imap.begin(), new_imap.end(), [&frame] (FlatKvpEntry const & entry) {
frame->set({entry.first.c_str()}, entry.second);
});
qof_instance_set_dirty (QOF_INSTANCE (acc));
xaccAccountCommitEdit(acc);
return true;
}
char const * run_once_key_to_guid {"changed-bayesian-to-guid"};
static void
imap_convert_bayes_to_guid (QofBook * book)
{
auto root = gnc_book_get_root_account (book);
auto accts = gnc_account_get_descendants_sorted (root);
for (auto ptr = accts; ptr; ptr = g_list_next (ptr))
{
Account *acc = static_cast <Account*> (ptr->data);
if (convert_imap_account_bayes_to_guid (acc))
gnc_features_set_used (book, GNC_FEATURE_GUID_BAYESIAN);
}
g_list_free (accts);
}
static bool
run_once_key_set (char const * key, QofBook * book)
{
GValue value G_VALUE_INIT;
qof_instance_get_path_kvp (QOF_INSTANCE(book), &value, {key});
return G_VALUE_HOLDS_STRING(&value) && strcmp(g_value_get_string(&value), "true");
}
static void
set_run_once_key (char const * key, QofBook * book)
{
GValue value G_VALUE_INIT;
g_value_init(&value, G_TYPE_BOOLEAN);
g_value_set_boolean(&value, TRUE);
qof_instance_set_path_kvp(QOF_INSTANCE (book), &value, {key});
}
void
gnc_account_imap_convert_bayes (QofBook *book)
{
if (run_once_key_set (run_once_key_to_guid, book))
return;
imap_convert_bayes_to_guid (book);
set_run_once_key (run_once_key_to_guid, book);
}
/* ================================================================ */
/* QofObject function implementation and registration */

View File

@@ -1449,11 +1449,6 @@ gchar *gnc_account_get_map_entry (Account *acc, const char *full_category);
*/
void gnc_account_delete_map_entry (Account *acc, char *full_category, gboolean empty);
/** Search for Bayesian entries with mappings based on full account name and change
* them to be based on the account guid
*/
void gnc_account_imap_convert_bayes (QofBook *book);
/** @} */

View File

@@ -46,6 +46,7 @@ static gncFeature known_features[] =
{ GNC_FEATURE_KVP_EXTRA_DATA, "Extra data for addresses, jobs or invoice entries (requires at least GnuCash 2.6.4)" },
{ GNC_FEATURE_BOOK_CURRENCY, "User specifies a 'book-currency'; costs of other currencies/commodities tracked in terms of book-currency (requires at least GnuCash 2.7.0)" },
{ GNC_FEATURE_GUID_BAYESIAN, "Use account GUID as key for Bayesian data (requires at least GnuCash 2.6.12)" },
{ GNC_FEATURE_GUID_FLAT_BAYESIAN, "Use account GUID as key for bayesian data and store KVP flat" },
{ NULL },
};
@@ -148,6 +149,32 @@ void gnc_features_set_used (QofBook *book, const gchar *feature)
}
qof_book_set_feature (book, feature, description);
}
struct CheckFeature
{
gchar const * checked_feature;
gboolean found;
};
static void gnc_features_check_feature_cb (gpointer pkey, gpointer value,
gpointer data)
{
const gchar *key = (const gchar*)pkey;
struct CheckFeature * check_data = data;
g_assert(data);
if (!g_strcmp0 (key, check_data->checked_feature))
check_data->found = TRUE;
}
gboolean gnc_features_check_used (QofBook *book, const gchar * feature)
{
GHashTable *features_used = qof_book_get_features (book);
struct CheckFeature check_data = {feature, FALSE};
/* Setup the known_features hash table */
gnc_features_init();
g_hash_table_foreach (features_used, &gnc_features_check_feature_cb, &check_data);
g_hash_table_unref (features_used);
return check_data.found;
}

View File

@@ -50,6 +50,7 @@ extern "C" {
#define GNC_FEATURE_KVP_EXTRA_DATA "Extra data in addresses, jobs or invoice entries"
#define GNC_FEATURE_BOOK_CURRENCY "Use a Book-Currency"
#define GNC_FEATURE_GUID_BAYESIAN "Account GUID based Bayesian data"
#define GNC_FEATURE_GUID_FLAT_BAYESIAN "Account GUID based bayesian with flat KVP"
/** @} */
@@ -68,10 +69,14 @@ gchar *gnc_features_test_unknown (QofBook *book);
*/
void gnc_features_set_used (QofBook *book, const gchar *feature);
/*
* Returns true if the specified feature is used.
*/
gboolean gnc_features_check_used (QofBook *, char const * feature);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /*__cplusplus*/
#endif /* GNC_FEATURES_H */
/** @} */
/** @} */

View File

@@ -427,16 +427,15 @@ KvpFrame::flatten_kvp_impl(std::vector <std::string> path, std::vector <KvpEntry
{
for (auto const & entry : m_valuemap)
{
std::vector<std::string> new_path {path};
new_path.push_back("/");
if (entry.second->get_type() == KvpValue::Type::FRAME)
{
std::vector<std::string> send_path {path};
send_path.push_back("/");
send_path.push_back(entry.first);
entry.second->get<KvpFrame*>()->flatten_kvp_impl(send_path, entries);
new_path.push_back(entry.first);
entry.second->get<KvpFrame*>()->flatten_kvp_impl(new_path, entries);
}
else
{
std::vector <std::string> new_path {path};
new_path.emplace_back (entry.first);
entries.emplace_back (new_path, entry.second);
}

View File

@@ -1076,7 +1076,7 @@ static void
add_feature_to_hash (const gchar *key, KvpValue *value, GHashTable * user_data)
{
gchar *descr = g_strdup(value->get<const char*>());
g_hash_table_insert (*(GHashTable**)user_data, (gchar*)key, descr);
g_hash_table_insert (user_data, (gchar*)key, descr);
}
GHashTable *

View File

@@ -318,13 +318,8 @@ TEST_F(ImapBayesTest, AddAccountBayes)
EXPECT_EQ(2, value->get<int64_t>());
}
TEST_F(ImapBayesTest, ConvertAccountBayes)
TEST_F(ImapBayesTest, ConvertBayesData)
{
// prevent the embedded beginedit/committedit from doing anything
qof_instance_increase_editlevel(QOF_INSTANCE(t_bank_account));
qof_instance_mark_clean(QOF_INSTANCE(t_bank_account));
gnc_account_imap_add_account_bayes(t_imap, t_list1, t_expense_account1); //Food
gnc_account_imap_add_account_bayes(t_imap, t_list2, t_expense_account2); //Drink
auto root = qof_instance_get_slots(QOF_INSTANCE(t_bank_account));
auto book = qof_instance_get_slots(QOF_INSTANCE(t_imap->book));
auto acct1_guid = guid_to_string (xaccAccountGetGUID(t_expense_account1)); //Food
@@ -334,15 +329,6 @@ TEST_F(ImapBayesTest, ConvertAccountBayes)
auto val1 = new KvpValue(static_cast<int64_t>(10));
auto val2 = new KvpValue(static_cast<int64_t>(5));
auto val3 = new KvpValue(static_cast<int64_t>(2));
// Test for existing entries, all will be 1
auto value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + foo + "/" + acct1_guid});
EXPECT_EQ(1, value->get<int64_t>());
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + bar + "/" + acct1_guid});
EXPECT_EQ(1, value->get<int64_t>());
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + baz + "/" + acct2_guid});
EXPECT_EQ(1, value->get<int64_t>());
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + waldo + "/" + acct2_guid});
EXPECT_EQ(1, value->get<int64_t>());
// Set up some old entries
root->set_path({IMAP_FRAME_BAYES, "severely", "divided", "token", "Asset-Bank"}, val1);
root->set_path({IMAP_FRAME_BAYES, salt, "Asset-Bank#Bank"}, new KvpValue{*val1});
@@ -350,13 +336,11 @@ TEST_F(ImapBayesTest, ConvertAccountBayes)
root->set_path({IMAP_FRAME_BAYES, pork, "Expense#Food"}, new KvpValue{*val2});
root->set_path({IMAP_FRAME_BAYES, sausage, "Expense#Drink"}, val3);
root->set_path({IMAP_FRAME_BAYES, foo, "Expense#Food"}, new KvpValue{*val2});
EXPECT_EQ(1, qof_instance_get_editlevel(QOF_INSTANCE(t_bank_account)));
EXPECT_TRUE(qof_instance_get_dirty_flag(QOF_INSTANCE(t_bank_account)));
qof_instance_mark_clean(QOF_INSTANCE(t_bank_account));
// Start Convert
gnc_account_imap_convert_bayes (t_imap->book);
root->set_path({IMAP_FRAME_BAYES, salt, acct1_guid}, new KvpValue{*val1});
/*Calling into the imap functions should trigger a conversion.*/
gnc_account_imap_add_account_bayes(t_imap, t_list5, t_expense_account2); //pork and sausage; account Food
// convert from 'Asset-Bank' to 'Asset-Bank' guid
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/severely/divided/token/" + acct3_guid});
auto value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/severely/divided/token/" + acct3_guid});
EXPECT_EQ(10, value->get<int64_t>());
// convert from 'Asset-Bank#Bank' to 'Sav Bank' guid
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + salt + "/" + acct4_guid});
@@ -366,14 +350,14 @@ TEST_F(ImapBayesTest, ConvertAccountBayes)
EXPECT_EQ(5, value->get<int64_t>());
// convert from 'Expense#Drink' to 'Drink' guid
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + sausage + "/" + acct2_guid});
EXPECT_EQ(2, value->get<int64_t>());
/*We put in 2, then called it once to bring it up to 3.*/
EXPECT_EQ(3, value->get<int64_t>());
// convert from 'Expense#Food' to 'Food' guid but add to original value
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + foo + "/" + acct1_guid});
EXPECT_EQ(5, value->get<int64_t>());
// Check for run once flag
auto vals = book->get_slot({"changed-bayesian-to-guid"});
EXPECT_STREQ("true", vals->get<const char*>());
EXPECT_EQ(1, qof_instance_get_editlevel(QOF_INSTANCE(t_bank_account)));
// Keep GUID value from original
value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + salt + "/" + acct1_guid});
EXPECT_EQ(10, value->get<int64_t>());
EXPECT_TRUE(qof_instance_get_dirty_flag(QOF_INSTANCE(t_bank_account)));
}