Alerting: remove State cache entries on Ruler Delete (#33638)

for https://github.com/grafana/alerting-squad/issues/133
This commit is contained in:
Kyle Brandt 2021-05-03 14:01:33 -04:00 committed by GitHub
parent 0609b80fdc
commit 48358efc13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 12 deletions

View File

@ -73,7 +73,7 @@ func (api *API) RegisterAPIEndpoints(m *metrics.Metrics) {
api.RegisterRulerApiEndpoints(NewForkedRuler(
api.DatasourceCache,
NewLotexRuler(proxy, logger),
RulerSrv{DatasourceCache: api.DatasourceCache, store: api.RuleStore, log: logger},
RulerSrv{DatasourceCache: api.DatasourceCache, manager: api.StateManager, store: api.RuleStore, log: logger},
), m)
api.RegisterTestingApiEndpoints(TestingApiSrv{
AlertingProxy: proxy,

View File

@ -7,6 +7,7 @@ import (
"time"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
coreapi "github.com/grafana/grafana/pkg/api"
@ -22,6 +23,7 @@ import (
// RulerSrv bundles the dependencies used by the ruler route handlers, such as
// RouteDeleteNamespaceRulesConfig and RouteDeleteRuleGroupConfig.
type RulerSrv struct {
// store is the rule store; the delete handlers call it to remove alert rules
// and receive the UIDs of the rules that were removed.
store store.RuleStore
DatasourceCache datasources.CacheService
// manager is the alert state manager; after a successful delete the handlers
// call RemoveByRuleUID on it once per deleted rule UID.
manager *state.Manager
log log.Logger
}
@ -32,9 +34,15 @@ func (srv RulerSrv) RouteDeleteNamespaceRulesConfig(c *models.ReqContext) respon
return toNamespaceErrorResponse(err)
}
if err := srv.store.DeleteNamespaceAlertRules(c.SignedInUser.OrgId, namespace.Uid); err != nil {
uids, err := srv.store.DeleteNamespaceAlertRules(c.SignedInUser.OrgId, namespace.Uid)
if err != nil {
return response.Error(http.StatusInternalServerError, "failed to delete namespace alert rules", err)
}
for _, uid := range uids {
srv.manager.RemoveByRuleUID(c.SignedInUser.OrgId, uid)
}
return response.JSON(http.StatusAccepted, util.DynMap{"message": "namespace rules deleted"})
}
@ -45,13 +53,19 @@ func (srv RulerSrv) RouteDeleteRuleGroupConfig(c *models.ReqContext) response.Re
return toNamespaceErrorResponse(err)
}
ruleGroup := c.Params(":Groupname")
if err := srv.store.DeleteRuleGroupAlertRules(c.SignedInUser.OrgId, namespace.Uid, ruleGroup); err != nil {
uids, err := srv.store.DeleteRuleGroupAlertRules(c.SignedInUser.OrgId, namespace.Uid, ruleGroup)
if err != nil {
if errors.Is(err, ngmodels.ErrRuleGroupNamespaceNotFound) {
return response.Error(http.StatusNotFound, "failed to delete rule group", err)
}
return response.Error(http.StatusInternalServerError, "failed to delete rule group", err)
}
for _, uid := range uids {
srv.manager.RemoveByRuleUID(c.SignedInUser.OrgId, uid)
}
return response.JSON(http.StatusAccepted, util.DynMap{"message": "rule group deleted"})
}

View File

@ -113,6 +113,17 @@ func (c *cache) getStatesByRuleUID() map[string][]*State {
return ruleMap
}
// removeByRuleUID evicts every cached state whose AlertRuleUID equals uid and
// whose OrgID equals orgID. States belonging to other rules, or to a rule with
// the same UID in a different organisation, are left in place. The states
// mutex is held for the whole sweep.
func (c *cache) removeByRuleUID(orgID int64, uid string) {
	c.mtxStates.Lock()
	defer c.mtxStates.Unlock()

	for id, cached := range c.states {
		if cached.AlertRuleUID != uid || cached.OrgID != orgID {
			continue
		}
		// Removing the entry currently being visited is permitted while
		// ranging over a Go map.
		delete(c.states, id)
	}
}
func (c *cache) reset() {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()

View File

@ -45,11 +45,16 @@ func (st *Manager) Get(id string) (*State, error) {
return st.cache.get(id)
}
// ResetCache is used to ensure a clean cache on startup.
// It delegates to the reset method of the manager's underlying cache.
func (st *Manager) ResetCache() {
st.cache.reset()
}
// RemoveByRuleUID deletes all entries in the state manager that match the given rule UID.
// Matching is scoped to orgID: both arguments are passed through to the
// underlying cache's removeByRuleUID, which only removes states whose
// organisation ID and rule UID both match.
func (st *Manager) RemoveByRuleUID(orgID int64, ruleUID string) {
st.cache.removeByRuleUID(orgID, ruleUID)
}
func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eval.Results) []*State {
st.Log.Debug("state manager processing evaluation results", "uid", alertRule.UID, "resultCount", len(results))
var states []*State

View File

@ -38,8 +38,8 @@ type UpsertRule struct {
// RuleStore is the interface for persisting alert rules and instances.
type RuleStore interface {
DeleteAlertRuleByUID(orgID int64, ruleUID string) error
DeleteNamespaceAlertRules(orgID int64, namespaceUID string) error
DeleteRuleGroupAlertRules(orgID int64, namespaceUID string, ruleGroup string) error
DeleteNamespaceAlertRules(orgID int64, namespaceUID string) ([]string, error)
DeleteRuleGroupAlertRules(orgID int64, namespaceUID string, ruleGroup string) ([]string, error)
GetAlertRuleByUID(*ngmodels.GetAlertRuleByUIDQuery) error
GetAlertRulesForScheduling(query *ngmodels.ListAlertRulesQuery) error
GetOrgAlertRules(query *ngmodels.ListAlertRulesQuery) error
@ -87,9 +87,19 @@ func (st DBstore) DeleteAlertRuleByUID(orgID int64, ruleUID string) error {
})
}
// DeleteNamespaceAlertRules is a handler for deleting namespace alert rules.
func (st DBstore) DeleteNamespaceAlertRules(orgID int64, namespaceUID string) error {
return st.SQLStore.WithTransactionalDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
// DeleteNamespaceAlertRules is a handler for deleting namespace alert rules. A list of deleted rule UIDs is returned.
func (st DBstore) DeleteNamespaceAlertRules(orgID int64, namespaceUID string) ([]string, error) {
ruleUIDs := []string{}
err := st.SQLStore.WithTransactionalDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
if err := sess.SQL("SELECT uid FROM alert_rule WHERE org_id = ? and namespace_uid = ?", orgID, namespaceUID).Find(&ruleUIDs); err != nil {
return err
}
if _, err := sess.Exec("DELETE FROM alert_rule WHERE org_id = ? and namespace_uid = ?", orgID, namespaceUID); err != nil {
return err
}
if _, err := sess.Exec("DELETE FROM alert_rule WHERE org_id = ? and namespace_uid = ?", orgID, namespaceUID); err != nil {
return err
}
@ -106,11 +116,18 @@ func (st DBstore) DeleteNamespaceAlertRules(orgID int64, namespaceUID string) er
return nil
})
return ruleUIDs, err
}
// DeleteRuleGroupAlertRules is a handler for deleting rule group alert rules.
func (st DBstore) DeleteRuleGroupAlertRules(orgID int64, namespaceUID string, ruleGroup string) error {
return st.SQLStore.WithTransactionalDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
// DeleteRuleGroupAlertRules is a handler for deleting rule group alert rules. A list of deleted rule UIDs is returned.
func (st DBstore) DeleteRuleGroupAlertRules(orgID int64, namespaceUID string, ruleGroup string) ([]string, error) {
ruleUIDs := []string{}
err := st.SQLStore.WithTransactionalDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
if err := sess.SQL("SELECT uid FROM alert_rule WHERE org_id = ? and namespace_uid = ? and rule_group = ?",
orgID, namespaceUID, ruleGroup).Find(&ruleUIDs); err != nil {
return err
}
exist, err := sess.Exist(&ngmodels.AlertRule{OrgID: orgID, NamespaceUID: namespaceUID, RuleGroup: ruleGroup})
if err != nil {
return err
@ -136,6 +153,8 @@ func (st DBstore) DeleteRuleGroupAlertRules(orgID int64, namespaceUID string, ru
return nil
})
return ruleUIDs, err
}
// GetAlertRuleByUID is a handler for retrieving an alert rule from the database by its UID and organisation ID.