Alerting: Refactor state manager's cache (#56197)

* remove ResetAllStates because it's not used
* refactor cache to accept logs, metrics and url as method args
* update manager Warm method to set the entire state at once
* remove unused reset method
* introduce ruleStates
* change getOrCreate to belong to ruleStates
* update Get to not return error
This commit is contained in:
Yuriy Tseretyan
2022-10-06 15:30:12 -04:00
committed by GitHub
parent 15d2653b89
commit 7b6437402a
5 changed files with 125 additions and 101 deletions

View File

@@ -15,28 +15,40 @@ import (
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
type cache struct {
states map[int64]map[string]map[string]*State // orgID > alertRuleUID > stateID > state
mtxStates sync.RWMutex
log log.Logger
metrics *metrics.State
externalURL *url.URL
type ruleStates struct {
states map[string]*State
}
func newCache(logger log.Logger, metrics *metrics.State, externalURL *url.URL) *cache {
type cache struct {
states map[int64]map[string]*ruleStates // orgID > alertRuleUID > stateID > state
mtxStates sync.RWMutex
}
func newCache() *cache {
return &cache{
states: make(map[int64]map[string]map[string]*State),
log: logger,
metrics: metrics,
externalURL: externalURL,
states: make(map[int64]map[string]*ruleStates),
}
}
func (c *cache) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels) *State {
func (c *cache) getOrCreate(ctx context.Context, log log.Logger, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) *State {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
var orgStates map[string]*ruleStates
var ok bool
if orgStates, ok = c.states[alertRule.OrgID]; !ok {
orgStates = make(map[string]*ruleStates)
c.states[alertRule.OrgID] = orgStates
}
var states *ruleStates
if states, ok = orgStates[alertRule.UID]; !ok {
states = &ruleStates{states: make(map[string]*State)}
c.states[alertRule.OrgID][alertRule.UID] = states
}
return states.getOrCreate(ctx, log, alertRule, result, extraLabels, externalURL)
}
ruleLabels, annotations := c.expandRuleLabelsAndAnnotations(ctx, alertRule, result, extraLabels)
func (rs *ruleStates) getOrCreate(ctx context.Context, log log.Logger, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) *State {
ruleLabels, annotations := rs.expandRuleLabelsAndAnnotations(ctx, log, alertRule, result, extraLabels, externalURL)
lbs := make(data.Labels, len(extraLabels)+len(ruleLabels)+len(result.Instance))
dupes := make(data.Labels)
@@ -53,7 +65,7 @@ func (c *cache) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule,
}
}
if len(dupes) > 0 {
c.log.Warn("rule declares one or many reserved labels. Those rules labels will be ignored", "labels", dupes)
log.Warn("rule declares one or many reserved labels. Those rules labels will be ignored", "labels", dupes)
}
dupes = make(data.Labels)
for key, val := range result.Instance {
@@ -66,23 +78,16 @@ func (c *cache) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule,
}
}
if len(dupes) > 0 {
c.log.Warn("evaluation result contains either reserved labels or labels declared in the rules. Those labels from the result will be ignored", "labels", dupes)
log.Warn("evaluation result contains either reserved labels or labels declared in the rules. Those labels from the result will be ignored", "labels", dupes)
}
il := ngModels.InstanceLabels(lbs)
id, err := il.StringKey()
if err != nil {
c.log.Error("error getting cacheId for entry", "err", err.Error())
log.Error("error getting cacheId for entry", "err", err.Error())
}
if _, ok := c.states[alertRule.OrgID]; !ok {
c.states[alertRule.OrgID] = make(map[string]map[string]*State)
}
if _, ok := c.states[alertRule.OrgID][alertRule.UID]; !ok {
c.states[alertRule.OrgID][alertRule.UID] = make(map[string]*State)
}
if state, ok := c.states[alertRule.OrgID][alertRule.UID][id]; ok {
if state, ok := rs.states[id]; ok {
// Annotations can change over time, however we also want to maintain
// certain annotations across evaluations
for k, v := range state.Annotations {
@@ -95,7 +100,7 @@ func (c *cache) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule,
}
}
state.Annotations = annotations
c.states[alertRule.OrgID][alertRule.UID][id] = state
rs.states[id] = state
return state
}
@@ -112,21 +117,21 @@ func (c *cache) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule,
if result.State == eval.Alerting {
newState.StartsAt = result.EvaluatedAt
}
c.states[alertRule.OrgID][alertRule.UID][id] = newState
rs.states[id] = newState
return newState
}
func (c *cache) expandRuleLabelsAndAnnotations(ctx context.Context, alertRule *ngModels.AlertRule, alertInstance eval.Result, extraLabels data.Labels) (data.Labels, data.Labels) {
func (rs *ruleStates) expandRuleLabelsAndAnnotations(ctx context.Context, log log.Logger, alertRule *ngModels.AlertRule, alertInstance eval.Result, extraLabels data.Labels, externalURL *url.URL) (data.Labels, data.Labels) {
// use labels from the result and extra labels to expand the labels and annotations declared by the rule
templateLabels := mergeLabels(extraLabels, alertInstance.Instance)
expand := func(original map[string]string) map[string]string {
expanded := make(map[string]string, len(original))
for k, v := range original {
ev, err := expandTemplate(ctx, alertRule.Title, v, templateLabels, alertInstance, c.externalURL)
ev, err := expandTemplate(ctx, alertRule.Title, v, templateLabels, alertInstance, externalURL)
expanded[k] = ev
if err != nil {
c.log.Error("error in expanding template", "name", k, "value", v, "err", err.Error())
log.Error("error in expanding template", "name", k, "value", v, "err", err.Error())
// Store the original template on error.
expanded[k] = v
}
@@ -137,25 +142,36 @@ func (c *cache) expandRuleLabelsAndAnnotations(ctx context.Context, alertRule *n
return expand(alertRule.Labels), expand(alertRule.Annotations)
}
func (c *cache) setAllStates(newStates map[int64]map[string]*ruleStates) {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
c.states = newStates
}
func (c *cache) set(entry *State) {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
if _, ok := c.states[entry.OrgID]; !ok {
c.states[entry.OrgID] = make(map[string]map[string]*State)
c.states[entry.OrgID] = make(map[string]*ruleStates)
}
if _, ok := c.states[entry.OrgID][entry.AlertRuleUID]; !ok {
c.states[entry.OrgID][entry.AlertRuleUID] = make(map[string]*State)
c.states[entry.OrgID][entry.AlertRuleUID] = &ruleStates{states: make(map[string]*State)}
}
c.states[entry.OrgID][entry.AlertRuleUID][entry.CacheId] = entry
c.states[entry.OrgID][entry.AlertRuleUID].states[entry.CacheId] = entry
}
func (c *cache) get(orgID int64, alertRuleUID, stateId string) (*State, error) {
func (c *cache) get(orgID int64, alertRuleUID, stateId string) *State {
c.mtxStates.RLock()
defer c.mtxStates.RUnlock()
if state, ok := c.states[orgID][alertRuleUID][stateId]; ok {
return state, nil
ruleStates, ok := c.states[orgID][alertRuleUID]
if ok {
var state *State
state, ok = ruleStates.states[stateId]
if ok {
return state
}
}
return nil, fmt.Errorf("no entry for %s:%s was found", alertRuleUID, stateId)
return nil
}
func (c *cache) getAll(orgID int64) []*State {
@@ -163,7 +179,7 @@ func (c *cache) getAll(orgID int64) []*State {
c.mtxStates.RLock()
defer c.mtxStates.RUnlock()
for _, v1 := range c.states[orgID] {
for _, v2 := range v1 {
for _, v2 := range v1.states {
states = append(states, v2)
}
}
@@ -171,38 +187,47 @@ func (c *cache) getAll(orgID int64) []*State {
}
func (c *cache) getStatesForRuleUID(orgID int64, alertRuleUID string) []*State {
var ruleStates []*State
var result []*State
c.mtxStates.RLock()
defer c.mtxStates.RUnlock()
for _, state := range c.states[orgID][alertRuleUID] {
ruleStates = append(ruleStates, state)
orgRules, ok := c.states[orgID]
if !ok {
return nil
}
return ruleStates
rs, ok := orgRules[alertRuleUID]
if !ok {
return nil
}
for _, state := range rs.states {
result = append(result, state)
}
return result
}
// removeByRuleUID deletes all entries in the state cache that match the given UID. Returns removed states
func (c *cache) removeByRuleUID(orgID int64, uid string) []*State {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
statesMap := c.states[orgID][uid]
delete(c.states[orgID], uid)
if statesMap == nil {
orgStates, ok := c.states[orgID]
if !ok {
return nil
}
states := make([]*State, 0, len(statesMap))
for _, state := range statesMap {
rs, ok := orgStates[uid]
if !ok {
return nil
}
delete(c.states[orgID], uid)
if len(rs.states) == 0 {
return nil
}
states := make([]*State, 0, len(rs.states))
for _, state := range rs.states {
states = append(states, state)
}
return states
}
func (c *cache) reset() {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
c.states = make(map[int64]map[string]map[string]*State)
}
func (c *cache) recordMetrics() {
func (c *cache) recordMetrics(metrics *metrics.State) {
c.mtxStates.RLock()
defer c.mtxStates.RUnlock()
@@ -217,9 +242,9 @@ func (c *cache) recordMetrics() {
}
for org, orgMap := range c.states {
c.metrics.GroupRules.WithLabelValues(fmt.Sprint(org)).Set(float64(len(orgMap)))
metrics.GroupRules.WithLabelValues(fmt.Sprint(org)).Set(float64(len(orgMap)))
for _, rule := range orgMap {
for _, state := range rule {
for _, state := range rule.states {
n := ct[state.State]
ct[state.State] = n + 1
}
@@ -227,7 +252,7 @@ func (c *cache) recordMetrics() {
}
for k, n := range ct {
c.metrics.AlertState.WithLabelValues(strings.ToLower(k.String())).Set(float64(n))
metrics.AlertState.WithLabelValues(strings.ToLower(k.String())).Set(float64(n))
}
}
@@ -248,5 +273,9 @@ func mergeLabels(a, b data.Labels) data.Labels {
func (c *cache) deleteEntry(orgID int64, alertRuleUID, cacheID string) {
c.mtxStates.Lock()
defer c.mtxStates.Unlock()
delete(c.states[orgID][alertRuleUID], cacheID)
ruleStates, ok := c.states[orgID][alertRuleUID]
if !ok {
return
}
delete(ruleStates.states, cacheID)
}