Alerting: Move rule_groups_rules metric from State to Scheduler. (#63144)

The `rule_groups_rules` metric is currently defined and computed by `State`.
It makes more sense for this metric to be computed from the configured rule
set rather than from the rule evaluation state. Otherwise there is an edge case
where a rule that does not have a state yet is not counted.

Additionally, we would like this metric (and others) to have a `rule_group`
label, and this is much easier to achieve if the metric is produced from the
`Scheduler` package.
This commit is contained in:
Steve Simpson
2023-02-09 17:05:19 +01:00
committed by GitHub
parent 90f8959d3c
commit 4d1a2c3370
5 changed files with 64 additions and 22 deletions

View File

@@ -177,8 +177,7 @@ func (sch *schedule) DeleteAlertRule(keys ...ngmodels.AlertRuleKey) {
}
// Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick.
alertRules, _ := sch.schedulableAlertRules.all()
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
sch.updateRulesMetrics(alertRules)
}
func (sch *schedule) schedulePeriodic(ctx context.Context, t *ticker.T) error {
@@ -209,6 +208,21 @@ type readyToRunItem struct {
evaluation
}
// updateRulesMetrics refreshes the scheduler's rule metrics from the given set
// of alert rules: the number of rules per organisation (GroupRules, labelled by
// org ID), the total number of rules (SchedulableAlertRules) and the value
// returned by hashUIDs for the set (SchedulableAlertRulesHash).
func (sch *schedule) updateRulesMetrics(alertRules []*ngmodels.AlertRule) {
	rulesPerOrg := make(map[int64]int64)
	for _, rule := range alertRules {
		rulesPerOrg[rule.OrgID]++
	}

	// Drop the series written by the previous update before repopulating. Only
	// organisations present in alertRules are set below, so without this an
	// organisation whose last rule was removed would keep reporting its old,
	// non-zero count forever.
	// NOTE(review): assumes GroupRules is a Prometheus GaugeVec (it is used via
	// WithLabelValues(...).Set(...)); confirm against the metrics package.
	sch.metrics.GroupRules.Reset()
	for orgID, numRules := range rulesPerOrg {
		sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID)).Set(float64(numRules))
	}

	// While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be
	// scheduled as rules could be removed before we get a chance to evaluate them.
	sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
	sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
}
func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.Group, tick time.Time) ([]readyToRunItem, map[ngmodels.AlertRuleKey]struct{}) {
tickNum := tick.Unix() / int64(sch.baseInterval.Seconds())
@@ -223,10 +237,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
// so, at the end, the remaining registered alert rules are the deleted ones
registeredDefinitions := sch.registry.keyMap()
// While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be
// scheduled as rules could be removed before we get a chance to evaluate them.
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
sch.updateRulesMetrics(alertRules)
readyToRun := make([]readyToRunItem, 0)
missingFolder := make(map[string][]string)