Alerting: Add metric counting rule groups per org (#80669)

* Refactor, fix bad map hint

* Count groups per org
This commit is contained in:
Alexander Weaver
2024-01-16 16:35:56 -06:00
committed by GitHub
parent 6b37a887d5
commit 3c796ecc8f
2 changed files with 27 additions and 4 deletions

View File

@@ -204,21 +204,34 @@ type readyToRunItem struct {
}
func (sch *schedule) updateRulesMetrics(alertRules []*ngmodels.AlertRule) {
orgs := make(map[int64]int64, len(alertRules))
orgsPaused := make(map[int64]int64, len(alertRules))
rulesPerOrg := make(map[int64]int64) // orgID -> count
orgsPaused := make(map[int64]int64) // orgID -> count
groupsPerOrg := make(map[int64]map[string]struct{}) // orgID -> set of groups
for _, rule := range alertRules {
orgs[rule.OrgID]++
rulesPerOrg[rule.OrgID]++
if rule.IsPaused {
orgsPaused[rule.OrgID]++
}
orgGroups, ok := groupsPerOrg[rule.OrgID]
if !ok {
orgGroups = make(map[string]struct{})
groupsPerOrg[rule.OrgID] = orgGroups
}
orgGroups[rule.RuleGroup] = struct{}{}
}
for orgID, numRules := range orgs {
for orgID, numRules := range rulesPerOrg {
numRulesPaused := orgsPaused[orgID]
sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRuleActiveLabelValue).Set(float64(numRules - numRulesPaused))
sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRulePausedLabelValue).Set(float64(numRulesPaused))
}
for orgID, groups := range groupsPerOrg {
sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups)))
}
// These are the rules that we iterate over, but at the moment there's no guarantee that every one of them
// will be scheduled: a rule could be removed before we get a chance to evaluate it.
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))