mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add metric counting rule groups per org (#80669)
* Refactor, fix bad map hint
* Count groups per org
This commit is contained in:
parent
6b37a887d5
commit
3c796ecc8f
@@ -21,6 +21,7 @@ type Scheduler struct {
|
||||
ProcessDuration *prometheus.HistogramVec
|
||||
SendDuration *prometheus.HistogramVec
|
||||
GroupRules *prometheus.GaugeVec
|
||||
Groups *prometheus.GaugeVec
|
||||
SchedulePeriodicDuration prometheus.Histogram
|
||||
SchedulableAlertRules prometheus.Gauge
|
||||
SchedulableAlertRulesHash prometheus.Gauge
|
||||
@@ -100,6 +101,15 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
||||
},
|
||||
[]string{"org", "state"},
|
||||
),
|
||||
Groups: promauto.With(r).NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "rule_groups",
|
||||
Help: "The number of alert rule groups",
|
||||
},
|
||||
[]string{"org"},
|
||||
),
|
||||
SchedulePeriodicDuration: promauto.With(r).NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Namespace: Namespace,
|
||||
|
@@ -204,21 +204,34 @@ type readyToRunItem struct {
|
||||
}
|
||||
|
||||
func (sch *schedule) updateRulesMetrics(alertRules []*ngmodels.AlertRule) {
|
||||
orgs := make(map[int64]int64, len(alertRules))
|
||||
orgsPaused := make(map[int64]int64, len(alertRules))
|
||||
rulesPerOrg := make(map[int64]int64) // orgID -> count
|
||||
orgsPaused := make(map[int64]int64) // orgID -> count
|
||||
groupsPerOrg := make(map[int64]map[string]struct{}) // orgID -> set of groups
|
||||
for _, rule := range alertRules {
|
||||
orgs[rule.OrgID]++
|
||||
rulesPerOrg[rule.OrgID]++
|
||||
|
||||
if rule.IsPaused {
|
||||
orgsPaused[rule.OrgID]++
|
||||
}
|
||||
|
||||
orgGroups, ok := groupsPerOrg[rule.OrgID]
|
||||
if !ok {
|
||||
orgGroups = make(map[string]struct{})
|
||||
groupsPerOrg[rule.OrgID] = orgGroups
|
||||
}
|
||||
orgGroups[rule.RuleGroup] = struct{}{}
|
||||
}
|
||||
|
||||
for orgID, numRules := range orgs {
|
||||
for orgID, numRules := range rulesPerOrg {
|
||||
numRulesPaused := orgsPaused[orgID]
|
||||
sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRuleActiveLabelValue).Set(float64(numRules - numRulesPaused))
|
||||
sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRulePausedLabelValue).Set(float64(numRulesPaused))
|
||||
}
|
||||
|
||||
for orgID, groups := range groupsPerOrg {
|
||||
sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups)))
|
||||
}
|
||||
|
||||
// While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be
|
||||
// scheduled as rules could be removed before we get a chance to evaluate them.
|
||||
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
|
||||
|
Loading…
Reference in New Issue
Block a user