Alerting: Schedule Alert rules metric tracking (#50415)

* Alerting: Schedule Alert rules metric tracking. Change the recording of these metrics from one place to two, as an attempt to keep a semi-accurate record.

Parent: 2813e49842
Commit: c59938b235
@@ -56,7 +56,12 @@ Scopes must have an order to ensure consistency and ease of search, this helps us
 - [ENHANCEMENT] Create folder 'General Alerting' when Grafana starts from the scratch #48866
 - [ENHANCEMENT] Rule changes authorization logic to use UID folder scope instead of ID scope #48970
 - [ENHANCEMENT] Scheduler: ticker to support stopping #48142
-- [ENHANCEMENT] Scheduler: Adds new metrics to track rules that might be scheduled.
+- [ENHANCEMENT] Scheduler: Adds new metrics to track rules that might be scheduled #49874
+  - `grafana_alerting_schedule_alert_rules`
+  - `grafana_alerting_schedule_alert_rules_hash`
+- [CHANGE] Scheduler: Renaming of metrics to make them consistent with similar metrics exposed by the component #49874
+  - `grafana_alerting_get_alert_rules_duration_seconds` to `grafana_alerting_schedule_periodic_duration_seconds`
+  - `grafana_alerting_schedule_periodic_duration_seconds` to `grafana_alerting_schedule_query_alert_rules_duration_seconds`
 - [FEATURE] Indicate whether routes are provisioned when GETting Alertmanager configuration #47857
 - [FEATURE] Indicate whether contact point is provisioned when GETting Alertmanager configuration #48323
 - [FEATURE] Indicate whether alert rule is provisioned when GETting the rule #48458
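For context on the renamed duration metrics listed above, here is a minimal, self-contained sketch of how histograms with the new names could be registered and observed with the Prometheus Go client. The Help strings, bucket boundaries, and variable names are illustrative assumptions, not the repository's code.

```go
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func main() {
	reg := prometheus.NewRegistry()

	// Histogram exposed as grafana_alerting_schedule_periodic_duration_seconds.
	// Help text and buckets here are placeholders.
	schedulePeriodicDuration := promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
		Namespace: "grafana",
		Subsystem: "alerting",
		Name:      "schedule_periodic_duration_seconds",
		Help:      "Time taken by one scheduler tick (placeholder help text).",
		Buckets:   []float64{0.1, 0.25, 0.5, 1, 2, 5, 10},
	})

	// Histogram exposed as grafana_alerting_schedule_query_alert_rules_duration_seconds.
	queryAlertRulesDuration := promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
		Namespace: "grafana",
		Subsystem: "alerting",
		Name:      "schedule_query_alert_rules_duration_seconds",
		Help:      "Time taken to fetch schedulable alert rules (placeholder help text).",
		Buckets:   []float64{0.1, 0.25, 0.5, 1, 2, 5, 10},
	})

	start := time.Now()
	// ... the work being timed would happen here ...
	schedulePeriodicDuration.Observe(time.Since(start).Seconds())
	queryAlertRulesDuration.Observe(time.Since(start).Seconds())

	fmt.Println("histograms registered and observed")
}
```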
@@ -180,7 +180,7 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
                 Namespace: Namespace,
                 Subsystem: Subsystem,
                 Name:      "schedule_alert_rules",
-                Help:      "The number of alert rules being considered for evaluation each tick.",
+                Help:      "The number of alert rules that could be considered for evaluation at the next tick.",
             },
         ),
         SchedulableAlertRulesHash: promauto.With(r).NewGauge(
@@ -188,7 +188,7 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
                 Namespace: Namespace,
                 Subsystem: Subsystem,
                 Name:      "schedule_alert_rules_hash",
-                Help:      "A hash of the alert rules over time.",
+                Help:      "A hash of the alert rules that could be considered for evaluation at the next tick.",
             }),
         UpdateSchedulableAlertRulesDuration: promauto.With(r).NewHistogram(
             prometheus.HistogramOpts{
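The hunk above only changes the Help strings of the two gauges. A condensed sketch of the surrounding constructor, with the post-change Help text, might look like the following; the Namespace and Subsystem constants are assumed from the exported metric names (grafana_alerting_*), and the wiring is simplified rather than copied from the repository.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Assumed from the metric names above; the real constants live elsewhere in the repo.
const (
	Namespace = "grafana"
	Subsystem = "alerting"
)

// Scheduler mirrors the two gauge fields touched by the diff.
type Scheduler struct {
	SchedulableAlertRules     prometheus.Gauge
	SchedulableAlertRulesHash prometheus.Gauge
}

func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
	return &Scheduler{
		SchedulableAlertRules: promauto.With(r).NewGauge(prometheus.GaugeOpts{
			Namespace: Namespace,
			Subsystem: Subsystem,
			Name:      "schedule_alert_rules",
			Help:      "The number of alert rules that could be considered for evaluation at the next tick.",
		}),
		SchedulableAlertRulesHash: promauto.With(r).NewGauge(prometheus.GaugeOpts{
			Namespace: Namespace,
			Subsystem: Subsystem,
			Name:      "schedule_alert_rules_hash",
			Help:      "A hash of the alert rules that could be considered for evaluation at the next tick.",
		}),
	}
}

func main() {
	m := newSchedulerMetrics(prometheus.NewRegistry())
	m.SchedulableAlertRules.Set(3)
	m.SchedulableAlertRulesHash.Set(12345)
	fmt.Println("gauges registered with the new help text")
}
```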
@@ -50,7 +50,5 @@ func (sch *schedule) updateSchedulableAlertRules(ctx context.Context, disabledOr
         return fmt.Errorf("failed to get alert rules: %w", err)
     }
     sch.schedulableAlertRules.set(q.Result)
-    sch.metrics.SchedulableAlertRules.Set(float64(len(q.Result)))
-    sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(q.Result)))
     return nil
 }
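This hunk removes the gauge updates from the fetch path; the fetch-and-cache pattern itself stays the same. A minimal sketch of that pattern, with a simplified store interface and query shape standing in for the real ones, could look like this:

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

type SchedulableAlertRule struct {
	UID string
}

// ruleStore is an illustrative stand-in for the scheduler's rule store.
type ruleStore interface {
	GetSchedulableAlertRules(ctx context.Context) ([]*SchedulableAlertRule, error)
}

// ruleCache is an illustrative stand-in for sch.schedulableAlertRules.
type ruleCache struct {
	rules []*SchedulableAlertRule
}

func (c *ruleCache) set(rules []*SchedulableAlertRule) { c.rules = rules }

// updateSchedulableAlertRules refreshes the cache; after this commit the
// metrics are no longer set here, but in the tick and delete paths instead.
func updateSchedulableAlertRules(ctx context.Context, store ruleStore, cache *ruleCache) error {
	rules, err := store.GetSchedulableAlertRules(ctx)
	if err != nil {
		return fmt.Errorf("failed to get alert rules: %w", err)
	}
	cache.set(rules)
	return nil
}

type fakeStore struct{ err error }

func (f fakeStore) GetSchedulableAlertRules(ctx context.Context) ([]*SchedulableAlertRule, error) {
	if f.err != nil {
		return nil, f.err
	}
	return []*SchedulableAlertRule{{UID: "abc"}}, nil
}

func main() {
	cache := &ruleCache{}
	if err := updateSchedulableAlertRules(context.Background(), fakeStore{}, cache); err != nil {
		panic(err)
	}
	fmt.Println(len(cache.rules)) // 1

	err := updateSchedulableAlertRules(context.Background(), fakeStore{err: errors.New("db down")}, cache)
	fmt.Println(errors.Unwrap(err)) // db down
}
```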
@@ -163,6 +163,9 @@ func (r *schedulableAlertRulesRegistry) update(rule *models.SchedulableAlertRule
     r.rules[rule.GetKey()] = rule
 }
 
+// del removes pair that has specific key from schedulableAlertRulesRegistry.
+// Returns 2-tuple where the first element is value of the removed pair
+// and the second element indicates whether element with the specified key existed.
 func (r *schedulableAlertRulesRegistry) del(k models.AlertRuleKey) (*models.SchedulableAlertRule, bool) {
     r.mu.Lock()
     defer r.mu.Unlock()
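The registry being documented here is essentially a mutex-guarded map keyed by rule key. A minimal sketch of such a registry, with simplified stand-in types instead of the models package, might look like the following:

```go
package main

import (
	"fmt"
	"sync"
)

// AlertRuleKey and SchedulableAlertRule are simplified stand-ins.
type AlertRuleKey struct {
	OrgID int64
	UID   string
}

type SchedulableAlertRule struct {
	OrgID int64
	UID   string
	Title string
}

func (r *SchedulableAlertRule) GetKey() AlertRuleKey {
	return AlertRuleKey{OrgID: r.OrgID, UID: r.UID}
}

type schedulableAlertRulesRegistry struct {
	mu    sync.Mutex
	rules map[AlertRuleKey]*SchedulableAlertRule
}

// set replaces the registry contents with the given rules.
func (r *schedulableAlertRulesRegistry) set(rules []*SchedulableAlertRule) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.rules = make(map[AlertRuleKey]*SchedulableAlertRule, len(rules))
	for _, rule := range rules {
		r.rules[rule.GetKey()] = rule
	}
}

// all returns a snapshot of the currently registered rules.
func (r *schedulableAlertRulesRegistry) all() []*SchedulableAlertRule {
	r.mu.Lock()
	defer r.mu.Unlock()
	out := make([]*SchedulableAlertRule, 0, len(r.rules))
	for _, rule := range r.rules {
		out = append(out, rule)
	}
	return out
}

// del removes the rule with the given key and reports whether it existed.
func (r *schedulableAlertRulesRegistry) del(k AlertRuleKey) (*SchedulableAlertRule, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()
	rule, ok := r.rules[k]
	if ok {
		delete(r.rules, k)
	}
	return rule, ok
}

func main() {
	reg := &schedulableAlertRulesRegistry{rules: map[AlertRuleKey]*SchedulableAlertRule{}}
	reg.set([]*SchedulableAlertRule{{OrgID: 1, UID: "abc", Title: "High CPU"}})
	fmt.Println(len(reg.all())) // 1
	_, existed := reg.del(AlertRuleKey{OrgID: 1, UID: "abc"})
	fmt.Println(existed) // true
}
```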
@@ -338,6 +338,11 @@ func (sch *schedule) DeleteAlertRule(key models.AlertRuleKey) {
     }
     // stop rule evaluation
     ruleInfo.stop()
+
+    // Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick.
+    alertRules := sch.schedulableAlertRules.all()
+    sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
+    sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
 }
 
 func (sch *schedule) adminConfigSync(ctx context.Context) error {
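The hash fed into the second gauge is produced by hashUIDs, which is referenced but not shown in the diff. Below is a minimal sketch of how such a hash could be computed, assuming an FNV-64 hash over the sorted rule UIDs; the body is illustrative, not the repository's exact implementation.

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// SchedulableAlertRule is a simplified stand-in for the models type.
type SchedulableAlertRule struct {
	UID string
}

// hashUIDs folds the sorted rule UIDs into a single uint64 so the gauge
// changes whenever the set of schedulable rules changes.
func hashUIDs(alertRules []*SchedulableAlertRule) uint64 {
	uids := make([]string, 0, len(alertRules))
	for _, r := range alertRules {
		uids = append(uids, r.UID)
	}
	sort.Strings(uids) // makes the hash independent of iteration order
	h := fnv.New64()
	for _, uid := range uids {
		// fnv's Write never returns an error.
		_, _ = h.Write([]byte(uid))
	}
	return h.Sum64()
}

func main() {
	rules := []*SchedulableAlertRule{{UID: "rule-b"}, {UID: "rule-a"}}
	fmt.Println(hashUIDs(rules))
}
```

Since the gauge stores the value as a float64, very large hash values lose some precision; the metric's purpose is only to change when the rule set changes, which it still does in practice.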
@@ -392,6 +397,11 @@ func (sch *schedule) schedulePeriodic(ctx context.Context) error {
             // so, at the end, the remaining registered alert rules are the deleted ones
             registeredDefinitions := sch.registry.keyMap()
 
+            // While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be
+            // scheduled as rules could be removed before we get a chance to evaluate them.
+            sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
+            sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
+
             type readyToRunItem struct {
                 key      models.AlertRuleKey
                 ruleName string
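The commit message's "from one place to two" now reads directly off the two hunks above: the gauges are refreshed once per scheduler tick in schedulePeriodic, and again immediately when a rule is deleted in DeleteAlertRule. The sketch below captures that shape with plain struct fields standing in for the Prometheus gauges; every name in it is an illustrative stand-in, not Grafana code.

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// gauges is a stand-in for the two Prometheus gauges.
type gauges struct {
	schedulableAlertRules     float64
	schedulableAlertRulesHash float64
}

type scheduler struct {
	mu      sync.Mutex
	rules   []string
	metrics gauges
}

// recordMetrics is the shared helper; callers must hold mu.
func (s *scheduler) recordMetrics() {
	s.metrics.schedulableAlertRules = float64(len(s.rules))
	s.metrics.schedulableAlertRulesHash = float64(len(s.rules) * 31) // stand-in for a real hash
}

// schedulePeriodic refreshes the gauges at every tick, before evaluation starts.
func (s *scheduler) schedulePeriodic(ctx context.Context, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			s.mu.Lock()
			s.recordMetrics() // place one: every tick
			s.mu.Unlock()
			// ... rule evaluation would happen here ...
		case <-ctx.Done():
			return
		}
	}
}

// deleteRule removes a rule and refreshes the gauges right away, so the
// metrics stay roughly accurate between ticks.
func (s *scheduler) deleteRule(uid string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	kept := s.rules[:0]
	for _, r := range s.rules {
		if r != uid {
			kept = append(kept, r)
		}
	}
	s.rules = kept
	s.recordMetrics() // place two: on deletion
}

func main() {
	s := &scheduler{rules: []string{"a", "b"}}
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	go s.schedulePeriodic(ctx, 10*time.Millisecond)

	s.deleteRule("a")
	<-ctx.Done()

	s.mu.Lock()
	fmt.Println(s.metrics.schedulableAlertRules) // 1
	s.mu.Unlock()
}
```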