mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Schedule Alert rules metric tracking (#50415)
* Alerting: Schedule Alert rules metric tracking. Record the metrics in two places instead of one, as an attempt to keep a semi-accurate record.
This commit is contained in:
@@ -56,7 +56,12 @@ Scopes must have an order to ensure consistency and ease of search, this helps u
|
|||||||
- [ENHANCEMENT] Create folder 'General Alerting' when Grafana starts from scratch #48866
|
- [ENHANCEMENT] Create folder 'General Alerting' when Grafana starts from scratch #48866
|
||||||
- [ENHANCEMENT] Rule changes authorization logic to use UID folder scope instead of ID scope #48970
|
- [ENHANCEMENT] Rule changes authorization logic to use UID folder scope instead of ID scope #48970
|
||||||
- [ENHANCEMENT] Scheduler: ticker to support stopping #48142
|
- [ENHANCEMENT] Scheduler: ticker to support stopping #48142
|
||||||
- [ENHANCEMENT] Scheduler: Adds new metrics to track rules that might be scheduled.
|
- [ENHANCEMENT] Scheduler: Adds new metrics to track rules that might be scheduled #49874
|
||||||
|
- `grafana_alerting_schedule_alert_rules`
|
||||||
|
- `grafana_alerting_schedule_alert_rules_hash`
|
||||||
|
- [CHANGE] Scheduler: Renaming of metrics to make them consistent with similar metrics exposed by the component #49874
|
||||||
|
- `grafana_alerting_get_alert_rules_duration_seconds` to `grafana_alerting_schedule_periodic_duration_seconds`
|
||||||
|
- `grafana_alerting_schedule_periodic_duration_seconds` to `grafana_alerting_schedule_query_alert_rules_duration_seconds`
|
||||||
- [FEATURE] Indicate whether routes are provisioned when GETting Alertmanager configuration #47857
|
- [FEATURE] Indicate whether routes are provisioned when GETting Alertmanager configuration #47857
|
||||||
- [FEATURE] Indicate whether contact point is provisioned when GETting Alertmanager configuration #48323
|
- [FEATURE] Indicate whether contact point is provisioned when GETting Alertmanager configuration #48323
|
||||||
- [FEATURE] Indicate whether alert rule is provisioned when GETting the rule #48458
|
- [FEATURE] Indicate whether alert rule is provisioned when GETting the rule #48458
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
|||||||
Namespace: Namespace,
|
Namespace: Namespace,
|
||||||
Subsystem: Subsystem,
|
Subsystem: Subsystem,
|
||||||
Name: "schedule_alert_rules",
|
Name: "schedule_alert_rules",
|
||||||
Help: "The number of alert rules being considered for evaluation each tick.",
|
Help: "The number of alert rules that could be considered for evaluation at the next tick.",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
SchedulableAlertRulesHash: promauto.With(r).NewGauge(
|
SchedulableAlertRulesHash: promauto.With(r).NewGauge(
|
||||||
@@ -188,7 +188,7 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
|||||||
Namespace: Namespace,
|
Namespace: Namespace,
|
||||||
Subsystem: Subsystem,
|
Subsystem: Subsystem,
|
||||||
Name: "schedule_alert_rules_hash",
|
Name: "schedule_alert_rules_hash",
|
||||||
Help: "A hash of the alert rules over time.",
|
Help: "A hash of the alert rules that could be considered for evaluation at the next tick.",
|
||||||
}),
|
}),
|
||||||
UpdateSchedulableAlertRulesDuration: promauto.With(r).NewHistogram(
|
UpdateSchedulableAlertRulesDuration: promauto.With(r).NewHistogram(
|
||||||
prometheus.HistogramOpts{
|
prometheus.HistogramOpts{
|
||||||
|
|||||||
@@ -50,7 +50,5 @@ func (sch *schedule) updateSchedulableAlertRules(ctx context.Context, disabledOr
|
|||||||
return fmt.Errorf("failed to get alert rules: %w", err)
|
return fmt.Errorf("failed to get alert rules: %w", err)
|
||||||
}
|
}
|
||||||
sch.schedulableAlertRules.set(q.Result)
|
sch.schedulableAlertRules.set(q.Result)
|
||||||
sch.metrics.SchedulableAlertRules.Set(float64(len(q.Result)))
|
|
||||||
sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(q.Result)))
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -163,6 +163,9 @@ func (r *schedulableAlertRulesRegistry) update(rule *models.SchedulableAlertRule
|
|||||||
r.rules[rule.GetKey()] = rule
|
r.rules[rule.GetKey()] = rule
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// del removes the pair with the specified key from schedulableAlertRulesRegistry.
|
||||||
|
// Returns a 2-tuple where the first element is the value of the removed pair
|
||||||
|
// and the second element indicates whether an element with the specified key existed.
|
||||||
func (r *schedulableAlertRulesRegistry) del(k models.AlertRuleKey) (*models.SchedulableAlertRule, bool) {
|
func (r *schedulableAlertRulesRegistry) del(k models.AlertRuleKey) (*models.SchedulableAlertRule, bool) {
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
defer r.mu.Unlock()
|
defer r.mu.Unlock()
|
||||||
|
|||||||
@@ -338,6 +338,11 @@ func (sch *schedule) DeleteAlertRule(key models.AlertRuleKey) {
|
|||||||
}
|
}
|
||||||
// stop rule evaluation
|
// stop rule evaluation
|
||||||
ruleInfo.stop()
|
ruleInfo.stop()
|
||||||
|
|
||||||
|
// Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick.
|
||||||
|
alertRules := sch.schedulableAlertRules.all()
|
||||||
|
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
|
||||||
|
sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sch *schedule) adminConfigSync(ctx context.Context) error {
|
func (sch *schedule) adminConfigSync(ctx context.Context) error {
|
||||||
@@ -392,6 +397,11 @@ func (sch *schedule) schedulePeriodic(ctx context.Context) error {
|
|||||||
// so, at the end, the remaining registered alert rules are the deleted ones
|
// so, at the end, the remaining registered alert rules are the deleted ones
|
||||||
registeredDefinitions := sch.registry.keyMap()
|
registeredDefinitions := sch.registry.keyMap()
|
||||||
|
|
||||||
|
// While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be
|
||||||
|
// scheduled as rules could be removed before we get a chance to evaluate them.
|
||||||
|
sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules)))
|
||||||
|
sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules)))
|
||||||
|
|
||||||
type readyToRunItem struct {
|
type readyToRunItem struct {
|
||||||
key models.AlertRuleKey
|
key models.AlertRuleKey
|
||||||
ruleName string
|
ruleName string
|
||||||
|
|||||||
Reference in New Issue
Block a user