From 0ed70d0b2ff0103695c720443fda4b5e3ddc7f02 Mon Sep 17 00:00:00 2001 From: Alexander Akhmetov Date: Fri, 20 Sep 2024 17:56:40 +0200 Subject: [PATCH] Alerting: Add a metric to track the number of rules with simplified editor settings (#93511) * Alerting: Add a metric to track the number of rules with simplified editor settings --- pkg/services/ngalert/metrics/scheduler.go | 10 +++ pkg/services/ngalert/schedule/metrics.go | 18 ++++- .../ngalert/schedule/schedule_unit_test.go | 77 +++++++++++++++++++ 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/pkg/services/ngalert/metrics/scheduler.go b/pkg/services/ngalert/metrics/scheduler.go index 9c39727b382..76b0dbf22a1 100644 --- a/pkg/services/ngalert/metrics/scheduler.go +++ b/pkg/services/ngalert/metrics/scheduler.go @@ -31,6 +31,7 @@ type Scheduler struct { UpdateSchedulableAlertRulesDuration prometheus.Histogram Ticker *ticker.Metrics EvaluationMissed *prometheus.CounterVec + SimplifiedEditorRules *prometheus.GaugeVec } func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler { @@ -182,5 +183,14 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler { }, []string{"org", "name"}, ), + SimplifiedEditorRules: promauto.With(r).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: Subsystem, + Name: "simplified_editor_rules", + Help: "The number of alert rules using simplified editor settings.", + }, + []string{"org", "setting"}, + ), } } diff --git a/pkg/services/ngalert/schedule/metrics.go b/pkg/services/ngalert/schedule/metrics.go index 0d3e04f6589..50d10e96a36 100644 --- a/pkg/services/ngalert/schedule/metrics.go +++ b/pkg/services/ngalert/schedule/metrics.go @@ -6,7 +6,7 @@ import ( "sort" "github.com/grafana/grafana/pkg/services/ngalert/metrics" - models "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/models" ) // hashUIDs returns a fnv64 hash of the UIDs for all alert rules. @@ -47,6 +47,8 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { // gauge for groups per org groupsPerOrg := make(map[int64]map[string]struct{}) + simplifiedEditorSettingsPerOrg := make(map[int64]map[string]int64) // orgID -> setting -> count + for _, rule := range alertRules { // Count rules by org, type and state state := metrics.AlertRuleActiveLabelValue @@ -70,6 +72,14 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { orgsNfSettings[rule.OrgID]++ } + // Count rules with simplified editor settings per org + if rule.Metadata.EditorSettings.SimplifiedQueryAndExpressionsSection { + if _, ok := simplifiedEditorSettingsPerOrg[rule.OrgID]; !ok { + simplifiedEditorSettingsPerOrg[rule.OrgID] = make(map[string]int64) + } + simplifiedEditorSettingsPerOrg[rule.OrgID]["simplified_query_and_expressions_section"]++ + } + // Count groups per org orgGroups, ok := groupsPerOrg[rule.OrgID] if !ok { @@ -83,6 +93,7 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { sch.metrics.GroupRules.Reset() sch.metrics.SimpleNotificationRules.Reset() sch.metrics.Groups.Reset() + sch.metrics.SimplifiedEditorRules.Reset() // Set metrics for key, count := range buckets { @@ -94,6 +105,11 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { for orgID, groups := range groupsPerOrg { sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups))) } + for orgID, settings := range simplifiedEditorSettingsPerOrg { + for setting, count := range settings { + sch.metrics.SimplifiedEditorRules.WithLabelValues(fmt.Sprint(orgID), setting).Set(float64(count)) + } + } // While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be // scheduled as rules could be removed before we get a chance to evaluate them. sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules))) diff --git a/pkg/services/ngalert/schedule/schedule_unit_test.go b/pkg/services/ngalert/schedule/schedule_unit_test.go index 9f7a3a05dea..54b83cfb277 100644 --- a/pkg/services/ngalert/schedule/schedule_unit_test.go +++ b/pkg/services/ngalert/schedule/schedule_unit_test.go @@ -716,6 +716,83 @@ func TestSchedule_updateRulesMetrics(t *testing.T) { }) }) + t.Run("simplified_editor_rules metric should reflect the current state", func(t *testing.T) { + const firstOrgID int64 = 1 + const secondOrgID int64 = 2 + + alertRuleWithAdvancedSettings := models.RuleGen.With( + models.RuleGen.WithOrgID(firstOrgID), + models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(false), + ).GenerateRef() + + // The rule does not have simplified editor enabled, should not be in the metrics + t.Run("it should not show metrics", func(t *testing.T) { + sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings}) + + expectedMetric := "" + err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules") + require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simplified_editor_rules]") + }) + + alertRule1 := models.RuleGen.With( + models.RuleGen.WithOrgID(firstOrgID), + models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(true), + ).GenerateRef() + + t.Run("it should show one rule in a single org", func(t *testing.T) { + sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule1}) + + expectedMetric := fmt.Sprintf( + `# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings. + # TYPE grafana_alerting_simplified_editor_rules gauge + grafana_alerting_simplified_editor_rules{org="%[1]d",setting="simplified_query_and_expressions_section"} 1 + `, alertRule1.OrgID) + + err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules") + require.NoError(t, err) + }) + + alertRule2 := models.RuleGen.With( + models.RuleGen.WithOrgID(secondOrgID), + models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(true), + ).GenerateRef() + + t.Run("it should show two rules in two orgs", func(t *testing.T) { + sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule1, alertRule2}) + + expectedMetric := fmt.Sprintf( + `# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings. + # TYPE grafana_alerting_simplified_editor_rules gauge + grafana_alerting_simplified_editor_rules{org="%[1]d",setting="simplified_query_and_expressions_section"} 1 + grafana_alerting_simplified_editor_rules{org="%[2]d",setting="simplified_query_and_expressions_section"} 1 + `, alertRule1.OrgID, alertRule2.OrgID) + + err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules") + require.NoError(t, err) + }) + + t.Run("after removing one of the rules it should show one present rule and one org", func(t *testing.T) { + sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule2}) + + expectedMetric := fmt.Sprintf( + `# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings. + # TYPE grafana_alerting_simplified_editor_rules gauge + grafana_alerting_simplified_editor_rules{org="%d",setting="simplified_query_and_expressions_section"} 1 + `, alertRule2.OrgID) + + err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules") + require.NoError(t, err) + }) + + t.Run("after removing all rules it should not show any metrics", func(t *testing.T) { + sch.updateRulesMetrics([]*models.AlertRule{}) + + expectedMetric := "" + err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules") + require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simplified_editor_rules]") + }) + }) + t.Run("rule_groups metric should reflect the current state", func(t *testing.T) { const firstOrgID int64 = 1 const secondOrgID int64 = 2