From 43a246f431fc7ce94ea66f3151ca55801dfcd2fe Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Mon, 17 Jun 2024 12:25:47 +0200 Subject: [PATCH] Alerting: Improve performance of /api/prometheus for large numbers of alerts. (#89268) * Alerting: Optimize sorting alert instances. * Also change other Labels fields for consistency --- pkg/services/ngalert/api/api_prometheus.go | 12 ++--- .../ngalert/api/tooling/definitions/prom.go | 53 ++++++------------- .../tooling/definitions/prom_bench_test.go | 5 +- .../api/tooling/definitions/prom_test.go | 36 ++++++++----- 4 files changed, 49 insertions(+), 57 deletions(-) diff --git a/pkg/services/ngalert/api/api_prometheus.go b/pkg/services/ngalert/api/api_prometheus.go index ccea63ffbb8..d5c6343f780 100644 --- a/pkg/services/ngalert/api/api_prometheus.go +++ b/pkg/services/ngalert/api/api_prometheus.go @@ -97,8 +97,8 @@ func PrepareAlertStatuses(manager state.AlertInstanceManager, opts AlertStatuses } alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{ - Labels: alertState.GetLabels(labelOptions...), - Annotations: alertState.Annotations, + Labels: apimodels.LabelsFromMap(alertState.GetLabels(labelOptions...)), + Annotations: apimodels.LabelsFromMap(alertState.Annotations), // TODO: or should we make this two fields? Using one field lets the // frontend use the same logic for parsing text on annotations and this. @@ -444,12 +444,12 @@ func toRuleGroup(log log.Logger, manager state.AlertInstanceManager, groupKey ng Name: rule.Title, Query: ruleToQuery(log, rule), Duration: rule.For.Seconds(), - Annotations: rule.Annotations, + Annotations: apimodels.LabelsFromMap(rule.Annotations), } newRule := apimodels.Rule{ Name: rule.Title, - Labels: rule.GetLabels(labelOptions...), + Labels: apimodels.LabelsFromMap(rule.GetLabels(labelOptions...)), Health: "ok", Type: rule.Type().String(), LastEvaluation: time.Time{}, @@ -471,8 +471,8 @@ func toRuleGroup(log log.Logger, manager state.AlertInstanceManager, groupKey ng totals["error"] += 1 } alert := apimodels.Alert{ - Labels: alertState.GetLabels(labelOptions...), - Annotations: alertState.Annotations, + Labels: apimodels.LabelsFromMap(alertState.GetLabels(labelOptions...)), + Annotations: apimodels.LabelsFromMap(alertState.Annotations), // TODO: or should we make this two fields? Using one field lets the // frontend use the same logic for parsing text on annotations and this. diff --git a/pkg/services/ngalert/api/tooling/definitions/prom.go b/pkg/services/ngalert/api/tooling/definitions/prom.go index cb37533377c..f2ad8c4c110 100644 --- a/pkg/services/ngalert/api/tooling/definitions/prom.go +++ b/pkg/services/ngalert/api/tooling/definitions/prom.go @@ -9,6 +9,7 @@ import ( "time" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + promlabels "github.com/prometheus/prometheus/model/labels" ) // swagger:route GET /prometheus/grafana/api/v1/rules prometheus RouteGetGrafanaRuleStatuses @@ -151,7 +152,7 @@ type AlertingRule struct { Query string `json:"query,omitempty"` Duration float64 `json:"duration,omitempty"` // required: true - Annotations overrideLabels `json:"annotations,omitempty"` + Annotations promlabels.Labels `json:"annotations,omitempty"` // required: true ActiveAt *time.Time `json:"activeAt,omitempty"` Alerts []Alert `json:"alerts,omitempty"` @@ -166,8 +167,8 @@ type Rule struct { // required: true Name string `json:"name"` // required: true - Query string `json:"query"` - Labels overrideLabels `json:"labels,omitempty"` + Query string `json:"query"` + Labels promlabels.Labels `json:"labels,omitempty"` // required: true Health string `json:"health"` LastError string `json:"lastError,omitempty"` @@ -181,9 +182,9 @@ type Rule struct { // swagger:model type Alert struct { // required: true - Labels overrideLabels `json:"labels"` + Labels promlabels.Labels `json:"labels"` // required: true - Annotations overrideLabels `json:"annotations"` + Annotations promlabels.Labels `json:"annotations"` // required: true State string `json:"state"` ActiveAt *time.Time `json:"activeAt"` @@ -300,31 +301,6 @@ func (by AlertsBy) TopK(alerts []Alert, k int) []Alert { // is more important than "normal". If two alerts have the same importance // then the ordering is based on their ActiveAt time and their labels. func AlertsByImportance(a1, a2 *Alert) bool { - // labelsForComparison concatenates each key/value pair into a string and - // sorts them. - labelsForComparison := func(m map[string]string) []string { - s := make([]string, 0, len(m)) - for k, v := range m { - s = append(s, k+v) - } - sort.Strings(s) - return s - } - - // compareLabels returns true if labels1 are less than labels2. This happens - // when labels1 has fewer labels than labels2, or if the next label from - // labels1 is lexicographically less than the next label from labels2. - compareLabels := func(labels1, labels2 []string) bool { - if len(labels1) == len(labels2) { - for i := range labels1 { - if labels1[i] != labels2[i] { - return labels1[i] < labels2[i] - } - } - } - return len(labels1) < len(labels2) - } - // The importance of an alert is first based on the importance of their states. // This ordering is intended to show the most important alerts first when // using pagination. @@ -345,9 +321,7 @@ func AlertsByImportance(a1, a2 *Alert) bool { return true } // Both alerts are active since the same time so compare their labels - labels1 := labelsForComparison(a1.Labels) - labels2 := labelsForComparison(a2.Labels) - return compareLabels(labels1, labels2) + return promlabels.Compare(a1.Labels, a2.Labels) < 0 } return importance1 < importance2 @@ -362,9 +336,16 @@ func (s AlertsSorter) Len() int { return len(s.alerts) } func (s AlertsSorter) Swap(i, j int) { s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] } func (s AlertsSorter) Less(i, j int) bool { return s.by(&s.alerts[i], &s.alerts[j]) } -// override the labels type with a map for generation. -// The custom marshaling for labels.Labels ends up doing this anyways. -type overrideLabels map[string]string +// LabelsFromMap creates Labels from a map. Note the Labels type requires the +// labels be sorted, so we make sure to do that. +func LabelsFromMap(m map[string]string) promlabels.Labels { + sb := promlabels.NewScratchBuilder(len(m)) + for k, v := range m { + sb.Add(k, v) + } + sb.Sort() + return sb.Labels() +} // swagger:parameters RouteGetGrafanaAlertStatuses type GetGrafanaAlertStatusesParams struct { diff --git a/pkg/services/ngalert/api/tooling/definitions/prom_bench_test.go b/pkg/services/ngalert/api/tooling/definitions/prom_bench_test.go index 64a051751cc..65d6fa39754 100644 --- a/pkg/services/ngalert/api/tooling/definitions/prom_bench_test.go +++ b/pkg/services/ngalert/api/tooling/definitions/prom_bench_test.go @@ -21,10 +21,11 @@ func makeAlerts(amount int) []Alert { alerts := make([]Alert, amount) for i := 0; i < len(alerts); i++ { - alerts[i].Labels = make(map[string]string) + lbls := make(map[string]string) for label := 0; label < numLabels; label++ { - alerts[i].Labels[fmt.Sprintf("label_%d", label)] = fmt.Sprintf("label_%d_value_%d", label, i%100) + lbls[fmt.Sprintf("label_%d", label)] = fmt.Sprintf("label_%d_value_%d", label, i%100) } + alerts[i].Labels = LabelsFromMap(lbls) if i%100 < percentAlerting { alerts[i].State = "alerting" diff --git a/pkg/services/ngalert/api/tooling/definitions/prom_test.go b/pkg/services/ngalert/api/tooling/definitions/prom_test.go index a2013541e2b..40c6dc66107 100644 --- a/pkg/services/ngalert/api/tooling/definitions/prom_test.go +++ b/pkg/services/ngalert/api/tooling/definitions/prom_test.go @@ -69,32 +69,42 @@ func TestSortAlertsByImportance(t *testing.T) { }, { name: "inactive alerts with same importance are ordered by labels", input: []Alert{ - {State: "normal", Labels: map[string]string{"c": "d"}}, - {State: "normal", Labels: map[string]string{"a": "b"}}, + {State: "normal", Labels: LabelsFromMap(map[string]string{"c": "d"})}, + {State: "normal", Labels: LabelsFromMap(map[string]string{"a": "b"})}, }, expected: []Alert{ - {State: "normal", Labels: map[string]string{"a": "b"}}, - {State: "normal", Labels: map[string]string{"c": "d"}}, + {State: "normal", Labels: LabelsFromMap(map[string]string{"a": "b"})}, + {State: "normal", Labels: LabelsFromMap(map[string]string{"c": "d"})}, }, }, { - name: "active alerts with same importance and active time are ordered fewest labels first", + name: "active alerts with same importance and active time are ordered by label names", input: []Alert{ - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d", "e": "f"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})}, }, expected: []Alert{ - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d", "e": "f"})}, }, }, { name: "active alerts with same importance and active time are ordered by labels", input: []Alert{ - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})}, }, expected: []Alert{ - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}}, - {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d"})}, + }, + }, { + name: "active alerts with same importance and active time are ordered by label values", + input: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "b"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "a"})}, + }, + expected: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "a"})}, + {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "b"})}, }, }}