Alerting: Improve performance of /api/prometheus for large numbers of alerts. (#89268)

* Alerting: Optimize sorting alert instances.

* Also change other Labels fields for consistency
This commit is contained in:
Steve Simpson 2024-06-17 12:25:47 +02:00 committed by GitHub
parent a7726ff813
commit 43a246f431
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 49 additions and 57 deletions

View File

@ -97,8 +97,8 @@ func PrepareAlertStatuses(manager state.AlertInstanceManager, opts AlertStatuses
} }
alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{ alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{
Labels: alertState.GetLabels(labelOptions...), Labels: apimodels.LabelsFromMap(alertState.GetLabels(labelOptions...)),
Annotations: alertState.Annotations, Annotations: apimodels.LabelsFromMap(alertState.Annotations),
// TODO: or should we make this two fields? Using one field lets the // TODO: or should we make this two fields? Using one field lets the
// frontend use the same logic for parsing text on annotations and this. // frontend use the same logic for parsing text on annotations and this.
@ -444,12 +444,12 @@ func toRuleGroup(log log.Logger, manager state.AlertInstanceManager, groupKey ng
Name: rule.Title, Name: rule.Title,
Query: ruleToQuery(log, rule), Query: ruleToQuery(log, rule),
Duration: rule.For.Seconds(), Duration: rule.For.Seconds(),
Annotations: rule.Annotations, Annotations: apimodels.LabelsFromMap(rule.Annotations),
} }
newRule := apimodels.Rule{ newRule := apimodels.Rule{
Name: rule.Title, Name: rule.Title,
Labels: rule.GetLabels(labelOptions...), Labels: apimodels.LabelsFromMap(rule.GetLabels(labelOptions...)),
Health: "ok", Health: "ok",
Type: rule.Type().String(), Type: rule.Type().String(),
LastEvaluation: time.Time{}, LastEvaluation: time.Time{},
@ -471,8 +471,8 @@ func toRuleGroup(log log.Logger, manager state.AlertInstanceManager, groupKey ng
totals["error"] += 1 totals["error"] += 1
} }
alert := apimodels.Alert{ alert := apimodels.Alert{
Labels: alertState.GetLabels(labelOptions...), Labels: apimodels.LabelsFromMap(alertState.GetLabels(labelOptions...)),
Annotations: alertState.Annotations, Annotations: apimodels.LabelsFromMap(alertState.Annotations),
// TODO: or should we make this two fields? Using one field lets the // TODO: or should we make this two fields? Using one field lets the
// frontend use the same logic for parsing text on annotations and this. // frontend use the same logic for parsing text on annotations and this.

View File

@ -9,6 +9,7 @@ import (
"time" "time"
v1 "github.com/prometheus/client_golang/api/prometheus/v1" v1 "github.com/prometheus/client_golang/api/prometheus/v1"
promlabels "github.com/prometheus/prometheus/model/labels"
) )
// swagger:route GET /prometheus/grafana/api/v1/rules prometheus RouteGetGrafanaRuleStatuses // swagger:route GET /prometheus/grafana/api/v1/rules prometheus RouteGetGrafanaRuleStatuses
@ -151,7 +152,7 @@ type AlertingRule struct {
Query string `json:"query,omitempty"` Query string `json:"query,omitempty"`
Duration float64 `json:"duration,omitempty"` Duration float64 `json:"duration,omitempty"`
// required: true // required: true
Annotations overrideLabels `json:"annotations,omitempty"` Annotations promlabels.Labels `json:"annotations,omitempty"`
// required: true // required: true
ActiveAt *time.Time `json:"activeAt,omitempty"` ActiveAt *time.Time `json:"activeAt,omitempty"`
Alerts []Alert `json:"alerts,omitempty"` Alerts []Alert `json:"alerts,omitempty"`
@ -166,8 +167,8 @@ type Rule struct {
// required: true // required: true
Name string `json:"name"` Name string `json:"name"`
// required: true // required: true
Query string `json:"query"` Query string `json:"query"`
Labels overrideLabels `json:"labels,omitempty"` Labels promlabels.Labels `json:"labels,omitempty"`
// required: true // required: true
Health string `json:"health"` Health string `json:"health"`
LastError string `json:"lastError,omitempty"` LastError string `json:"lastError,omitempty"`
@ -181,9 +182,9 @@ type Rule struct {
// swagger:model // swagger:model
type Alert struct { type Alert struct {
// required: true // required: true
Labels overrideLabels `json:"labels"` Labels promlabels.Labels `json:"labels"`
// required: true // required: true
Annotations overrideLabels `json:"annotations"` Annotations promlabels.Labels `json:"annotations"`
// required: true // required: true
State string `json:"state"` State string `json:"state"`
ActiveAt *time.Time `json:"activeAt"` ActiveAt *time.Time `json:"activeAt"`
@ -300,31 +301,6 @@ func (by AlertsBy) TopK(alerts []Alert, k int) []Alert {
// is more important than "normal". If two alerts have the same importance // is more important than "normal". If two alerts have the same importance
// then the ordering is based on their ActiveAt time and their labels. // then the ordering is based on their ActiveAt time and their labels.
func AlertsByImportance(a1, a2 *Alert) bool { func AlertsByImportance(a1, a2 *Alert) bool {
// labelsForComparison concatenates each key/value pair into a string and
// sorts them.
labelsForComparison := func(m map[string]string) []string {
s := make([]string, 0, len(m))
for k, v := range m {
s = append(s, k+v)
}
sort.Strings(s)
return s
}
// compareLabels returns true if labels1 are less than labels2. This happens
// when labels1 has fewer labels than labels2, or if the next label from
// labels1 is lexicographically less than the next label from labels2.
compareLabels := func(labels1, labels2 []string) bool {
if len(labels1) == len(labels2) {
for i := range labels1 {
if labels1[i] != labels2[i] {
return labels1[i] < labels2[i]
}
}
}
return len(labels1) < len(labels2)
}
// The importance of an alert is first based on the importance of their states. // The importance of an alert is first based on the importance of their states.
// This ordering is intended to show the most important alerts first when // This ordering is intended to show the most important alerts first when
// using pagination. // using pagination.
@ -345,9 +321,7 @@ func AlertsByImportance(a1, a2 *Alert) bool {
return true return true
} }
// Both alerts are active since the same time so compare their labels // Both alerts are active since the same time so compare their labels
labels1 := labelsForComparison(a1.Labels) return promlabels.Compare(a1.Labels, a2.Labels) < 0
labels2 := labelsForComparison(a2.Labels)
return compareLabels(labels1, labels2)
} }
return importance1 < importance2 return importance1 < importance2
@ -362,9 +336,16 @@ func (s AlertsSorter) Len() int { return len(s.alerts) }
func (s AlertsSorter) Swap(i, j int) { s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] } func (s AlertsSorter) Swap(i, j int) { s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] }
func (s AlertsSorter) Less(i, j int) bool { return s.by(&s.alerts[i], &s.alerts[j]) } func (s AlertsSorter) Less(i, j int) bool { return s.by(&s.alerts[i], &s.alerts[j]) }
// override the labels type with a map for generation. // LabelsFromMap creates Labels from a map. Note the Labels type requires the
// The custom marshaling for labels.Labels ends up doing this anyways. // labels be sorted, so we make sure to do that.
type overrideLabels map[string]string func LabelsFromMap(m map[string]string) promlabels.Labels {
sb := promlabels.NewScratchBuilder(len(m))
for k, v := range m {
sb.Add(k, v)
}
sb.Sort()
return sb.Labels()
}
// swagger:parameters RouteGetGrafanaAlertStatuses // swagger:parameters RouteGetGrafanaAlertStatuses
type GetGrafanaAlertStatusesParams struct { type GetGrafanaAlertStatusesParams struct {

View File

@ -21,10 +21,11 @@ func makeAlerts(amount int) []Alert {
alerts := make([]Alert, amount) alerts := make([]Alert, amount)
for i := 0; i < len(alerts); i++ { for i := 0; i < len(alerts); i++ {
alerts[i].Labels = make(map[string]string) lbls := make(map[string]string)
for label := 0; label < numLabels; label++ { for label := 0; label < numLabels; label++ {
alerts[i].Labels[fmt.Sprintf("label_%d", label)] = fmt.Sprintf("label_%d_value_%d", label, i%100) lbls[fmt.Sprintf("label_%d", label)] = fmt.Sprintf("label_%d_value_%d", label, i%100)
} }
alerts[i].Labels = LabelsFromMap(lbls)
if i%100 < percentAlerting { if i%100 < percentAlerting {
alerts[i].State = "alerting" alerts[i].State = "alerting"

View File

@ -69,32 +69,42 @@ func TestSortAlertsByImportance(t *testing.T) {
}, { }, {
name: "inactive alerts with same importance are ordered by labels", name: "inactive alerts with same importance are ordered by labels",
input: []Alert{ input: []Alert{
{State: "normal", Labels: map[string]string{"c": "d"}}, {State: "normal", Labels: LabelsFromMap(map[string]string{"c": "d"})},
{State: "normal", Labels: map[string]string{"a": "b"}}, {State: "normal", Labels: LabelsFromMap(map[string]string{"a": "b"})},
}, },
expected: []Alert{ expected: []Alert{
{State: "normal", Labels: map[string]string{"a": "b"}}, {State: "normal", Labels: LabelsFromMap(map[string]string{"a": "b"})},
{State: "normal", Labels: map[string]string{"c": "d"}}, {State: "normal", Labels: LabelsFromMap(map[string]string{"c": "d"})},
}, },
}, { }, {
name: "active alerts with same importance and active time are ordered fewest labels first", name: "active alerts with same importance and active time are ordered by label names",
input: []Alert{ input: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d", "e": "f"})},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})},
}, },
expected: []Alert{ expected: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d", "e": "f"})},
}, },
}, { }, {
name: "active alerts with same importance and active time are ordered by labels", name: "active alerts with same importance and active time are ordered by labels",
input: []Alert{ input: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d"})},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})},
}, },
expected: []Alert{ expected: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"a": "b"})},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, {State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"c": "d"})},
},
}, {
name: "active alerts with same importance and active time are ordered by label values",
input: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "b"})},
{State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "a"})},
},
expected: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "a"})},
{State: "alerting", ActiveAt: &tm1, Labels: LabelsFromMap(map[string]string{"x": "b"})},
}, },
}} }}