Mirror of https://github.com/grafana/grafana.git (synced 2025-02-25 18:55:37 -06:00)
Alerting: Add limits to the Prometheus Rules API (#65169)
This commit adds a number of limits to the Grafana flavor of the Prometheus Rules API:

1. `limit` limits the maximum number of Rule Groups returned
2. `limit_rules` limits the maximum number of rules per Rule Group
3. `limit_alerts` limits the maximum number of alerts per rule

It sorts Rule Groups and rules within Rule Groups such that data in the response is stable across requests. It also returns summaries (totals) for all Rule Groups, individual Rule Groups and rules.
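For illustration only (not part of the commit), here is a minimal Go sketch of a client exercising all three limits against the Grafana-flavored rules endpoint. The endpoint path and the use of a token from an environment variable are assumptions about a typical deployment; the sketch decodes only the `groups` and `totals` fields that the tests below assert on.

// Illustrative sketch only; not part of this commit. Assumes Grafana is
// reachable at localhost:3000 and that GRAFANA_TOKEN holds an API token.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

// ruleResponse decodes just the fields used here: the response-level,
// per-group, and per-rule totals introduced by this commit.
type ruleResponse struct {
	Data struct {
		Totals map[string]int64 `json:"totals"`
		Groups []struct {
			Name   string           `json:"name"`
			File   string           `json:"file"`
			Totals map[string]int64 `json:"totals"`
			Rules  []struct {
				Name   string           `json:"name"`
				Totals map[string]int64 `json:"totals"`
			} `json:"rules"`
		} `json:"groups"`
	} `json:"data"`
}

func main() {
	// Ask for at most 1 Rule Group, 1 rule per group, and 1 alert per rule.
	// Omitting a parameter (or passing a negative value) leaves it unlimited.
	url := "http://localhost:3000/api/prometheus/grafana/api/v1/rules?limit=1&limit_rules=1&limit_alerts=1"
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer "+os.Getenv("GRAFANA_TOKEN"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var res ruleResponse
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		panic(err)
	}

	// The totals summarise everything that matched, not only what survives the limits.
	fmt.Println("overall totals:", res.Data.Totals)
	for _, g := range res.Data.Groups {
		fmt.Printf("group %s/%s totals: %v\n", g.File, g.Name, g.Totals)
		for _, r := range g.Rules {
			fmt.Printf("  rule %s totals: %v\n", r.Name, r.Totals)
		}
	}
}

As the tests below assert, the totals describe all matching groups, rules, and alerts, while the limits only truncate what is included in the response body.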
@@ -115,12 +115,16 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon
return ErrResp(http.StatusBadRequest, errors.New("panel_id must be set with dashboard_uid"), "")
}

limitGroups := c.QueryInt64WithDefault("limit", -1)
limitRulesPerGroup := c.QueryInt64WithDefault("limit_rules", -1)
limitAlertsPerRule := c.QueryInt64WithDefault("limit_alerts", -1)

ruleResponse := apimodels.RuleResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.RuleDiscovery{
RuleGroups: []*apimodels.RuleGroup{},
RuleGroups: []apimodels.RuleGroup{},
},
}

@@ -161,14 +165,22 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon
return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqViewer, evaluator)
}

// Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID,
// but in this API all rules belong to the same organization.
groupedRules := make(map[ngmodels.AlertRuleGroupKey][]*ngmodels.AlertRule)
for _, rule := range ruleList {
key := rule.GetGroupKey()
rulesInGroup := groupedRules[key]
rulesInGroup = append(rulesInGroup, rule)
groupedRules[key] = rulesInGroup
groupKey := rule.GetGroupKey()
ruleGroup := groupedRules[groupKey]
ruleGroup = append(ruleGroup, rule)
groupedRules[groupKey] = ruleGroup
}
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times.
for _, groupRules := range groupedRules {
ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(groupRules)
}

rulesTotals := make(map[string]int64, len(groupedRules))
for groupKey, rules := range groupedRules {
folder := namespaceMap[groupKey.NamespaceUID]
if folder == nil {

@@ -178,16 +190,37 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon
if !authorizeAccessToRuleGroup(rules, hasAccess) {
continue
}
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, srv.toRuleGroup(groupKey.RuleGroup, folder, rules, labelOptions))
ruleGroup, totals := srv.toRuleGroup(groupKey, folder, rules, limitAlertsPerRule, labelOptions)
if limitRulesPerGroup > -1 && int64(len(rules)) > limitRulesPerGroup {
ruleGroup.Rules = ruleGroup.Rules[0:limitRulesPerGroup]
}
ruleGroup.Totals = totals
for k, v := range totals {
rulesTotals[k] += v
}
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, *ruleGroup)
}

ruleResponse.Data.Totals = rulesTotals

// Sort Rule Groups before checking limits
apimodels.RuleGroupsBy(apimodels.RuleGroupsByFileAndName).Sort(ruleResponse.Data.RuleGroups)
if limitGroups > -1 && int64(len(ruleResponse.Data.RuleGroups)) >= limitGroups {
ruleResponse.Data.RuleGroups = ruleResponse.Data.RuleGroups[0:limitGroups]
}

return response.JSON(http.StatusOK, ruleResponse)
}

func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, rules []*ngmodels.AlertRule, labelOptions []ngmodels.LabelOption) *apimodels.RuleGroup {
func (srv PrometheusSrv) toRuleGroup(groupKey ngmodels.AlertRuleGroupKey, folder *folder.Folder, rules []*ngmodels.AlertRule, limitAlerts int64, labelOptions []ngmodels.LabelOption) (*apimodels.RuleGroup, map[string]int64) {
newGroup := &apimodels.RuleGroup{
Name: groupName,
File: folder.Title, // file is what Prometheus uses for provisioning, we replace it with namespace.
Name: groupKey.RuleGroup,
// file is what Prometheus uses for provisioning, we replace it with namespace which is the folder in Grafana.
File: folder.Title,
}

rulesTotals := make(map[string]int64, len(rules))

ngmodels.RulesGroup(rules).SortByGroupIndex()
for _, rule := range rules {
alertingRule := apimodels.AlertingRule{

@@ -206,14 +239,19 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru
LastEvaluation: time.Time{},
}

for _, alertState := range srv.manager.GetStatesForRuleUID(rule.OrgID, rule.UID) {
states := srv.manager.GetStatesForRuleUID(rule.OrgID, rule.UID)
totals := make(map[string]int64)
for _, alertState := range states {
activeAt := alertState.StartsAt
valString := ""
if alertState.State == eval.Alerting || alertState.State == eval.Pending {
valString = formatValues(alertState)
}

alert := &apimodels.Alert{
totals[strings.ToLower(alertState.State.String())] += 1
if alertState.Error != nil {
totals["error"] += 1
}
alert := apimodels.Alert{
Labels: alertState.GetLabels(labelOptions...),
Annotations: alertState.Annotations,

@@ -237,6 +275,9 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru
alertingRule.State = "pending"
}
case eval.Alerting:
if alertingRule.ActiveAt == nil || alertingRule.ActiveAt.After(activeAt) {
alertingRule.ActiveAt = &activeAt
}
alertingRule.State = "firing"
case eval.Error:
newRule.Health = "error"

@@ -252,14 +293,30 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru
alertingRule.Alerts = append(alertingRule.Alerts, alert)
}

if alertingRule.State != "" {
rulesTotals[alertingRule.State] += 1
}

if newRule.Health == "error" || newRule.Health == "nodata" {
rulesTotals[newRule.Health] += 1
}

apimodels.AlertsBy(apimodels.AlertsByImportance).Sort(alertingRule.Alerts)

if limitAlerts > -1 && int64(len(alertingRule.Alerts)) > limitAlerts {
alertingRule.Alerts = alertingRule.Alerts[0:limitAlerts]
}

alertingRule.Rule = newRule
alertingRule.Totals = totals
newGroup.Rules = append(newGroup.Rules, alertingRule)
newGroup.Interval = float64(rule.IntervalSeconds)
// TODO yuri. Change that when scheduler will process alerts in groups
newGroup.EvaluationTime = newRule.EvaluationTime
newGroup.LastEvaluation = newRule.LastEvaluation
}
return newGroup

return newGroup, rulesTotals
}

// ruleToQuery attempts to extract the datasource queries from the alert query model.

@@ -305,6 +305,9 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"activeAt": "0001-01-01T00:00:00Z",
"value": ""
}],
"totals": {
"normal": 1
},
"labels": {
"__a_private_label_on_the_rule__": "a_value"
},

@@ -314,10 +317,16 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"duration": 180,
"evaluationTime": 60
}],
"totals": {
"inactive": 1
},
"interval": 60,
"lastEvaluation": "2022-03-10T14:01:00Z",
"evaluationTime": 60
}]
}],
"totals": {
"inactive": 1
}
}
}
`, folder.Title), string(r.Body()))

@@ -358,6 +367,9 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"activeAt": "0001-01-01T00:00:00Z",
"value": ""
}],
"totals": {
"normal": 1
},
"labels": {
"__a_private_label_on_the_rule__": "a_value",
"__alert_rule_uid__": "RuleUID"

@@ -368,10 +380,16 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"duration": 180,
"evaluationTime": 60
}],
"totals": {
"inactive": 1
},
"interval": 60,
"lastEvaluation": "2022-03-10T14:01:00Z",
"evaluationTime": 60
}]
}],
"totals": {
"inactive": 1
}
}
}
`, folder.Title), string(r.Body()))

@@ -406,6 +424,9 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"activeAt": "0001-01-01T00:00:00Z",
"value": ""
}],
"totals": {
"normal": 1
},
"labels": {
"__a_private_label_on_the_rule__": "a_value"
},

@@ -415,10 +436,16 @@ func TestRouteGetRuleStatuses(t *testing.T) {
"duration": 180,
"evaluationTime": 60
}],
"totals": {
"inactive": 1
},
"interval": 60,
"lastEvaluation": "2022-03-10T14:01:00Z",
"evaluationTime": 60
}]
}],
"totals": {
"inactive": 1
}
}
}
`, folder.Title), string(r.Body()))

@@ -504,6 +531,184 @@ func TestRouteGetRuleStatuses(t *testing.T) {
assert.Emptyf(t, rules, "not all expected rules were returned")
})
})

t.Run("test with limit on Rule Groups", func(t *testing.T) {
fakeStore, _, _, api := setupAPI(t)

rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID)))
fakeStore.PutRule(context.Background(), rules...)

t.Run("first without limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 inactive rules across all Rule Groups
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 2)
for _, rg := range res.Data.RuleGroups {
// Each Rule Group should have 1 inactive rule
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
}
})

t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 inactive rules across all Rule Groups
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
})
})

t.Run("test with limit rules", func(t *testing.T) {
fakeStore, _, _, api := setupAPI(t)
rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1")))
fakeStore.PutRule(context.Background(), rules...)

t.Run("first without limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 inactive rules across all Rule Groups
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 2)
for _, rg := range res.Data.RuleGroups {
// Each Rule Group should have 1 inactive rule
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
}
})

t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 inactive rules
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule because of the limit
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
})
})

t.Run("test with limit alerts", func(t *testing.T) {
fakeStore, fakeAIM, _, api := setupAPI(t)
rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1")))
fakeStore.PutRule(context.Background(), rules...)
// create a normal and firing alert for each rule
for _, r := range rules {
fakeAIM.GenerateAlertInstances(orgID, r.UID, 1)
fakeAIM.GenerateAlertInstances(orgID, r.UID, 1, withAlertingState())
}

t.Run("first without limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 firing rules across all Rule Groups
require.Equal(t, map[string]int64{"firing": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 2)
for _, rg := range res.Data.RuleGroups {
// Each Rule Group should have 1 firing rule
require.Equal(t, map[string]int64{"firing": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
// Each rule should have two alerts
require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rg.Rules[0].Totals)
}
})

t.Run("then with limits", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
OrgRole: org.RoleViewer,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))

// There should be 2 firing rules across all Rule Groups
require.Equal(t, map[string]int64{"firing": 2}, res.Data.Totals)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule because of the limit
require.Equal(t, map[string]int64{"firing": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
rule := rg.Rules[0]
// The rule should have two alerts, but just one should be returned
require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rule.Totals)
require.Len(t, rule.Alerts, 1)
// Firing alerts should have precedence over normal alerts
require.Equal(t, "Alerting", rule.Alerts[0].State)
})
})
}

func setupAPI(t *testing.T) (*fakes.RuleStore, *fakeAlertInstanceManager, *acmock.Mock, PrometheusSrv) {

@@ -1,6 +1,9 @@
package definitions

import (
"fmt"
"sort"
"strings"
"time"

v1 "github.com/prometheus/client_golang/api/prometheus/v1"

@@ -65,7 +68,8 @@ type DiscoveryBase struct {
// swagger:model
type RuleDiscovery struct {
// required: true
RuleGroups []*RuleGroup `json:"groups"`
RuleGroups []RuleGroup `json:"groups"`
Totals map[string]int64 `json:"totals,omitempty"`
}

// AlertDiscovery has info for all active alerts.

@@ -85,13 +89,37 @@ type RuleGroup struct {
// specific properties, both alerting and recording rules are exposed in the
// same array.
// required: true
Rules []AlertingRule `json:"rules"`
Rules []AlertingRule `json:"rules"`
Totals map[string]int64 `json:"totals"`
// required: true
Interval float64 `json:"interval"`
LastEvaluation time.Time `json:"lastEvaluation"`
EvaluationTime float64 `json:"evaluationTime"`
}

// RuleGroupsBy is a function that defines the ordering of Rule Groups.
type RuleGroupsBy func(a1, a2 *RuleGroup) bool

func (by RuleGroupsBy) Sort(groups []RuleGroup) {
sort.Sort(RuleGroupsSorter{groups: groups, by: by})
}

func RuleGroupsByFileAndName(a1, a2 *RuleGroup) bool {
if a1.File == a2.File {
return a1.Name < a2.Name
}
return a1.File < a2.File
}

type RuleGroupsSorter struct {
groups []RuleGroup
by RuleGroupsBy
}

func (s RuleGroupsSorter) Len() int { return len(s.groups) }
func (s RuleGroupsSorter) Swap(i, j int) { s.groups[i], s.groups[j] = s.groups[j], s.groups[i] }
func (s RuleGroupsSorter) Less(i, j int) bool { return s.by(&s.groups[i], &s.groups[j]) }

// adapted from cortex
// swagger:model
type AlertingRule struct {

@@ -106,7 +134,9 @@ type AlertingRule struct {
// required: true
Annotations overrideLabels `json:"annotations,omitempty"`
// required: true
Alerts []*Alert `json:"alerts,omitempty"`
ActiveAt *time.Time `json:"activeAt,omitempty"`
Alerts []Alert `json:"alerts,omitempty"`
Totals map[string]int64 `json:"totals,omitempty"`
Rule
}

@@ -141,6 +171,107 @@ type Alert struct {
Value string `json:"value"`
}

type StateByImportance int

const (
StateAlerting = iota
StatePending
StateError
StateNoData
StateNormal
)

func stateByImportanceFromString(s string) (StateByImportance, error) {
switch s = strings.ToLower(s); s {
case "alerting":
return StateAlerting, nil
case "pending":
return StatePending, nil
case "error":
return StateError, nil
case "nodata":
return StateNoData, nil
case "normal":
return StateNormal, nil
default:
return -1, fmt.Errorf("unknown state: %s", s)
}
}

// AlertsBy is a function that defines the ordering of alerts.
type AlertsBy func(a1, a2 *Alert) bool

func (by AlertsBy) Sort(alerts []Alert) {
sort.Sort(AlertsSorter{alerts: alerts, by: by})
}

// AlertsByImportance orders alerts by importance. An alert is more important
// than another alert if its status has higher importance. For example, "alerting"
// is more important than "normal". If two alerts have the same importance
// then the ordering is based on their ActiveAt time and their labels.
func AlertsByImportance(a1, a2 *Alert) bool {
// labelsForComparison concatenates each key/value pair into a string and
// sorts them.
labelsForComparison := func(m map[string]string) []string {
s := make([]string, 0, len(m))
for k, v := range m {
s = append(s, k+v)
}
sort.Strings(s)
return s
}

// compareLabels returns true if labels1 are less than labels2. This happens
// when labels1 has fewer labels than labels2, or if the next label from
// labels1 is lexicographically less than the next label from labels2.
compareLabels := func(labels1, labels2 []string) bool {
if len(labels1) == len(labels2) {
for i := range labels1 {
if labels1[i] != labels2[i] {
return labels1[i] < labels2[i]
}
}
}
return len(labels1) < len(labels2)
}

// The importance of an alert is first based on the importance of their states.
// This ordering is intended to show the most important alerts first when
// using pagination.
importance1, _ := stateByImportanceFromString(a1.State)
importance2, _ := stateByImportanceFromString(a2.State)

// If both alerts have the same importance then the ordering is based on
// their ActiveAt time, and if those are equal, their labels.
if importance1 == importance2 {
if a1.ActiveAt != nil && a2.ActiveAt == nil {
// The first alert is active but not the second
return true
} else if a1.ActiveAt == nil && a2.ActiveAt != nil {
// The second alert is active but not the first
return false
} else if a1.ActiveAt != nil && a2.ActiveAt != nil && a1.ActiveAt.Before(*a2.ActiveAt) {
// Both alerts are active but a1 happened before a2
return true
}
// Both alerts are active since the same time so compare their labels
labels1 := labelsForComparison(a1.Labels)
labels2 := labelsForComparison(a2.Labels)
return compareLabels(labels1, labels2)
}

return importance1 < importance2
}

type AlertsSorter struct {
alerts []Alert
by AlertsBy
}

func (s AlertsSorter) Len() int { return len(s.alerts) }
func (s AlertsSorter) Swap(i, j int) { s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] }
func (s AlertsSorter) Less(i, j int) bool { return s.by(&s.alerts[i], &s.alerts[j]) }

// override the labels type with a map for generation.
// The custom marshaling for labels.Labels ends up doing this anyways.
type overrideLabels map[string]string

pkg/services/ngalert/api/tooling/definitions/prom_test.go (new file, 66 lines)
@@ -0,0 +1,66 @@
package definitions

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
)

func TestSortAlertsByImportance(t *testing.T) {
tm1, tm2 := time.Now(), time.Now().Add(time.Second)
tc := []struct {
name string
input []Alert
expected []Alert
}{{
name: "alerts are ordered in expected importance",
input: []Alert{{State: "normal"}, {State: "nodata"}, {State: "error"}, {State: "pending"}, {State: "alerting"}},
expected: []Alert{{State: "alerting"}, {State: "pending"}, {State: "error"}, {State: "nodata"}, {State: "normal"}},
}, {
name: "alerts with same importance are ordered active first",
input: []Alert{{State: "normal"}, {State: "normal", ActiveAt: &tm1}},
expected: []Alert{{State: "normal", ActiveAt: &tm1}, {State: "normal"}},
}, {
name: "active alerts with same importance are ordered newest first",
input: []Alert{{State: "alerting", ActiveAt: &tm2}, {State: "alerting", ActiveAt: &tm1}},
expected: []Alert{{State: "alerting", ActiveAt: &tm1}, {State: "alerting", ActiveAt: &tm2}},
}, {
name: "inactive alerts with same importance are ordered by labels",
input: []Alert{
{State: "normal", Labels: map[string]string{"c": "d"}},
{State: "normal", Labels: map[string]string{"a": "b"}},
},
expected: []Alert{
{State: "normal", Labels: map[string]string{"a": "b"}},
{State: "normal", Labels: map[string]string{"c": "d"}},
},
}, {
name: "active alerts with same importance and active time are ordered fewest labels first",
input: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}},
},
expected: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}},
},
}, {
name: "active alerts with same importance and active time are ordered by labels",
input: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}},
},
expected: []Alert{
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}},
{State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}},
},
}}

for _, tt := range tc {
t.Run(tt.name, func(t *testing.T) {
AlertsBy(AlertsByImportance).Sort(tt.input)
assert.EqualValues(t, tt.expected, tt.input)
})
}
}

@@ -180,6 +180,37 @@ type AlertRuleWithOptionals struct {
HasPause bool
}

// AlertRulesBy is a function that defines the ordering of alert rules.
type AlertRulesBy func(a1, a2 *AlertRule) bool

func (by AlertRulesBy) Sort(rules []*AlertRule) {
sort.Sort(AlertRulesSorter{rules: rules, by: by})
}

// AlertRulesByIndex orders alert rules by rule group index. You should
// make sure that all alert rules belong to the same rule group (have the
// same RuleGroupKey) before using this ordering.
func AlertRulesByIndex(a1, a2 *AlertRule) bool {
return a1.RuleGroupIndex < a2.RuleGroupIndex
}

func AlertRulesByGroupKeyAndIndex(a1, a2 *AlertRule) bool {
k1, k2 := a1.GetGroupKey(), a2.GetGroupKey()
if k1 == k2 {
return a1.RuleGroupIndex < a2.RuleGroupIndex
}
return AlertRuleGroupKeyByNamespaceAndRuleGroup(&k1, &k2)
}

type AlertRulesSorter struct {
rules []*AlertRule
by AlertRulesBy
}

func (s AlertRulesSorter) Len() int { return len(s.rules) }
func (s AlertRulesSorter) Swap(i, j int) { s.rules[i], s.rules[j] = s.rules[j], s.rules[i] }
func (s AlertRulesSorter) Less(i, j int) bool { return s.by(s.rules[i], s.rules[j]) }

// GetDashboardUID returns the DashboardUID or "".
func (alertRule *AlertRule) GetDashboardUID() string {
if alertRule.DashboardUID != nil {

@@ -303,6 +334,29 @@ func (k AlertRuleKey) String() string {
return fmt.Sprintf("{orgID: %d, UID: %s}", k.OrgID, k.UID)
}

// AlertRuleGroupKeyBy is a function that defines the ordering of alert rule group keys.
type AlertRuleGroupKeyBy func(a1, a2 *AlertRuleGroupKey) bool

func (by AlertRuleGroupKeyBy) Sort(keys []AlertRuleGroupKey) {
sort.Sort(AlertRuleGroupKeySorter{keys: keys, by: by})
}

func AlertRuleGroupKeyByNamespaceAndRuleGroup(k1, k2 *AlertRuleGroupKey) bool {
if k1.NamespaceUID == k2.NamespaceUID {
return k1.RuleGroup < k2.RuleGroup
}
return k1.NamespaceUID < k2.NamespaceUID
}

type AlertRuleGroupKeySorter struct {
keys []AlertRuleGroupKey
by AlertRuleGroupKeyBy
}

func (s AlertRuleGroupKeySorter) Len() int { return len(s.keys) }
func (s AlertRuleGroupKeySorter) Swap(i, j int) { s.keys[i], s.keys[j] = s.keys[j], s.keys[i] }
func (s AlertRuleGroupKeySorter) Less(i, j int) bool { return s.by(&s.keys[i], &s.keys[j]) }

// GetKey returns the alert definitions identifier
func (alertRule *AlertRule) GetKey() AlertRuleKey {
return AlertRuleKey{OrgID: alertRule.OrgID, UID: alertRule.UID}

@@ -17,6 +17,51 @@ import (
"github.com/grafana/grafana/pkg/util"
)

func TestSortAlertRulesByGroupKeyAndIndex(t *testing.T) {
tc := []struct {
name string
input []*AlertRule
expected []*AlertRule
}{{
name: "alert rules are ordered by organization",
input: []*AlertRule{
{OrgID: 2, NamespaceUID: "test2"},
{OrgID: 1, NamespaceUID: "test1"},
},
expected: []*AlertRule{
{OrgID: 1, NamespaceUID: "test1"},
{OrgID: 2, NamespaceUID: "test2"},
},
}, {
name: "alert rules in same organization are ordered by namespace",
input: []*AlertRule{
{OrgID: 1, NamespaceUID: "test2"},
{OrgID: 1, NamespaceUID: "test1"},
},
expected: []*AlertRule{
{OrgID: 1, NamespaceUID: "test1"},
{OrgID: 1, NamespaceUID: "test2"},
},
}, {
name: "alert rules with same group key are ordered by index",
input: []*AlertRule{
{OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 2},
{OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 1},
},
expected: []*AlertRule{
{OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 1},
{OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 2},
},
}}

for _, tt := range tc {
t.Run(tt.name, func(t *testing.T) {
AlertRulesBy(AlertRulesByGroupKeyAndIndex).Sort(tt.input)
assert.EqualValues(t, tt.expected, tt.input)
})
}
}

func TestNoDataStateFromString(t *testing.T) {
allKnownNoDataStates := [...]NoDataState{
Alerting,

@@ -259,10 +259,16 @@ func TestIntegrationPrometheusRules(t *testing.T) {
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}],
"totals": {
"inactive": 2
},
"interval": 60,
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}]
}],
"totals": {
"inactive": 2
}
}
}`, string(b))
}

@@ -311,10 +317,16 @@ func TestIntegrationPrometheusRules(t *testing.T) {
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}],
"totals": {
"inactive": 2
},
"interval": 60,
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}]
}],
"totals": {
"inactive": 2
}
}
}`, string(b))
return true

@@ -454,10 +466,16 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}],
"totals": {
"inactive": 2
},
"interval": 60,
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}]
}],
"totals": {
"inactive": 2
}
}
}`, dashboardUID)
expectedFilteredByJSON := fmt.Sprintf(`

@@ -481,10 +499,16 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}],
"totals": {
"inactive": 1
},
"interval": 60,
"lastEvaluation": "0001-01-01T00:00:00Z",
"evaluationTime": 0
}]
}],
"totals": {
"inactive": 1
}
}
}`, dashboardUID)
expectedNoneJSON := `

@@ -188,6 +188,14 @@ func (ctx *Context) QueryInt64(name string) int64 {
return n
}

func (ctx *Context) QueryInt64WithDefault(name string, d int64) int64 {
n, err := strconv.ParseInt(ctx.Query(name), 10, 64)
if err != nil {
return d
}
return n
}

// GetCookie returns given cookie value from request header.
func (ctx *Context) GetCookie(name string) string {
cookie, err := ctx.Req.Cookie(name)
