diff --git a/pkg/services/ngalert/api/api_prometheus.go b/pkg/services/ngalert/api/api_prometheus.go index 6c60958fbee..46813a42b8d 100644 --- a/pkg/services/ngalert/api/api_prometheus.go +++ b/pkg/services/ngalert/api/api_prometheus.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/prometheus/alertmanager/pkg/labels" apiv1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/grafana/grafana/pkg/api/response" @@ -21,6 +22,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + "github.com/grafana/grafana/pkg/util" ) type PrometheusSrv struct { @@ -105,6 +107,44 @@ func getPanelIDFromRequest(r *http.Request) (int64, error) { return 0, nil } +func getMatchersFromRequest(r *http.Request) (labels.Matchers, error) { + var matchers labels.Matchers + for _, s := range r.URL.Query()["matcher"] { + var m labels.Matcher + if err := json.Unmarshal([]byte(s), &m); err != nil { + return nil, err + } + if len(m.Name) == 0 { + return nil, errors.New("bad matcher: the name cannot be blank") + } + matchers = append(matchers, &m) + } + return matchers, nil +} + +func getStatesFromRequest(r *http.Request) ([]eval.State, error) { + var states []eval.State + for _, s := range r.URL.Query()["state"] { + s = strings.ToLower(s) + switch s { + case "normal", "inactive": + states = append(states, eval.Normal) + case "alerting", "firing": + states = append(states, eval.Alerting) + case "pending": + states = append(states, eval.Pending) + case "nodata": + states = append(states, eval.NoData) + // nolint:goconst + case "error": + states = append(states, eval.Error) + default: + return states, fmt.Errorf("unknown state '%s'", s) + } + } + return states, nil +} + func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) response.Response { dashboardUID := c.Query("dashboard_uid") panelID, err := getPanelIDFromRequest(c.Req) @@ -115,12 +155,28 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon return ErrResp(http.StatusBadRequest, errors.New("panel_id must be set with dashboard_uid"), "") } + limitGroups := c.QueryInt64WithDefault("limit", -1) + limitRulesPerGroup := c.QueryInt64WithDefault("limit_rules", -1) + limitAlertsPerRule := c.QueryInt64WithDefault("limit_alerts", -1) + matchers, err := getMatchersFromRequest(c.Req) + if err != nil { + return ErrResp(http.StatusBadRequest, err, "") + } + withStates, err := getStatesFromRequest(c.Req) + if err != nil { + return ErrResp(http.StatusBadRequest, err, "") + } + withStatesFast := make(map[eval.State]struct{}) + for _, state := range withStates { + withStatesFast[state] = struct{}{} + } + ruleResponse := apimodels.RuleResponse{ DiscoveryBase: apimodels.DiscoveryBase{ Status: "success", }, Data: apimodels.RuleDiscovery{ - RuleGroups: []*apimodels.RuleGroup{}, + RuleGroups: []apimodels.RuleGroup{}, }, } @@ -161,14 +217,22 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqViewer, evaluator) } + // Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID, + // but in this API all rules belong to the same organization. 
groupedRules := make(map[ngmodels.AlertRuleGroupKey][]*ngmodels.AlertRule) for _, rule := range ruleList { - key := rule.GetGroupKey() - rulesInGroup := groupedRules[key] - rulesInGroup = append(rulesInGroup, rule) - groupedRules[key] = rulesInGroup + groupKey := rule.GetGroupKey() + ruleGroup := groupedRules[groupKey] + ruleGroup = append(ruleGroup, rule) + groupedRules[groupKey] = ruleGroup + } + // Sort the rules in each rule group by index. We do this at the end instead of + // after each append to avoid having to sort each group multiple times. + for _, groupRules := range groupedRules { + ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(groupRules) } + rulesTotals := make(map[string]int64, len(groupedRules)) for groupKey, rules := range groupedRules { folder := namespaceMap[groupKey.NamespaceUID] if folder == nil { @@ -178,16 +242,73 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *contextmodel.ReqContext) respon if !authorizeAccessToRuleGroup(rules, hasAccess) { continue } - ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, srv.toRuleGroup(groupKey.RuleGroup, folder, rules, labelOptions)) + ruleGroup, totals := srv.toRuleGroup(groupKey, folder, rules, limitAlertsPerRule, withStatesFast, matchers, labelOptions) + ruleGroup.Totals = totals + for k, v := range totals { + rulesTotals[k] += v + } + + if len(withStates) > 0 { + // Filtering is weird but firing, pending, and normal filters also need to be + // applied to the rule. Others such as nodata and error should have no effect. + // This is to match the current behavior in the UI. + filteredRules := make([]apimodels.AlertingRule, 0, len(ruleGroup.Rules)) + for _, rule := range ruleGroup.Rules { + var state *eval.State + switch rule.State { + case "normal", "inactive": + state = util.Pointer(eval.Normal) + case "alerting", "firing": + state = util.Pointer(eval.Alerting) + case "pending": + state = util.Pointer(eval.Pending) + } + if state != nil { + if _, ok := withStatesFast[*state]; ok { + filteredRules = append(filteredRules, rule) + } + } + } + ruleGroup.Rules = filteredRules + } + + if limitRulesPerGroup > -1 && int64(len(ruleGroup.Rules)) > limitRulesPerGroup { + ruleGroup.Rules = ruleGroup.Rules[0:limitRulesPerGroup] + } + + ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, *ruleGroup) } + + ruleResponse.Data.Totals = rulesTotals + + // Sort Rule Groups before checking limits + apimodels.RuleGroupsBy(apimodels.RuleGroupsByFileAndName).Sort(ruleResponse.Data.RuleGroups) + if limitGroups > -1 && int64(len(ruleResponse.Data.RuleGroups)) >= limitGroups { + ruleResponse.Data.RuleGroups = ruleResponse.Data.RuleGroups[0:limitGroups] + } + return response.JSON(http.StatusOK, ruleResponse) } -func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, rules []*ngmodels.AlertRule, labelOptions []ngmodels.LabelOption) *apimodels.RuleGroup { - newGroup := &apimodels.RuleGroup{ - Name: groupName, - File: folder.Title, // file is what Prometheus uses for provisioning, we replace it with namespace. 
+// This is the same as matchers.Matches but avoids the need to create a LabelSet +func matchersMatch(matchers []*labels.Matcher, labels map[string]string) bool { + for _, m := range matchers { + if !m.Matches(labels[m.Name]) { + return false + } } + return true +} + +func (srv PrometheusSrv) toRuleGroup(groupKey ngmodels.AlertRuleGroupKey, folder *folder.Folder, rules []*ngmodels.AlertRule, limitAlerts int64, withStates map[eval.State]struct{}, matchers labels.Matchers, labelOptions []ngmodels.LabelOption) (*apimodels.RuleGroup, map[string]int64) { + newGroup := &apimodels.RuleGroup{ + Name: groupKey.RuleGroup, + // file is what Prometheus uses for provisioning, we replace it with namespace which is the folder in Grafana. + File: folder.Title, + } + + rulesTotals := make(map[string]int64, len(rules)) + ngmodels.RulesGroup(rules).SortByGroupIndex() for _, rule := range rules { alertingRule := apimodels.AlertingRule{ @@ -206,14 +327,20 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru LastEvaluation: time.Time{}, } - for _, alertState := range srv.manager.GetStatesForRuleUID(rule.OrgID, rule.UID) { + states := srv.manager.GetStatesForRuleUID(rule.OrgID, rule.UID) + totals := make(map[string]int64) + for _, alertState := range states { activeAt := alertState.StartsAt valString := "" if alertState.State == eval.Alerting || alertState.State == eval.Pending { valString = formatValues(alertState) } - - alert := &apimodels.Alert{ + totals[strings.ToLower(alertState.State.String())] += 1 + // Do not add error twice when execution error state is Error + if alertState.Error != nil && rule.ExecErrState != ngmodels.ErrorErrState { + totals["error"] += 1 + } + alert := apimodels.Alert{ Labels: alertState.GetLabels(labelOptions...), Annotations: alertState.Annotations, @@ -237,6 +364,9 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru alertingRule.State = "pending" } case eval.Alerting: + if alertingRule.ActiveAt == nil || alertingRule.ActiveAt.After(activeAt) { + alertingRule.ActiveAt = &activeAt + } alertingRule.State = "firing" case eval.Error: newRule.Health = "error" @@ -249,17 +379,43 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *folder.Folder, ru newRule.Health = "error" } + if len(withStates) > 0 { + if _, ok := withStates[alertState.State]; !ok { + continue + } + } + + if !matchersMatch(matchers, alertState.Labels) { + continue + } + alertingRule.Alerts = append(alertingRule.Alerts, alert) } + if alertingRule.State != "" { + rulesTotals[alertingRule.State] += 1 + } + + if newRule.Health == "error" || newRule.Health == "nodata" { + rulesTotals[newRule.Health] += 1 + } + + apimodels.AlertsBy(apimodels.AlertsByImportance).Sort(alertingRule.Alerts) + + if limitAlerts > -1 && int64(len(alertingRule.Alerts)) > limitAlerts { + alertingRule.Alerts = alertingRule.Alerts[0:limitAlerts] + } + alertingRule.Rule = newRule + alertingRule.Totals = totals newGroup.Rules = append(newGroup.Rules, alertingRule) newGroup.Interval = float64(rule.IntervalSeconds) // TODO yuri. Change that when scheduler will process alerts in groups newGroup.EvaluationTime = newRule.EvaluationTime newGroup.LastEvaluation = newRule.LastEvaluation } - return newGroup + + return newGroup, rulesTotals } // ruleToQuery attempts to extract the datasource queries from the alert query model. 
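
For reference, a minimal client-side sketch of how the new query parameters introduced in this file compose. The parameter names (limit, limit_rules, limit_alerts, state, matcher) and the JSON matcher encoding follow the handler above and the tests below; the host, the route path, and the example label values ("team"/"backend") are placeholders, not part of this change.

package main

import (
	"fmt"
	"net/url"
)

func main() {
	params := url.Values{}
	// Pagination-style caps added in this change; -1 (the default) means no limit.
	params.Set("limit", "2")         // max rule groups
	params.Set("limit_rules", "5")   // max rules per group
	params.Set("limit_alerts", "10") // max alerts per rule
	// Repeatable state filter: normal/inactive, alerting/firing, pending, nodata, error.
	params.Add("state", "firing")
	params.Add("state", "pending")
	// Repeatable matcher filter; each value is a JSON-encoded label matcher.
	params.Add("matcher", `{"name":"team","isEqual":true,"value":"backend"}`)

	u := url.URL{
		Scheme:   "http",
		Host:     "localhost:3000",
		Path:     "/api/v1/rules",
		RawQuery: params.Encode(),
	}
	fmt.Println(u.String())
}

Unknown state values and matchers with a blank name are rejected with 400 Bad Request, as covered by the tests in the next file.
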
diff --git a/pkg/services/ngalert/api/api_prometheus_test.go b/pkg/services/ngalert/api/api_prometheus_test.go index 6f085d8ab29..97f8ce59b64 100644 --- a/pkg/services/ngalert/api/api_prometheus_test.go +++ b/pkg/services/ngalert/api/api_prometheus_test.go @@ -3,6 +3,7 @@ package api import ( "context" "encoding/json" + "errors" "fmt" "math/rand" "net/http" @@ -19,6 +20,7 @@ import ( "github.com/grafana/grafana/pkg/infra/log" acmock "github.com/grafana/grafana/pkg/services/accesscontrol/mock" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" + "github.com/grafana/grafana/pkg/services/folder" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -254,6 +256,30 @@ func withAlertingState() forEachState { } } +func withAlertingErrorState() forEachState { + return func(s *state.State) *state.State { + s.SetAlerting("", timeNow(), timeNow().Add(5*time.Minute)) + s.Error = errors.New("this is an error") + return s + } +} + +func withErrorState() forEachState { + return func(s *state.State) *state.State { + s.SetError(errors.New("this is an error"), timeNow(), timeNow().Add(5*time.Minute)) + return s + } +} + +func withLabels(labels data.Labels) forEachState { + return func(s *state.State) *state.State { + for k, v := range labels { + s.Labels[k] = v + } + return s + } +} + func TestRouteGetRuleStatuses(t *testing.T) { timeNow = func() time.Time { return time.Date(2022, 3, 10, 14, 0, 0, 0, time.UTC) } orgID := int64(1) @@ -305,6 +331,9 @@ func TestRouteGetRuleStatuses(t *testing.T) { "activeAt": "0001-01-01T00:00:00Z", "value": "" }], + "totals": { + "normal": 1 + }, "labels": { "__a_private_label_on_the_rule__": "a_value" }, @@ -314,10 +343,16 @@ func TestRouteGetRuleStatuses(t *testing.T) { "duration": 180, "evaluationTime": 60 }], + "totals": { + "inactive": 1 + }, "interval": 60, "lastEvaluation": "2022-03-10T14:01:00Z", "evaluationTime": 60 - }] + }], + "totals": { + "inactive": 1 + } } } `, folder.Title), string(r.Body())) @@ -358,6 +393,9 @@ func TestRouteGetRuleStatuses(t *testing.T) { "activeAt": "0001-01-01T00:00:00Z", "value": "" }], + "totals": { + "normal": 1 + }, "labels": { "__a_private_label_on_the_rule__": "a_value", "__alert_rule_uid__": "RuleUID" @@ -368,10 +406,16 @@ func TestRouteGetRuleStatuses(t *testing.T) { "duration": 180, "evaluationTime": 60 }], + "totals": { + "inactive": 1 + }, "interval": 60, "lastEvaluation": "2022-03-10T14:01:00Z", "evaluationTime": 60 - }] + }], + "totals": { + "inactive": 1 + } } } `, folder.Title), string(r.Body())) @@ -406,6 +450,9 @@ func TestRouteGetRuleStatuses(t *testing.T) { "activeAt": "0001-01-01T00:00:00Z", "value": "" }], + "totals": { + "normal": 1 + }, "labels": { "__a_private_label_on_the_rule__": "a_value" }, @@ -415,10 +462,16 @@ func TestRouteGetRuleStatuses(t *testing.T) { "duration": 180, "evaluationTime": 60 }], + "totals": { + "inactive": 1 + }, "interval": 60, "lastEvaluation": "2022-03-10T14:01:00Z", "evaluationTime": 60 - }] + }], + "totals": { + "inactive": 1 + } } } `, folder.Title), string(r.Body())) @@ -504,6 +557,637 @@ func TestRouteGetRuleStatuses(t *testing.T) { assert.Emptyf(t, rules, "not all expected rules were returned") }) }) + + t.Run("test totals are expected", func(t *testing.T) { + fakeStore, fakeAIM, _, api := setupAPI(t) + // Create rules in the same Rule Group to keep assertions simple + rules := ngmodels.GenerateAlertRules(3, 
ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1"), withNamespace(&folder.Folder{ + Title: "Folder-1", + }))) + // Need to sort these so we add alerts to the rules as ordered in the response + ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(rules) + // The last two rules will have errors, however the first will be alerting + // while the second one will have a DatasourceError alert. + rules[1].ExecErrState = ngmodels.AlertingErrState + rules[2].ExecErrState = ngmodels.ErrorErrState + fakeStore.PutRule(context.Background(), rules...) + + // create a normal and alerting state for the first rule + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1) + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, withAlertingState()) + // create an error state for the last two rules + fakeAIM.GenerateAlertInstances(orgID, rules[1].UID, 1, withAlertingErrorState()) + fakeAIM.GenerateAlertInstances(orgID, rules[2].UID, 1, withErrorState()) + + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // Even though there are just 3 rules, the totals should show two firing rules, + // one inactive rules and two errors + require.Equal(t, map[string]int64{"firing": 2, "inactive": 1, "error": 2}, res.Data.Totals) + // There should be 1 Rule Group that contains all rules + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 3) + + // The first rule should have an alerting and normal alert + r1 := rg.Rules[0] + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, r1.Totals) + require.Len(t, r1.Alerts, 2) + // The second rule should have an alerting alert + r2 := rg.Rules[1] + require.Equal(t, map[string]int64{"alerting": 1, "error": 1}, r2.Totals) + require.Len(t, r2.Alerts, 1) + // The last rule should have an error alert + r3 := rg.Rules[2] + require.Equal(t, map[string]int64{"error": 1}, r3.Totals) + require.Len(t, r3.Alerts, 1) + }) + + t.Run("test time of first firing alert", func(t *testing.T) { + fakeStore, fakeAIM, _, api := setupAPI(t) + // Create rules in the same Rule Group to keep assertions simple + rules := ngmodels.GenerateAlertRules(1, ngmodels.AlertRuleGen(withOrgID(orgID))) + fakeStore.PutRule(context.Background(), rules...) 
+ + getRuleResponse := func() apimodels.RuleResponse { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + return res + } + + // no alerts so timestamp should be nil + res := getRuleResponse() + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Nil(t, rg.Rules[0].ActiveAt) + + // create a normal alert, the timestamp should still be nil + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1) + res = getRuleResponse() + require.Len(t, res.Data.RuleGroups, 1) + rg = res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Nil(t, rg.Rules[0].ActiveAt) + + // create a firing alert, the timestamp should be non-nil + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, withAlertingState()) + res = getRuleResponse() + require.Len(t, res.Data.RuleGroups, 1) + rg = res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.NotNil(t, rg.Rules[0].ActiveAt) + + lastActiveAt := rg.Rules[0].ActiveAt + // create a second firing alert, the timestamp of first firing alert should be the same + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, withAlertingState()) + res = getRuleResponse() + require.Len(t, res.Data.RuleGroups, 1) + rg = res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Equal(t, lastActiveAt, rg.Rules[0].ActiveAt) + }) + + t.Run("test with limit on Rule Groups", func(t *testing.T) { + fakeStore, _, _, api := setupAPI(t) + + rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID))) + fakeStore.PutRule(context.Background(), rules...) 
+ + t.Run("first without limit", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 inactive rules across all Rule Groups + require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 2) + for _, rg := range res.Data.RuleGroups { + // Each Rule Group should have 1 inactive rule + require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + } + }) + + t.Run("then with limit", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 inactive rules across all Rule Groups + require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + // The Rule Group within the limit should have 1 inactive rule + require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + }) + + t.Run("then with limit larger than number of rule groups", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + require.Len(t, res.Data.RuleGroups, 1) + }) + }) + + t.Run("test with limit rules", func(t *testing.T) { + fakeStore, _, _, api := setupAPI(t) + rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1"))) + fakeStore.PutRule(context.Background(), rules...) 
+ + t.Run("first without limit", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 inactive rules across all Rule Groups + require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 2) + for _, rg := range res.Data.RuleGroups { + // Each Rule Group should have 1 inactive rule + require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + } + }) + + t.Run("then with limit", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 inactive rules + require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + // The Rule Group within the limit should have 1 inactive rule because of the limit + require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + }) + + t.Run("then with limit larger than number of rules", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=2", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + require.Len(t, res.Data.RuleGroups, 1) + require.Len(t, res.Data.RuleGroups[0].Rules, 1) + }) + }) + + t.Run("test with limit alerts", func(t *testing.T) { + fakeStore, fakeAIM, _, api := setupAPI(t) + rules := ngmodels.GenerateAlertRules(2, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1"))) + fakeStore.PutRule(context.Background(), rules...) 
+ // create a normal and firing alert for each rule + for _, r := range rules { + fakeAIM.GenerateAlertInstances(orgID, r.UID, 1) + fakeAIM.GenerateAlertInstances(orgID, r.UID, 1, withAlertingState()) + } + + t.Run("first without limit", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 firing rules across all Rule Groups + require.Equal(t, map[string]int64{"firing": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 2) + for _, rg := range res.Data.RuleGroups { + // Each Rule Group should have 1 firing rule + require.Equal(t, map[string]int64{"firing": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + // Each rule should have two alerts + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rg.Rules[0].Totals) + } + }) + + t.Run("then with limits", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=1", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 firing rules across all Rule Groups + require.Equal(t, map[string]int64{"firing": 2}, res.Data.Totals) + rg := res.Data.RuleGroups[0] + // The Rule Group within the limit should have 1 inactive rule because of the limit + require.Equal(t, map[string]int64{"firing": 1}, rg.Totals) + require.Len(t, rg.Rules, 1) + rule := rg.Rules[0] + // The rule should have two alerts, but just one should be returned + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rule.Totals) + require.Len(t, rule.Alerts, 1) + // Firing alerts should have precedence over normal alerts + require.Equal(t, "Alerting", rule.Alerts[0].State) + }) + + t.Run("then with limit larger than number of alerts", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=3", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + require.Len(t, res.Data.RuleGroups, 1) + require.Len(t, res.Data.RuleGroups[0].Rules, 1) + require.Len(t, res.Data.RuleGroups[0].Rules[0].Alerts, 2) + }) + }) + + t.Run("test with filters on state", func(t *testing.T) { + fakeStore, fakeAIM, _, api := setupAPI(t) + // create two rules in the same Rule Group to keep assertions simple + rules := ngmodels.GenerateAlertRules(3, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1"), withNamespace(&folder.Folder{ + Title: "Folder-1", + }))) + // Need to sort these so we add alerts to the rules as ordered in the response + ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(rules) + // The last two rules will have errors, however the first 
will be alerting + // while the second one will have a DatasourceError alert. + rules[1].ExecErrState = ngmodels.AlertingErrState + rules[2].ExecErrState = ngmodels.ErrorErrState + fakeStore.PutRule(context.Background(), rules...) + + // create a normal and alerting state for the first rule + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1) + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, withAlertingState()) + // create an error state for the last two rules + fakeAIM.GenerateAlertInstances(orgID, rules[1].UID, 1, withAlertingErrorState()) + fakeAIM.GenerateAlertInstances(orgID, rules[2].UID, 1, withErrorState()) + + t.Run("invalid state returns 400 Bad Request", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?state=unknown", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusBadRequest, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + require.Equal(t, "unknown state 'unknown'", res.Error) + }) + + t.Run("first without filters", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be 2 firing rules, 1 inactive rule, and 2 with errors + require.Equal(t, map[string]int64{"firing": 2, "inactive": 1, "error": 2}, res.Data.Totals) + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 3) + + // The first two rules should be firing and the last should be inactive + require.Equal(t, "firing", rg.Rules[0].State) + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rg.Rules[0].Totals) + require.Len(t, rg.Rules[0].Alerts, 2) + require.Equal(t, "firing", rg.Rules[1].State) + require.Equal(t, map[string]int64{"alerting": 1, "error": 1}, rg.Rules[1].Totals) + require.Len(t, rg.Rules[1].Alerts, 1) + require.Equal(t, "inactive", rg.Rules[2].State) + require.Equal(t, map[string]int64{"error": 1}, rg.Rules[2].Totals) + require.Len(t, rg.Rules[2].Alerts, 1) + }) + + t.Run("then with filter for firing alerts", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?state=firing", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // The totals should be the same + require.Equal(t, map[string]int64{"firing": 2, "inactive": 1, "error": 2}, res.Data.Totals) + + // The inactive rules should be filtered out of the result + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 2) + + // Both firing rules should be returned with their totals unchanged + require.Equal(t, "firing", rg.Rules[0].State) + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rg.Rules[0].Totals) + // The first rule should 
have just 1 firing alert as the inactive alert + // has been removed by the filter for firing alerts + require.Len(t, rg.Rules[0].Alerts, 1) + + require.Equal(t, "firing", rg.Rules[1].State) + require.Equal(t, map[string]int64{"alerting": 1, "error": 1}, rg.Rules[1].Totals) + require.Len(t, rg.Rules[1].Alerts, 1) + }) + + t.Run("then with filters for both inactive and firing alerts", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?state=inactive&state=firing", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // The totals should be the same + require.Equal(t, map[string]int64{"firing": 2, "inactive": 1, "error": 2}, res.Data.Totals) + + // The number of rules returned should also be the same + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 3) + + // The first two rules should be firing and the last should be inactive + require.Equal(t, "firing", rg.Rules[0].State) + require.Equal(t, map[string]int64{"alerting": 1, "normal": 1}, rg.Rules[0].Totals) + require.Len(t, rg.Rules[0].Alerts, 2) + require.Equal(t, "firing", rg.Rules[1].State) + require.Equal(t, map[string]int64{"alerting": 1, "error": 1}, rg.Rules[1].Totals) + require.Len(t, rg.Rules[1].Alerts, 1) + + // The last rule should have 1 alert as the filter includes errors too + require.Equal(t, "inactive", rg.Rules[2].State) + require.Equal(t, map[string]int64{"error": 1}, rg.Rules[2].Totals) + // The error alert has been removed as the filters are inactive and firing + require.Len(t, rg.Rules[2].Alerts, 0) + }) + }) + + t.Run("test with matcher on labels", func(t *testing.T) { + fakeStore, fakeAIM, _, api := setupAPI(t) + // create two rules in the same Rule Group to keep assertions simple + rules := ngmodels.GenerateAlertRules(1, ngmodels.AlertRuleGen(withOrgID(orgID), withGroup("Rule-Group-1"), withNamespace(&folder.Folder{ + Title: "Folder-1", + }))) + fakeStore.PutRule(context.Background(), rules...) 
+ + // create a normal and alerting state for each rule + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, + withLabels(data.Labels{"test": "value1"})) + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, + withLabels(data.Labels{"test": "value2"}), withAlertingState()) + + t.Run("invalid matchers returns 400 Bad Request", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?matcher={\"name\":\"\"}", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusBadRequest, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + require.Equal(t, "bad matcher: the name cannot be blank", res.Error) + }) + + t.Run("first without matchers", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Len(t, rg.Rules[0].Alerts, 2) + }) + + t.Run("then with single matcher", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?matcher={\"name\":\"test\",\"isEqual\":true,\"value\":\"value1\"}", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be just the alert with the label test=value1 + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Len(t, rg.Rules[0].Alerts, 1) + }) + + t.Run("then with URL encoded regex matcher", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?matcher=%7B%22name%22:%22test%22%2C%22isEqual%22:true%2C%22isRegex%22:true%2C%22value%22:%22value%5B0-9%5D%2B%22%7D%0A", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be just the alert with the label test=value1 + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Len(t, rg.Rules[0].Alerts, 2) + }) + + t.Run("then with multiple matchers", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?matcher={\"name\":\"alertname\",\"isEqual\":true,\"value\":\"test_title_0\"}&matcher={\"name\":\"test\",\"isEqual\":true,\"value\":\"value1\"}", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + 
require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should be just the alert with the label test=value1 + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Len(t, rg.Rules[0].Alerts, 1) + }) + + t.Run("then with multiple matchers that don't match", func(t *testing.T) { + r, err := http.NewRequest("GET", "/api/v1/rules?matcher={\"name\":\"alertname\",\"isEqual\":true,\"value\":\"test_title_0\"}&matcher={\"name\":\"test\",\"isEqual\":true,\"value\":\"value3\"}", nil) + require.NoError(t, err) + c := &contextmodel.ReqContext{ + Context: &web.Context{Req: r}, + SignedInUser: &user.SignedInUser{ + OrgID: orgID, + OrgRole: org.RoleViewer, + }, + } + resp := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, resp.Status()) + var res apimodels.RuleResponse + require.NoError(t, json.Unmarshal(resp.Body(), &res)) + + // There should no alerts + require.Len(t, res.Data.RuleGroups, 1) + rg := res.Data.RuleGroups[0] + require.Len(t, rg.Rules, 1) + require.Len(t, rg.Rules[0].Alerts, 0) + }) + }) } func setupAPI(t *testing.T) (*fakes.RuleStore, *fakeAlertInstanceManager, *acmock.Mock, PrometheusSrv) { diff --git a/pkg/services/ngalert/api/tooling/definitions/prom.go b/pkg/services/ngalert/api/tooling/definitions/prom.go index 6a4ffa5d985..346280c33ce 100644 --- a/pkg/services/ngalert/api/tooling/definitions/prom.go +++ b/pkg/services/ngalert/api/tooling/definitions/prom.go @@ -1,6 +1,9 @@ package definitions import ( + "fmt" + "sort" + "strings" "time" v1 "github.com/prometheus/client_golang/api/prometheus/v1" @@ -65,7 +68,8 @@ type DiscoveryBase struct { // swagger:model type RuleDiscovery struct { // required: true - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []RuleGroup `json:"groups"` + Totals map[string]int64 `json:"totals,omitempty"` } // AlertDiscovery has info for all active alerts. @@ -85,13 +89,37 @@ type RuleGroup struct { // specific properties, both alerting and recording rules are exposed in the // same array. // required: true - Rules []AlertingRule `json:"rules"` + Rules []AlertingRule `json:"rules"` + Totals map[string]int64 `json:"totals"` // required: true Interval float64 `json:"interval"` LastEvaluation time.Time `json:"lastEvaluation"` EvaluationTime float64 `json:"evaluationTime"` } +// RuleGroupsBy is a function that defines the ordering of Rule Groups. 
+type RuleGroupsBy func(a1, a2 *RuleGroup) bool + +func (by RuleGroupsBy) Sort(groups []RuleGroup) { + sort.Sort(RuleGroupsSorter{groups: groups, by: by}) +} + +func RuleGroupsByFileAndName(a1, a2 *RuleGroup) bool { + if a1.File == a2.File { + return a1.Name < a2.Name + } + return a1.File < a2.File +} + +type RuleGroupsSorter struct { + groups []RuleGroup + by RuleGroupsBy +} + +func (s RuleGroupsSorter) Len() int { return len(s.groups) } +func (s RuleGroupsSorter) Swap(i, j int) { s.groups[i], s.groups[j] = s.groups[j], s.groups[i] } +func (s RuleGroupsSorter) Less(i, j int) bool { return s.by(&s.groups[i], &s.groups[j]) } + // adapted from cortex // swagger:model type AlertingRule struct { @@ -106,7 +134,9 @@ type AlertingRule struct { // required: true Annotations overrideLabels `json:"annotations,omitempty"` // required: true - Alerts []*Alert `json:"alerts,omitempty"` + ActiveAt *time.Time `json:"activeAt,omitempty"` + Alerts []Alert `json:"alerts,omitempty"` + Totals map[string]int64 `json:"totals,omitempty"` Rule } @@ -141,6 +171,107 @@ type Alert struct { Value string `json:"value"` } +type StateByImportance int + +const ( + StateAlerting = iota + StatePending + StateError + StateNoData + StateNormal +) + +func stateByImportanceFromString(s string) (StateByImportance, error) { + switch s = strings.ToLower(s); s { + case "alerting": + return StateAlerting, nil + case "pending": + return StatePending, nil + case "error": + return StateError, nil + case "nodata": + return StateNoData, nil + case "normal": + return StateNormal, nil + default: + return -1, fmt.Errorf("unknown state: %s", s) + } +} + +// AlertsBy is a function that defines the ordering of alerts. +type AlertsBy func(a1, a2 *Alert) bool + +func (by AlertsBy) Sort(alerts []Alert) { + sort.Sort(AlertsSorter{alerts: alerts, by: by}) +} + +// AlertsByImportance orders alerts by importance. An alert is more important +// than another alert if its status has higher importance. For example, "alerting" +// is more important than "normal". If two alerts have the same importance +// then the ordering is based on their ActiveAt time and their labels. +func AlertsByImportance(a1, a2 *Alert) bool { + // labelsForComparison concatenates each key/value pair into a string and + // sorts them. + labelsForComparison := func(m map[string]string) []string { + s := make([]string, 0, len(m)) + for k, v := range m { + s = append(s, k+v) + } + sort.Strings(s) + return s + } + + // compareLabels returns true if labels1 are less than labels2. This happens + // when labels1 has fewer labels than labels2, or if the next label from + // labels1 is lexicographically less than the next label from labels2. + compareLabels := func(labels1, labels2 []string) bool { + if len(labels1) == len(labels2) { + for i := range labels1 { + if labels1[i] != labels2[i] { + return labels1[i] < labels2[i] + } + } + } + return len(labels1) < len(labels2) + } + + // The importance of an alert is first based on the importance of their states. + // This ordering is intended to show the most important alerts first when + // using pagination. + importance1, _ := stateByImportanceFromString(a1.State) + importance2, _ := stateByImportanceFromString(a2.State) + + // If both alerts have the same importance then the ordering is based on + // their ActiveAt time, and if those are equal, their labels. 
+ if importance1 == importance2 { + if a1.ActiveAt != nil && a2.ActiveAt == nil { + // The first alert is active but not the second + return true + } else if a1.ActiveAt == nil && a2.ActiveAt != nil { + // The second alert is active but not the first + return false + } else if a1.ActiveAt != nil && a2.ActiveAt != nil && a1.ActiveAt.Before(*a2.ActiveAt) { + // Both alerts are active but a1 happened before a2 + return true + } + // Both alerts are active since the same time so compare their labels + labels1 := labelsForComparison(a1.Labels) + labels2 := labelsForComparison(a2.Labels) + return compareLabels(labels1, labels2) + } + + return importance1 < importance2 +} + +type AlertsSorter struct { + alerts []Alert + by AlertsBy +} + +func (s AlertsSorter) Len() int { return len(s.alerts) } +func (s AlertsSorter) Swap(i, j int) { s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] } +func (s AlertsSorter) Less(i, j int) bool { return s.by(&s.alerts[i], &s.alerts[j]) } + // override the labels type with a map for generation. // The custom marshaling for labels.Labels ends up doing this anyways. type overrideLabels map[string]string diff --git a/pkg/services/ngalert/api/tooling/definitions/prom_test.go b/pkg/services/ngalert/api/tooling/definitions/prom_test.go new file mode 100644 index 00000000000..107eedcb5c2 --- /dev/null +++ b/pkg/services/ngalert/api/tooling/definitions/prom_test.go @@ -0,0 +1,66 @@ +package definitions + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestSortAlertsByImportance(t *testing.T) { + tm1, tm2 := time.Now(), time.Now().Add(time.Second) + tc := []struct { + name string + input []Alert + expected []Alert + }{{ + name: "alerts are ordered in expected importance", + input: []Alert{{State: "normal"}, {State: "nodata"}, {State: "error"}, {State: "pending"}, {State: "alerting"}}, + expected: []Alert{{State: "alerting"}, {State: "pending"}, {State: "error"}, {State: "nodata"}, {State: "normal"}}, + }, { + name: "alerts with same importance are ordered active first", + input: []Alert{{State: "normal"}, {State: "normal", ActiveAt: &tm1}}, + expected: []Alert{{State: "normal", ActiveAt: &tm1}, {State: "normal"}}, + }, { + name: "active alerts with same importance are ordered newest first", + input: []Alert{{State: "alerting", ActiveAt: &tm2}, {State: "alerting", ActiveAt: &tm1}}, + expected: []Alert{{State: "alerting", ActiveAt: &tm1}, {State: "alerting", ActiveAt: &tm2}}, + }, { + name: "inactive alerts with same importance are ordered by labels", + input: []Alert{ + {State: "normal", Labels: map[string]string{"c": "d"}}, + {State: "normal", Labels: map[string]string{"a": "b"}}, + }, + expected: []Alert{ + {State: "normal", Labels: map[string]string{"a": "b"}}, + {State: "normal", Labels: map[string]string{"c": "d"}}, + }, + }, { + name: "active alerts with same importance and active time are ordered fewest labels first", + input: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, + }, + expected: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"e": "f"}}, + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b", "c": "d"}}, + }, + }, { + name: "active alerts with same importance and active time are ordered by labels", + input: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": 
"b"}}, + }, + expected: []Alert{ + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"a": "b"}}, + {State: "alerting", ActiveAt: &tm1, Labels: map[string]string{"c": "d"}}, + }, + }} + + for _, tt := range tc { + t.Run(tt.name, func(t *testing.T) { + AlertsBy(AlertsByImportance).Sort(tt.input) + assert.EqualValues(t, tt.expected, tt.input) + }) + } +} diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index 5b03f6c6a63..4fc34822daa 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -180,6 +180,37 @@ type AlertRuleWithOptionals struct { HasPause bool } +// AlertsRulesBy is a function that defines the ordering of alert rules. +type AlertRulesBy func(a1, a2 *AlertRule) bool + +func (by AlertRulesBy) Sort(rules []*AlertRule) { + sort.Sort(AlertRulesSorter{rules: rules, by: by}) +} + +// AlertRulesByIndex orders alert rules by rule group index. You should +// make sure that all alert rules belong to the same rule group (have the +// same RuleGroupKey) before using this ordering. +func AlertRulesByIndex(a1, a2 *AlertRule) bool { + return a1.RuleGroupIndex < a2.RuleGroupIndex +} + +func AlertRulesByGroupKeyAndIndex(a1, a2 *AlertRule) bool { + k1, k2 := a1.GetGroupKey(), a2.GetGroupKey() + if k1 == k2 { + return a1.RuleGroupIndex < a2.RuleGroupIndex + } + return AlertRuleGroupKeyByNamespaceAndRuleGroup(&k1, &k2) +} + +type AlertRulesSorter struct { + rules []*AlertRule + by AlertRulesBy +} + +func (s AlertRulesSorter) Len() int { return len(s.rules) } +func (s AlertRulesSorter) Swap(i, j int) { s.rules[i], s.rules[j] = s.rules[j], s.rules[i] } +func (s AlertRulesSorter) Less(i, j int) bool { return s.by(s.rules[i], s.rules[j]) } + // GetDashboardUID returns the DashboardUID or "". func (alertRule *AlertRule) GetDashboardUID() string { if alertRule.DashboardUID != nil { @@ -303,6 +334,29 @@ func (k AlertRuleKey) String() string { return fmt.Sprintf("{orgID: %d, UID: %s}", k.OrgID, k.UID) } +// AlertRuleGroupKeyBy is a function that defines the ordering of alert rule group keys. 
+type AlertRuleGroupKeyBy func(a1, a2 *AlertRuleGroupKey) bool + +func (by AlertRuleGroupKeyBy) Sort(keys []AlertRuleGroupKey) { + sort.Sort(AlertRuleGroupKeySorter{keys: keys, by: by}) +} + +func AlertRuleGroupKeyByNamespaceAndRuleGroup(k1, k2 *AlertRuleGroupKey) bool { + if k1.NamespaceUID == k2.NamespaceUID { + return k1.RuleGroup < k2.RuleGroup + } + return k1.NamespaceUID < k2.NamespaceUID +} + +type AlertRuleGroupKeySorter struct { + keys []AlertRuleGroupKey + by AlertRuleGroupKeyBy +} + +func (s AlertRuleGroupKeySorter) Len() int { return len(s.keys) } +func (s AlertRuleGroupKeySorter) Swap(i, j int) { s.keys[i], s.keys[j] = s.keys[j], s.keys[i] } +func (s AlertRuleGroupKeySorter) Less(i, j int) bool { return s.by(&s.keys[i], &s.keys[j]) } + // GetKey returns the alert definitions identifier func (alertRule *AlertRule) GetKey() AlertRuleKey { return AlertRuleKey{OrgID: alertRule.OrgID, UID: alertRule.UID} diff --git a/pkg/services/ngalert/models/alert_rule_test.go b/pkg/services/ngalert/models/alert_rule_test.go index 8f33d2dc875..e566c4af5db 100644 --- a/pkg/services/ngalert/models/alert_rule_test.go +++ b/pkg/services/ngalert/models/alert_rule_test.go @@ -17,6 +17,51 @@ import ( "github.com/grafana/grafana/pkg/util" ) +func TestSortAlertRulesByGroupKeyAndIndex(t *testing.T) { + tc := []struct { + name string + input []*AlertRule + expected []*AlertRule + }{{ + name: "alert rules are ordered by organization", + input: []*AlertRule{ + {OrgID: 2, NamespaceUID: "test2"}, + {OrgID: 1, NamespaceUID: "test1"}, + }, + expected: []*AlertRule{ + {OrgID: 1, NamespaceUID: "test1"}, + {OrgID: 2, NamespaceUID: "test2"}, + }, + }, { + name: "alert rules in same organization are ordered by namespace", + input: []*AlertRule{ + {OrgID: 1, NamespaceUID: "test2"}, + {OrgID: 1, NamespaceUID: "test1"}, + }, + expected: []*AlertRule{ + {OrgID: 1, NamespaceUID: "test1"}, + {OrgID: 1, NamespaceUID: "test2"}, + }, + }, { + name: "alert rules with same group key are ordered by index", + input: []*AlertRule{ + {OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 2}, + {OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 1}, + }, + expected: []*AlertRule{ + {OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 1}, + {OrgID: 1, NamespaceUID: "test", RuleGroupIndex: 2}, + }, + }} + + for _, tt := range tc { + t.Run(tt.name, func(t *testing.T) { + AlertRulesBy(AlertRulesByGroupKeyAndIndex).Sort(tt.input) + assert.EqualValues(t, tt.expected, tt.input) + }) + } +} + func TestNoDataStateFromString(t *testing.T) { allKnownNoDataStates := [...]NoDataState{ Alerting, diff --git a/pkg/tests/api/alerting/api_prometheus_test.go b/pkg/tests/api/alerting/api_prometheus_test.go index 32a6c10488d..1616d338265 100644 --- a/pkg/tests/api/alerting/api_prometheus_test.go +++ b/pkg/tests/api/alerting/api_prometheus_test.go @@ -259,10 +259,16 @@ func TestIntegrationPrometheusRules(t *testing.T) { "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 }], + "totals": { + "inactive": 2 + }, "interval": 60, "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 - }] + }], + "totals": { + "inactive": 2 + } } }`, string(b)) } @@ -311,10 +317,16 @@ func TestIntegrationPrometheusRules(t *testing.T) { "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 }], + "totals": { + "inactive": 2 + }, "interval": 60, "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 - }] + }], + "totals": { + "inactive": 2 + } } }`, string(b)) return true @@ -454,10 +466,16 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) 
{ "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 }], + "totals": { + "inactive": 2 + }, "interval": 60, "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 - }] + }], + "totals": { + "inactive": 2 + } } }`, dashboardUID) expectedFilteredByJSON := fmt.Sprintf(` @@ -481,10 +499,16 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) { "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 }], + "totals": { + "inactive": 1 + }, "interval": 60, "lastEvaluation": "0001-01-01T00:00:00Z", "evaluationTime": 0 - }] + }], + "totals": { + "inactive": 1 + } } }`, dashboardUID) expectedNoneJSON := ` diff --git a/pkg/web/context.go b/pkg/web/context.go index c3d5ba63a72..5fd3eaa398f 100644 --- a/pkg/web/context.go +++ b/pkg/web/context.go @@ -188,6 +188,14 @@ func (ctx *Context) QueryInt64(name string) int64 { return n } +func (ctx *Context) QueryInt64WithDefault(name string, d int64) int64 { + n, err := strconv.ParseInt(ctx.Query(name), 10, 64) + if err != nil { + return d + } + return n +} + // GetCookie returns given cookie value from request header. func (ctx *Context) GetCookie(name string) string { cookie, err := ctx.Req.Cookie(name)