Alerting: Add pagination to /api/prometheus/grafana/api/v1/rules (#95959)

* Intermediate step before refactoring

* Sort groups to paginate on them

* Formatting and improved test

* Address comments

* Update tests
This commit is contained in:
Fayzal Ghantiwala 2024-11-08 16:58:14 +00:00 committed by GitHub
parent 2458329f4f
commit 5a143be653
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 234 additions and 72 deletions

View File

@ -2,10 +2,12 @@ package api
import ( import (
"context" "context"
"encoding/base64"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"net/url" "net/url"
"slices"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
@ -264,7 +266,6 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
return ruleResponse return ruleResponse
} }
limitGroups := getInt64WithDefault(opts.Query, "limit", -1)
limitRulesPerGroup := getInt64WithDefault(opts.Query, "limit_rules", -1) limitRulesPerGroup := getInt64WithDefault(opts.Query, "limit_rules", -1)
limitAlertsPerRule := getInt64WithDefault(opts.Query, "limit_alerts", -1) limitAlertsPerRule := getInt64WithDefault(opts.Query, "limit_alerts", -1)
matchers, err := getMatchersFromQuery(opts.Query) matchers, err := getMatchersFromQuery(opts.Query)
@ -331,15 +332,15 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
ruleNamesSet[rn] = struct{}{} ruleNamesSet[rn] = struct{}{}
} }
groupedRules := getGroupedRules(ruleList, ruleNamesSet) maxGroups := getInt64WithDefault(opts.Query, "group_limit", -1)
nextToken := opts.Query.Get("group_next_token")
groupedRules := getGroupedRules(log, ruleList, ruleNamesSet, opts.Namespaces)
rulesTotals := make(map[string]int64, len(groupedRules)) rulesTotals := make(map[string]int64, len(groupedRules))
for groupKey, rules := range groupedRules { var newToken string
folder, ok := opts.Namespaces[groupKey.NamespaceUID] foundToken := false
if !ok { for _, rg := range groupedRules {
log.Warn("Query returned rules that belong to folder the user does not have access to. All rules that belong to that namespace will not be added to the response", "folder_uid", groupKey.NamespaceUID) ok, err := opts.AuthorizeRuleGroup(rg.Rules)
continue
}
ok, err := opts.AuthorizeRuleGroup(rules)
if err != nil { if err != nil {
ruleResponse.DiscoveryBase.Status = "error" ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("cannot authorize access to rule group: %s", err.Error()) ruleResponse.DiscoveryBase.Error = fmt.Sprintf("cannot authorize access to rule group: %s", err.Error())
@ -350,7 +351,19 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
continue continue
} }
ruleGroup, totals := toRuleGroup(log, manager, status, groupKey, folder, rules, limitAlertsPerRule, withStatesFast, matchers, labelOptions) if nextToken != "" && !foundToken {
if nextToken != getRuleGroupNextToken(rg.Folder, rg.GroupKey.RuleGroup) {
continue
}
foundToken = true
}
if maxGroups > -1 && len(ruleResponse.Data.RuleGroups) == int(maxGroups) {
newToken = getRuleGroupNextToken(rg.Folder, rg.GroupKey.RuleGroup)
break
}
ruleGroup, totals := toRuleGroup(log, manager, status, rg.GroupKey, rg.Folder, rg.Rules, limitAlertsPerRule, withStatesFast, matchers, labelOptions)
ruleGroup.Totals = totals ruleGroup.Totals = totals
for k, v := range totals { for k, v := range totals {
rulesTotals[k] += v rulesTotals[k] += v
@ -367,18 +380,28 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, *ruleGroup) ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, *ruleGroup)
} }
ruleResponse.Data.Totals = rulesTotals ruleResponse.Data.NextToken = newToken
// Sort Rule Groups before checking limits // Only return Totals if there is no pagination
apimodels.RuleGroupsBy(apimodels.RuleGroupsByFileAndName).Sort(ruleResponse.Data.RuleGroups) if maxGroups == -1 {
if limitGroups > -1 && int64(len(ruleResponse.Data.RuleGroups)) >= limitGroups { ruleResponse.Data.Totals = rulesTotals
ruleResponse.Data.RuleGroups = ruleResponse.Data.RuleGroups[0:limitGroups]
} }
return ruleResponse return ruleResponse
} }
func getGroupedRules(ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struct{}) map[ngmodels.AlertRuleGroupKey][]*ngmodels.AlertRule { func getRuleGroupNextToken(namespace, group string) string {
return base64.URLEncoding.EncodeToString([]byte(namespace + "/" + group))
}
// ruleGroup bundles the rules that share a single AlertRuleGroupKey together
// with the folder value resolved for that group's namespace. getGroupedRules
// builds these and returns them sorted by Folder and then by group name.
type ruleGroup struct {
// Folder is the value looked up in the namespace map for GroupKey.NamespaceUID.
// It is used as the primary sort key and as part of the pagination token.
Folder string
// GroupKey identifies the group (namespace UID and rule group name).
GroupKey ngmodels.AlertRuleGroupKey
// Rules holds the rules of the group, sorted by index by getGroupedRules.
Rules []*ngmodels.AlertRule
}
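// getGroupedRules returns a slice of rule groups ordered first by the folder
// value resolved from namespaceMap and then by group name. Groups whose
// namespace UID is missing from namespaceMap are logged and left out.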
func getGroupedRules(log log.Logger, ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struct{}, namespaceMap map[string]string) []*ruleGroup {
// Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID, // Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID,
// but in this API all rules belong to the same organization. Also filter by rule name if // but in this API all rules belong to the same organization. Also filter by rule name if
// it was provided as a query param. // it was provided as a query param.
@ -394,12 +417,38 @@ func getGroupedRules(ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struc
ruleGroup = append(ruleGroup, rule) ruleGroup = append(ruleGroup, rule)
groupedRules[groupKey] = ruleGroup groupedRules[groupKey] = ruleGroup
} }
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times. ruleGroups := make([]*ruleGroup, 0, len(groupedRules))
for _, groupRules := range groupedRules { for groupKey, groupRules := range groupedRules {
folder, ok := namespaceMap[groupKey.NamespaceUID]
if !ok {
log.Warn("Query returned rules that belong to folder the user does not have access to. All rules that belong to that namespace will not be added to the response", "folder_uid", groupKey.NamespaceUID)
continue
}
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times.
ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(groupRules) ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(groupRules)
ruleGroups = append(ruleGroups, &ruleGroup{
Folder: folder,
GroupKey: groupKey,
Rules: groupRules,
})
} }
return groupedRules
// Sort the groups first by namespace, then group name
slices.SortFunc(ruleGroups, func(a, b *ruleGroup) int {
nsCmp := strings.Compare(a.Folder, b.Folder)
if nsCmp != 0 {
return nsCmp
}
// If Namespaces are equal, check the group names
return strings.Compare(a.GroupKey.RuleGroup, b.GroupKey.RuleGroup)
})
return ruleGroups
} }
func filterRules(ruleGroup *apimodels.RuleGroup, withStatesFast map[eval.State]struct{}) { func filterRules(ruleGroup *apimodels.RuleGroup, withStatesFast map[eval.State]struct{}) {

View File

@ -664,6 +664,153 @@ func TestRouteGetRuleStatuses(t *testing.T) {
}) })
}) })
t.Run("when requesting rules with pagination", func(t *testing.T) {
ruleStore := fakes.NewRuleStore(t)
fakeAIM := NewFakeAlertInstanceManager(t)
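// Generate 9 rule groups. NOTE: i/9 is 0 for every i in [0, 8], so all groups
// land in namespace_0 rather than being spread across 3 namespaces (i/3 would do that).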
// Added in reverse order so we can check that
// they are sorted when returned
allRules := make([]*ngmodels.AlertRule, 0, 9)
for i := 8; i >= 0; i-- {
rules := gen.With(gen.WithGroupKey(ngmodels.AlertRuleGroupKey{
RuleGroup: fmt.Sprintf("rule_group_%d", i),
NamespaceUID: fmt.Sprintf("namespace_%d", i/9),
OrgID: orgID,
})).GenerateManyRef(1)
allRules = append(allRules, rules...)
ruleStore.PutRule(context.Background(), rules...)
}
api := PrometheusSrv{
log: log.NewNopLogger(),
manager: fakeAIM,
status: newFakeSchedulerReader(t).setupStates(fakeAIM),
store: ruleStore,
authz: accesscontrol.NewRuleService(acimpl.ProvideAccessControl(featuremgmt.WithFeatures(), zanzana.NewNoopClient())),
}
permissions := createPermissionsForRules(allRules, orgID)
user := &user.SignedInUser{
OrgID: orgID,
Permissions: permissions,
}
c := &contextmodel.ReqContext{
SignedInUser: user,
}
t.Run("should return all groups when not specifying max_groups query param", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 9)
require.NotZero(t, len(result.Data.Totals))
for i := 0; i < 9; i++ {
folder, err := api.store.GetNamespaceByUID(context.Background(), fmt.Sprintf("namespace_%d", i/9), orgID, user)
require.NoError(t, err)
require.Equal(t, folder.Fullpath, result.Data.RuleGroups[i].File)
require.Equal(t, fmt.Sprintf("rule_group_%d", i), result.Data.RuleGroups[i].Name)
}
})
t.Run("should return group_limit number of groups in each call", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=2", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
returnedGroups := make([]apimodels.RuleGroup, 0, len(allRules))
require.Len(t, result.Data.RuleGroups, 2)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.NotEmpty(t, result.Data.NextToken)
token := result.Data.NextToken
for i := 0; i < 3; i++ {
r, err := http.NewRequest("GET", fmt.Sprintf("/api/v1/rules?group_limit=2&group_next_token=%s", token), nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 2)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.NotEmpty(t, result.Data.NextToken)
token = result.Data.NextToken
}
// Final page should only return a single group and no token
r, err = http.NewRequest("GET", fmt.Sprintf("/api/v1/rules?group_limit=2&group_next_token=%s", token), nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp = api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result = &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 1)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.Empty(t, result.Data.NextToken)
for i := 0; i < 9; i++ {
folder, err := api.store.GetNamespaceByUID(context.Background(), fmt.Sprintf("namespace_%d", i/9), orgID, user)
require.NoError(t, err)
require.Equal(t, folder.Fullpath, returnedGroups[i].File)
require.Equal(t, fmt.Sprintf("rule_group_%d", i), returnedGroups[i].Name)
}
})
t.Run("bad token should return no results", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=10&group_next_token=foobar", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 0)
})
t.Run("should return nothing when using group_limit=0", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=0", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 0)
})
})
t.Run("when fine-grained access is enabled", func(t *testing.T) { t.Run("when fine-grained access is enabled", func(t *testing.T) {
t.Run("should return only rules if the user can query all data sources", func(t *testing.T) { t.Run("should return only rules if the user can query all data sources", func(t *testing.T) {
ruleStore := fakes.NewRuleStore(t) ruleStore := fakes.NewRuleStore(t)
@ -851,47 +998,6 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Len(t, rg.Rules, 1) require.Len(t, rg.Rules, 1)
} }
}) })
t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
Permissions: queryPermissions,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
// There should be 2 inactive rules across all Rule Groups
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
})
t.Run("then with limit larger than number of rule groups", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
Permissions: queryPermissions,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1)
})
}) })
t.Run("test with limit rules", func(t *testing.T) { t.Run("test with limit rules", func(t *testing.T) {
@ -925,7 +1031,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
}) })
t.Run("then with limit", func(t *testing.T) { t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1", nil) r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1", nil)
require.NoError(t, err) require.NoError(t, err)
c := &contextmodel.ReqContext{ c := &contextmodel.ReqContext{
Context: &web.Context{Req: r}, Context: &web.Context{Req: r},
@ -941,15 +1047,18 @@ func TestRouteGetRuleStatuses(t *testing.T) {
// There should be 2 inactive rules // There should be 2 inactive rules
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals) require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1) require.Len(t, res.Data.RuleGroups, 2)
rg := res.Data.RuleGroups[0] // The Rule Groups should have 1 inactive rule because of the limit
// The Rule Group within the limit should have 1 inactive rule because of the limit rg1 := res.Data.RuleGroups[0]
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals) require.Equal(t, map[string]int64{"inactive": 1}, rg1.Totals)
require.Len(t, rg.Rules, 1) require.Len(t, rg1.Rules, 1)
rg2 := res.Data.RuleGroups[1]
require.Equal(t, map[string]int64{"inactive": 1}, rg2.Totals)
require.Len(t, rg2.Rules, 1)
}) })
t.Run("then with limit larger than number of rules", func(t *testing.T) { t.Run("then with limit larger than number of rules", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=2", nil) r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=2", nil)
require.NoError(t, err) require.NoError(t, err)
c := &contextmodel.ReqContext{ c := &contextmodel.ReqContext{
Context: &web.Context{Req: r}, Context: &web.Context{Req: r},
@ -962,8 +1071,9 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Equal(t, http.StatusOK, resp.Status()) require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res)) require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1) require.Len(t, res.Data.RuleGroups, 2)
require.Len(t, res.Data.RuleGroups[0].Rules, 1) require.Len(t, res.Data.RuleGroups[0].Rules, 1)
require.Len(t, res.Data.RuleGroups[1].Rules, 1)
}) })
}) })
@ -1006,7 +1116,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
}) })
t.Run("then with limits", func(t *testing.T) { t.Run("then with limits", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=1", nil) r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1&limit_alerts=1", nil)
require.NoError(t, err) require.NoError(t, err)
c := &contextmodel.ReqContext{ c := &contextmodel.ReqContext{
Context: &web.Context{Req: r}, Context: &web.Context{Req: r},
@ -1036,7 +1146,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
}) })
t.Run("then with limit larger than number of alerts", func(t *testing.T) { t.Run("then with limit larger than number of alerts", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=3", nil) r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1&limit_alerts=3", nil)
require.NoError(t, err) require.NoError(t, err)
c := &contextmodel.ReqContext{ c := &contextmodel.ReqContext{
Context: &web.Context{Req: r}, Context: &web.Context{Req: r},
@ -1049,9 +1159,11 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Equal(t, http.StatusOK, resp.Status()) require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res)) require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1) require.Len(t, res.Data.RuleGroups, 2)
require.Len(t, res.Data.RuleGroups[0].Rules, 1) require.Len(t, res.Data.RuleGroups[0].Rules, 1)
require.Len(t, res.Data.RuleGroups[0].Rules[0].Alerts, 2) require.Len(t, res.Data.RuleGroups[0].Rules[0].Alerts, 2)
require.Len(t, res.Data.RuleGroups[1].Rules, 1)
require.Len(t, res.Data.RuleGroups[1].Rules[0].Alerts, 2)
}) })
}) })

View File

@ -72,6 +72,7 @@ type DiscoveryBase struct {
type RuleDiscovery struct { type RuleDiscovery struct {
// required: true // required: true
RuleGroups []RuleGroup `json:"groups"` RuleGroups []RuleGroup `json:"groups"`
NextToken string `json:"groupNextToken,omitempty"`
Totals map[string]int64 `json:"totals,omitempty"` Totals map[string]int64 `json:"totals,omitempty"`
} }