Alerting: Add pagination to /api/prometheus/grafana/api/v1/rules (#95959)

* Intermediate step before refactoring

* Sort groups to paginate on them

* Formatting and improved test

* Address comments

* Update tests
This commit is contained in:
Fayzal Ghantiwala 2024-11-08 16:58:14 +00:00 committed by GitHub
parent 2458329f4f
commit 5a143be653
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 234 additions and 72 deletions

View File

@ -2,10 +2,12 @@ package api
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/url"
"slices"
"sort"
"strconv"
"strings"
@ -264,7 +266,6 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
return ruleResponse
}
limitGroups := getInt64WithDefault(opts.Query, "limit", -1)
limitRulesPerGroup := getInt64WithDefault(opts.Query, "limit_rules", -1)
limitAlertsPerRule := getInt64WithDefault(opts.Query, "limit_alerts", -1)
matchers, err := getMatchersFromQuery(opts.Query)
@ -331,15 +332,15 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
ruleNamesSet[rn] = struct{}{}
}
groupedRules := getGroupedRules(ruleList, ruleNamesSet)
maxGroups := getInt64WithDefault(opts.Query, "group_limit", -1)
nextToken := opts.Query.Get("group_next_token")
groupedRules := getGroupedRules(log, ruleList, ruleNamesSet, opts.Namespaces)
rulesTotals := make(map[string]int64, len(groupedRules))
for groupKey, rules := range groupedRules {
folder, ok := opts.Namespaces[groupKey.NamespaceUID]
if !ok {
log.Warn("Query returned rules that belong to folder the user does not have access to. All rules that belong to that namespace will not be added to the response", "folder_uid", groupKey.NamespaceUID)
continue
}
ok, err := opts.AuthorizeRuleGroup(rules)
var newToken string
foundToken := false
for _, rg := range groupedRules {
ok, err := opts.AuthorizeRuleGroup(rg.Rules)
if err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("cannot authorize access to rule group: %s", err.Error())
@ -350,7 +351,19 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
continue
}
ruleGroup, totals := toRuleGroup(log, manager, status, groupKey, folder, rules, limitAlertsPerRule, withStatesFast, matchers, labelOptions)
if nextToken != "" && !foundToken {
if nextToken != getRuleGroupNextToken(rg.Folder, rg.GroupKey.RuleGroup) {
continue
}
foundToken = true
}
if maxGroups > -1 && len(ruleResponse.Data.RuleGroups) == int(maxGroups) {
newToken = getRuleGroupNextToken(rg.Folder, rg.GroupKey.RuleGroup)
break
}
ruleGroup, totals := toRuleGroup(log, manager, status, rg.GroupKey, rg.Folder, rg.Rules, limitAlertsPerRule, withStatesFast, matchers, labelOptions)
ruleGroup.Totals = totals
for k, v := range totals {
rulesTotals[k] += v
@ -367,18 +380,28 @@ func PrepareRuleGroupStatuses(log log.Logger, manager state.AlertInstanceManager
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, *ruleGroup)
}
ruleResponse.Data.Totals = rulesTotals
ruleResponse.Data.NextToken = newToken
// Sort Rule Groups before checking limits
apimodels.RuleGroupsBy(apimodels.RuleGroupsByFileAndName).Sort(ruleResponse.Data.RuleGroups)
if limitGroups > -1 && int64(len(ruleResponse.Data.RuleGroups)) >= limitGroups {
ruleResponse.Data.RuleGroups = ruleResponse.Data.RuleGroups[0:limitGroups]
// Only return Totals if there is no pagination
if maxGroups == -1 {
ruleResponse.Data.Totals = rulesTotals
}
return ruleResponse
}
func getGroupedRules(ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struct{}) map[ngmodels.AlertRuleGroupKey][]*ngmodels.AlertRule {
// getRuleGroupNextToken builds the pagination token for a rule group.
// The token is the padded, URL-safe base64 encoding of the namespace and the
// group name joined by a single "/".
func getRuleGroupNextToken(namespace, group string) string {
	enc := base64.URLEncoding
	raw := []byte(namespace + "/" + group)

	// Encode into a buffer sized up front for the padded output.
	token := make([]byte, enc.EncodedLen(len(raw)))
	enc.Encode(token, raw)
	return string(token)
}
// ruleGroup bundles the rules that share one AlertRuleGroupKey with the folder
// value resolved for that key's NamespaceUID. getGroupedRules returns a sorted
// slice of these so that groups can be paginated in a stable order.
type ruleGroup struct {
// Folder is the value found in the namespace map for GroupKey.NamespaceUID.
// It is the primary sort key and the first part of the pagination token.
Folder string
// GroupKey identifies the group. Its RuleGroup name is the secondary sort key
// and the second part of the pagination token.
GroupKey ngmodels.AlertRuleGroupKey
// Rules holds the group's rules, sorted by index in getGroupedRules.
Rules []*ngmodels.AlertRule
}
// Returns a slice of rule groups ordered by namespace and group name
func getGroupedRules(log log.Logger, ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struct{}, namespaceMap map[string]string) []*ruleGroup {
// Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID,
// but in this API all rules belong to the same organization. Also filter by rule name if
// it was provided as a query param.
@ -394,12 +417,38 @@ func getGroupedRules(ruleList ngmodels.RulesGroup, ruleNamesSet map[string]struc
ruleGroup = append(ruleGroup, rule)
groupedRules[groupKey] = ruleGroup
}
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times.
for _, groupRules := range groupedRules {
ruleGroups := make([]*ruleGroup, 0, len(groupedRules))
for groupKey, groupRules := range groupedRules {
folder, ok := namespaceMap[groupKey.NamespaceUID]
if !ok {
log.Warn("Query returned rules that belong to folder the user does not have access to. All rules that belong to that namespace will not be added to the response", "folder_uid", groupKey.NamespaceUID)
continue
}
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times.
ngmodels.AlertRulesBy(ngmodels.AlertRulesByIndex).Sort(groupRules)
ruleGroups = append(ruleGroups, &ruleGroup{
Folder: folder,
GroupKey: groupKey,
Rules: groupRules,
})
}
return groupedRules
// Sort the groups first by namespace, then group name
slices.SortFunc(ruleGroups, func(a, b *ruleGroup) int {
nsCmp := strings.Compare(a.Folder, b.Folder)
if nsCmp != 0 {
return nsCmp
}
// If Namespaces are equal, check the group names
return strings.Compare(a.GroupKey.RuleGroup, b.GroupKey.RuleGroup)
})
return ruleGroups
}
func filterRules(ruleGroup *apimodels.RuleGroup, withStatesFast map[eval.State]struct{}) {

View File

@ -664,6 +664,153 @@ func TestRouteGetRuleStatuses(t *testing.T) {
})
})
t.Run("when requesting rules with pagination", func(t *testing.T) {
ruleStore := fakes.NewRuleStore(t)
fakeAIM := NewFakeAlertInstanceManager(t)
// Generate 9 rule groups across 3 namespaces
// Added in reverse order so we can check that
// they are sorted when returned
allRules := make([]*ngmodels.AlertRule, 0, 9)
for i := 8; i >= 0; i-- {
rules := gen.With(gen.WithGroupKey(ngmodels.AlertRuleGroupKey{
RuleGroup: fmt.Sprintf("rule_group_%d", i),
NamespaceUID: fmt.Sprintf("namespace_%d", i/9),
OrgID: orgID,
})).GenerateManyRef(1)
allRules = append(allRules, rules...)
ruleStore.PutRule(context.Background(), rules...)
}
api := PrometheusSrv{
log: log.NewNopLogger(),
manager: fakeAIM,
status: newFakeSchedulerReader(t).setupStates(fakeAIM),
store: ruleStore,
authz: accesscontrol.NewRuleService(acimpl.ProvideAccessControl(featuremgmt.WithFeatures(), zanzana.NewNoopClient())),
}
permissions := createPermissionsForRules(allRules, orgID)
user := &user.SignedInUser{
OrgID: orgID,
Permissions: permissions,
}
c := &contextmodel.ReqContext{
SignedInUser: user,
}
t.Run("should return all groups when not specifying max_groups query param", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 9)
require.NotZero(t, len(result.Data.Totals))
for i := 0; i < 9; i++ {
folder, err := api.store.GetNamespaceByUID(context.Background(), fmt.Sprintf("namespace_%d", i/9), orgID, user)
require.NoError(t, err)
require.Equal(t, folder.Fullpath, result.Data.RuleGroups[i].File)
require.Equal(t, fmt.Sprintf("rule_group_%d", i), result.Data.RuleGroups[i].Name)
}
})
t.Run("should return group_limit number of groups in each call", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=2", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
returnedGroups := make([]apimodels.RuleGroup, 0, len(allRules))
require.Len(t, result.Data.RuleGroups, 2)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.NotEmpty(t, result.Data.NextToken)
token := result.Data.NextToken
for i := 0; i < 3; i++ {
r, err := http.NewRequest("GET", fmt.Sprintf("/api/v1/rules?group_limit=2&group_next_token=%s", token), nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 2)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.NotEmpty(t, result.Data.NextToken)
token = result.Data.NextToken
}
// Final page should only return a single group and no token
r, err = http.NewRequest("GET", fmt.Sprintf("/api/v1/rules?group_limit=2&group_next_token=%s", token), nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp = api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result = &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 1)
require.Len(t, result.Data.Totals, 0)
returnedGroups = append(returnedGroups, result.Data.RuleGroups...)
require.Empty(t, result.Data.NextToken)
for i := 0; i < 9; i++ {
folder, err := api.store.GetNamespaceByUID(context.Background(), fmt.Sprintf("namespace_%d", i/9), orgID, user)
require.NoError(t, err)
require.Equal(t, folder.Fullpath, returnedGroups[i].File)
require.Equal(t, fmt.Sprintf("rule_group_%d", i), returnedGroups[i].Name)
}
})
t.Run("bad token should return no results", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=10&group_next_token=foobar", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 0)
})
t.Run("should return nothing when using group_limit=0", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?group_limit=0", nil)
require.NoError(t, err)
c.Context = &web.Context{Req: r}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
result := &apimodels.RuleResponse{}
require.NoError(t, json.Unmarshal(resp.Body(), result))
require.Len(t, result.Data.RuleGroups, 0)
})
})
t.Run("when fine-grained access is enabled", func(t *testing.T) {
t.Run("should return only rules if the user can query all data sources", func(t *testing.T) {
ruleStore := fakes.NewRuleStore(t)
@ -851,47 +998,6 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Len(t, rg.Rules, 1)
}
})
t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
Permissions: queryPermissions,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
// There should be 2 inactive rules across all Rule Groups
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
})
t.Run("then with limit larger than number of rule groups", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
SignedInUser: &user.SignedInUser{
OrgID: orgID,
Permissions: queryPermissions,
},
}
resp := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1)
})
})
t.Run("test with limit rules", func(t *testing.T) {
@ -925,7 +1031,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
})
t.Run("then with limit", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1", nil)
r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
@ -941,15 +1047,18 @@ func TestRouteGetRuleStatuses(t *testing.T) {
// There should be 2 inactive rules
require.Equal(t, map[string]int64{"inactive": 2}, res.Data.Totals)
require.Len(t, res.Data.RuleGroups, 1)
rg := res.Data.RuleGroups[0]
// The Rule Group within the limit should have 1 inactive rule because of the limit
require.Equal(t, map[string]int64{"inactive": 1}, rg.Totals)
require.Len(t, rg.Rules, 1)
require.Len(t, res.Data.RuleGroups, 2)
// The Rule Groups should have 1 inactive rule because of the limit
rg1 := res.Data.RuleGroups[0]
require.Equal(t, map[string]int64{"inactive": 1}, rg1.Totals)
require.Len(t, rg1.Rules, 1)
rg2 := res.Data.RuleGroups[1]
require.Equal(t, map[string]int64{"inactive": 1}, rg2.Totals)
require.Len(t, rg2.Rules, 1)
})
t.Run("then with limit larger than number of rules", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=2", nil)
r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=2", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
@ -962,8 +1071,9 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1)
require.Len(t, res.Data.RuleGroups, 2)
require.Len(t, res.Data.RuleGroups[0].Rules, 1)
require.Len(t, res.Data.RuleGroups[1].Rules, 1)
})
})
@ -1006,7 +1116,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
})
t.Run("then with limits", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=1", nil)
r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1&limit_alerts=1", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
@ -1036,7 +1146,7 @@ func TestRouteGetRuleStatuses(t *testing.T) {
})
t.Run("then with limit larger than number of alerts", func(t *testing.T) {
r, err := http.NewRequest("GET", "/api/v1/rules?limit=1&limit_rules=1&limit_alerts=3", nil)
r, err := http.NewRequest("GET", "/api/v1/rules?limit_rules=1&limit_alerts=3", nil)
require.NoError(t, err)
c := &contextmodel.ReqContext{
Context: &web.Context{Req: r},
@ -1049,9 +1159,11 @@ func TestRouteGetRuleStatuses(t *testing.T) {
require.Equal(t, http.StatusOK, resp.Status())
var res apimodels.RuleResponse
require.NoError(t, json.Unmarshal(resp.Body(), &res))
require.Len(t, res.Data.RuleGroups, 1)
require.Len(t, res.Data.RuleGroups, 2)
require.Len(t, res.Data.RuleGroups[0].Rules, 1)
require.Len(t, res.Data.RuleGroups[0].Rules[0].Alerts, 2)
require.Len(t, res.Data.RuleGroups[1].Rules, 1)
require.Len(t, res.Data.RuleGroups[1].Rules[0].Alerts, 2)
})
})

View File

@ -72,6 +72,7 @@ type DiscoveryBase struct {
type RuleDiscovery struct {
// Payload of the rules response: the returned rule groups plus optional
// pagination and totals information.

// required: true
RuleGroups []RuleGroup `json:"groups"`
// NextToken is serialized as "groupNextToken" and omitted when empty.
// PrepareRuleGroupStatuses sets it to the token of the first group left out
// once group_limit groups have been collected; clients send it back as the
// group_next_token query parameter to continue from that group.
NextToken string `json:"groupNextToken,omitempty"`
// Totals is omitted from JSON when empty. PrepareRuleGroupStatuses only
// populates it (summed from the per-group totals) when no group_limit is given.
Totals map[string]int64 `json:"totals,omitempty"`
}