Alerting: Export rule validation logic and make it portable (#83555)

* ValidateInterval doesn't need the entire config

* Validation no longer depends on entire folder now that we've dropped foldertitle from api

* Don't depend on entire config struct

* Export validate group
This commit is contained in:
Alexander Weaver 2024-02-28 14:40:13 -06:00 committed by GitHub
parent af528d2f66
commit a862a4264d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 46 additions and 33 deletions

View File

@ -273,7 +273,7 @@ func (srv RulerSrv) RoutePostNameRulesConfig(c *contextmodel.ReqContext, ruleGro
return ErrResp(http.StatusBadRequest, err, "")
}
rules, err := validateRuleGroup(&ruleGroupConfig, c.SignedInUser.GetOrgID(), namespace, srv.cfg)
rules, err := ValidateRuleGroup(&ruleGroupConfig, c.SignedInUser.GetOrgID(), namespace.UID, RuleLimitsFromConfig(srv.cfg))
if err != nil {
return ErrResp(http.StatusBadRequest, err, "")
}

View File

@ -20,7 +20,7 @@ func (srv RulerSrv) ExportFromPayload(c *contextmodel.ReqContext, ruleGroupConfi
return toNamespaceErrorResponse(err)
}
rulesWithOptionals, err := validateRuleGroup(&ruleGroupConfig, c.SignedInUser.GetOrgID(), namespace, srv.cfg)
rulesWithOptionals, err := ValidateRuleGroup(&ruleGroupConfig, c.SignedInUser.GetOrgID(), namespace.UID, RuleLimitsFromConfig(srv.cfg))
if err != nil {
return ErrResp(http.StatusBadRequest, err, "")
}

View File

@ -7,22 +7,35 @@ import (
"strings"
"time"
"github.com/grafana/grafana/pkg/services/folder"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/setting"
)
type RuleLimits struct {
// The default interval if not specified.
DefaultRuleEvaluationInterval time.Duration
// All intervals must be an integer multiple of this duration.
BaseInterval time.Duration
}
func RuleLimitsFromConfig(cfg *setting.UnifiedAlertingSettings) RuleLimits {
return RuleLimits{
DefaultRuleEvaluationInterval: cfg.DefaultRuleEvaluationInterval,
BaseInterval: cfg.BaseInterval,
}
}
// validateRuleNode validates API model (definitions.PostableExtendedRuleNode) and converts it to models.AlertRule
func validateRuleNode(
ruleNode *apimodels.PostableExtendedRuleNode,
groupName string,
interval time.Duration,
orgId int64,
namespace *folder.Folder,
cfg *setting.UnifiedAlertingSettings) (*ngmodels.AlertRule, error) {
intervalSeconds, err := validateInterval(cfg, interval)
namespaceUID string,
limits RuleLimits) (*ngmodels.AlertRule, error) {
intervalSeconds, err := validateInterval(interval, limits.BaseInterval)
if err != nil {
return nil, err
}
@ -91,7 +104,7 @@ func validateRuleNode(
Data: queries,
UID: ruleNode.GrafanaManagedAlert.UID,
IntervalSeconds: intervalSeconds,
NamespaceUID: namespace.UID,
NamespaceUID: namespaceUID,
RuleGroup: groupName,
NoDataState: noDataState,
ExecErrState: errorState,
@ -162,10 +175,10 @@ func validateCondition(condition string, queries []apimodels.AlertQuery) error {
return nil
}
func validateInterval(cfg *setting.UnifiedAlertingSettings, interval time.Duration) (int64, error) {
func validateInterval(interval, baseInterval time.Duration) (int64, error) {
intervalSeconds := int64(interval.Seconds())
baseIntervalSeconds := int64(cfg.BaseInterval.Seconds())
baseIntervalSeconds := int64(baseInterval.Seconds())
if interval <= 0 {
return 0, fmt.Errorf("rule evaluation interval must be positive duration that is multiple of the base interval %d seconds", baseIntervalSeconds)
@ -193,14 +206,14 @@ func validateForInterval(ruleNode *apimodels.PostableExtendedRuleNode) (time.Dur
return duration, nil
}
// validateRuleGroup validates API model (definitions.PostableRuleGroupConfig) and converts it to a collection of models.AlertRule.
// ValidateRuleGroup validates API model (definitions.PostableRuleGroupConfig) and converts it to a collection of models.AlertRule.
// Returns a slice that contains all rules described by API model or error if either group specification or an alert definition is not valid.
// It also returns a map containing current existing alerts that don't contain the is_paused field in the body of the call.
func validateRuleGroup(
func ValidateRuleGroup(
ruleGroupConfig *apimodels.PostableRuleGroupConfig,
orgId int64,
namespace *folder.Folder,
cfg *setting.UnifiedAlertingSettings) ([]*ngmodels.AlertRuleWithOptionals, error) {
namespaceUID string,
limits RuleLimits) ([]*ngmodels.AlertRuleWithOptionals, error) {
if ruleGroupConfig.Name == "" {
return nil, errors.New("rule group name cannot be empty")
}
@ -212,11 +225,11 @@ func validateRuleGroup(
interval := time.Duration(ruleGroupConfig.Interval)
if interval == 0 {
// if group interval is 0 (undefined) then we automatically fall back to the default interval
interval = cfg.DefaultRuleEvaluationInterval
interval = limits.DefaultRuleEvaluationInterval
}
if interval < 0 || int64(interval.Seconds())%int64(cfg.BaseInterval.Seconds()) != 0 {
return nil, fmt.Errorf("rule evaluation interval (%d second) should be positive number that is multiple of the base interval of %d seconds", int64(interval.Seconds()), int64(cfg.BaseInterval.Seconds()))
if interval < 0 || int64(interval.Seconds())%int64(limits.BaseInterval.Seconds()) != 0 {
return nil, fmt.Errorf("rule evaluation interval (%d second) should be positive number that is multiple of the base interval of %d seconds", int64(interval.Seconds()), int64(limits.BaseInterval.Seconds()))
}
// TODO should we validate that interval is >= cfg.MinInterval? Currently, we allow to save but fix the specified interval if it is < cfg.MinInterval
@ -224,7 +237,7 @@ func validateRuleGroup(
result := make([]*ngmodels.AlertRuleWithOptionals, 0, len(ruleGroupConfig.Rules))
uids := make(map[string]int, cap(result))
for idx := range ruleGroupConfig.Rules {
rule, err := validateRuleNode(&ruleGroupConfig.Rules[idx], ruleGroupConfig.Name, interval, orgId, namespace, cfg)
rule, err := validateRuleNode(&ruleGroupConfig.Rules[idx], ruleGroupConfig.Name, interval, orgId, namespaceUID, limits)
// TODO do not stop on the first failure but return all failures
if err != nil {
return nil, fmt.Errorf("invalid rule specification at index [%d]: %w", idx, err)

View File

@ -197,7 +197,7 @@ func TestValidateRuleGroup(t *testing.T) {
t.Run("should validate struct and rules", func(t *testing.T) {
g := validGroup(cfg, rules...)
alerts, err := validateRuleGroup(&g, orgId, folder, cfg)
alerts, err := ValidateRuleGroup(&g, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
require.Len(t, alerts, len(rules))
})
@ -205,7 +205,7 @@ func TestValidateRuleGroup(t *testing.T) {
t.Run("should default to default interval from config if group interval is 0", func(t *testing.T) {
g := validGroup(cfg, rules...)
g.Interval = 0
alerts, err := validateRuleGroup(&g, orgId, folder, cfg)
alerts, err := ValidateRuleGroup(&g, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
for _, alert := range alerts {
require.Equal(t, int64(cfg.DefaultRuleEvaluationInterval.Seconds()), alert.IntervalSeconds)
@ -220,7 +220,7 @@ func TestValidateRuleGroup(t *testing.T) {
isPaused = !(isPaused)
}
g := validGroup(cfg, rules...)
alerts, err := validateRuleGroup(&g, orgId, folder, cfg)
alerts, err := ValidateRuleGroup(&g, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
for _, alert := range alerts {
require.True(t, alert.HasPause)
@ -292,7 +292,7 @@ func TestValidateRuleGroupFailures(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
g := testCase.group()
_, err := validateRuleGroup(g, orgId, folder, cfg)
_, err := ValidateRuleGroup(g, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.Error(t, err)
if testCase.assert != nil {
testCase.assert(t, g, err)
@ -399,7 +399,7 @@ func TestValidateRuleNode_NoUID(t *testing.T) {
r := testCase.rule()
r.GrafanaManagedAlert.UID = ""
alert, err := validateRuleNode(r, name, interval, orgId, folder, cfg)
alert, err := validateRuleNode(r, name, interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
testCase.assert(t, r, alert)
})
@ -407,7 +407,7 @@ func TestValidateRuleNode_NoUID(t *testing.T) {
t.Run("accepts empty group name", func(t *testing.T) {
r := validRule()
alert, err := validateRuleNode(&r, "", interval, orgId, folder, cfg)
alert, err := validateRuleNode(&r, "", interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
require.Equal(t, "", alert.RuleGroup)
})
@ -560,7 +560,7 @@ func TestValidateRuleNodeFailures_NoUID(t *testing.T) {
interval = *testCase.interval
}
_, err := validateRuleNode(r, "", interval, orgId, folder, cfg)
_, err := validateRuleNode(r, "", interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.Error(t, err)
if testCase.assert != nil {
testCase.assert(t, r, err)
@ -652,7 +652,7 @@ func TestValidateRuleNode_UID(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
r := testCase.rule()
alert, err := validateRuleNode(r, name, interval, orgId, folder, cfg)
alert, err := validateRuleNode(r, name, interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
testCase.assert(t, r, alert)
})
@ -660,7 +660,7 @@ func TestValidateRuleNode_UID(t *testing.T) {
t.Run("accepts empty group name", func(t *testing.T) {
r := validRule()
alert, err := validateRuleNode(&r, "", interval, orgId, folder, cfg)
alert, err := validateRuleNode(&r, "", interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.NoError(t, err)
require.Equal(t, "", alert.RuleGroup)
})
@ -755,7 +755,7 @@ func TestValidateRuleNodeFailures_UID(t *testing.T) {
interval = *testCase.interval
}
_, err := validateRuleNode(r, "", interval, orgId, folder, cfg)
_, err := validateRuleNode(r, "", interval, orgId, folder.UID, RuleLimitsFromConfig(cfg))
require.Error(t, err)
if testCase.assert != nil {
testCase.assert(t, r, err)
@ -788,7 +788,7 @@ func TestValidateRuleNodeIntervalFailures(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
r := validRule()
_, err := validateRuleNode(&r, util.GenerateShortUID(), testCase.interval, rand.Int63(), randFolder(), cfg)
_, err := validateRuleNode(&r, util.GenerateShortUID(), testCase.interval, rand.Int63(), randFolder().UID, RuleLimitsFromConfig(cfg))
require.Error(t, err)
})
}
@ -880,7 +880,7 @@ func TestValidateRuleNodeNotificationSettings(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
r := validRule()
r.GrafanaManagedAlert.NotificationSettings = AlertRuleNotificationSettingsFromNotificationSettings([]models.NotificationSettings{tt.notificationSettings})
_, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder(), cfg)
_, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder().UID, RuleLimitsFromConfig(cfg))
if tt.expErrorContains != "" {
require.Error(t, err)
@ -901,7 +901,7 @@ func TestValidateRuleNodeReservedLabels(t *testing.T) {
r.ApiRuleNode.Labels = map[string]string{
label: "true",
}
_, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder(), cfg)
_, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder().UID, RuleLimitsFromConfig(cfg))
require.Error(t, err)
require.ErrorContains(t, err, label)
})

View File

@ -66,8 +66,8 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *contextmodel.ReqContext,
body.RuleGroup,
srv.cfg.BaseInterval,
c.SignedInUser.GetOrgID(),
folder,
srv.cfg,
folder.UID,
RuleLimitsFromConfig(srv.cfg),
)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "")
@ -238,7 +238,7 @@ func (srv TestingApiSrv) BacktestAlertRule(c *contextmodel.ReqContext, cmd apimo
return ErrResp(400, nil, "Bad For interval")
}
intervalSeconds, err := validateInterval(srv.cfg, time.Duration(cmd.Interval))
intervalSeconds, err := validateInterval(time.Duration(cmd.Interval), srv.cfg.BaseInterval)
if err != nil {
return ErrResp(400, err, "")
}