mirror of
https://github.com/grafana/grafana.git
synced 2024-11-25 02:10:45 -06:00
Alerting: Add setting for maximum allowed rule evaluation results (#89468)
* Alerting: Add setting for maximum allowed rule evaluation results. Added a new configuration setting `quota.alerting_rule_evaluation_results` to set the maximum number of alert rule evaluation results per rule. If the limit is exceeded, the evaluation will result in an error.
This commit is contained in:
parent
be1414b36b
commit
68691c9386
@ -1171,6 +1171,11 @@ global_correlations = -1
|
|||||||
# This is not strictly enforced yet, but will be enforced over time.
|
# This is not strictly enforced yet, but will be enforced over time.
|
||||||
alerting_rule_group_rules = 100
|
alerting_rule_group_rules = 100
|
||||||
|
|
||||||
|
# Limit the number of query evaluation results per alert rule.
|
||||||
|
# If the condition query of an alert rule produces more results than this limit,
|
||||||
|
# the evaluation results in an error.
|
||||||
|
alerting_rule_evaluation_results = -1
|
||||||
|
|
||||||
#################################### Unified Alerting ####################
|
#################################### Unified Alerting ####################
|
||||||
[unified_alerting]
|
[unified_alerting]
|
||||||
# Enable the Alerting sub-system and interface.
|
# Enable the Alerting sub-system and interface.
|
||||||
|
@ -1157,6 +1157,11 @@
|
|||||||
# This is not strictly enforced yet, but will be enforced over time.
|
# This is not strictly enforced yet, but will be enforced over time.
|
||||||
;alerting_rule_group_rules = 100
|
;alerting_rule_group_rules = 100
|
||||||
|
|
||||||
|
# Limit the number of query evaluation results per alert rule.
|
||||||
|
# If the condition query of an alert rule produces more results than this limit,
|
||||||
|
# the evaluation results in an error.
|
||||||
|
;alerting_rule_evaluation_results = -1
|
||||||
|
|
||||||
#################################### Unified Alerting ####################
|
#################################### Unified Alerting ####################
|
||||||
[unified_alerting]
|
[unified_alerting]
|
||||||
# Enable the Unified Alerting sub-system and interface. When enabled, we'll migrate all of your alert rules and notification channels to the new system. New alert rules will be created and your notification channels will be converted into an Alertmanager configuration. Previous data is preserved to enable backwards compatibility but new data is removed.
|
# Enable the Unified Alerting sub-system and interface. When enabled, we'll migrate all of your alert rules and notification channels to the new system. New alert rules will be created and your notification channels will be converted into an Alertmanager configuration. Previous data is preserved to enable backwards compatibility but new data is removed.
|
||||||
|
@ -1554,6 +1554,10 @@ Sets a global limit on number of alert rules that can be created. Default is -1
|
|||||||
|
|
||||||
Sets a global limit on number of correlations that can be created. Default is -1 (unlimited).
|
Sets a global limit on number of correlations that can be created. Default is -1 (unlimited).
|
||||||
|
|
||||||
|
### alerting_rule_evaluation_results
|
||||||
|
|
||||||
|
Limit the number of query evaluation results per alert rule. If the condition query of an alert rule produces more results than this limit, the evaluation results in an error. Default is -1 (unlimited).
|
||||||
|
|
||||||
<hr>
|
<hr>
|
||||||
|
|
||||||
## [unified_alerting]
|
## [unified_alerting]
|
||||||
|
@ -52,6 +52,7 @@ type conditionEvaluator struct {
|
|||||||
expressionService expressionService
|
expressionService expressionService
|
||||||
condition models.Condition
|
condition models.Condition
|
||||||
evalTimeout time.Duration
|
evalTimeout time.Duration
|
||||||
|
evalResultLimit int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *conditionEvaluator) EvaluateRaw(ctx context.Context, now time.Time) (resp *backend.QueryDataResponse, err error) {
|
func (r *conditionEvaluator) EvaluateRaw(ctx context.Context, now time.Time) (resp *backend.QueryDataResponse, err error) {
|
||||||
@ -74,7 +75,21 @@ func (r *conditionEvaluator) EvaluateRaw(ctx context.Context, now time.Time) (re
|
|||||||
execCtx = timeoutCtx
|
execCtx = timeoutCtx
|
||||||
}
|
}
|
||||||
logger.FromContext(ctx).Debug("Executing pipeline", "commands", strings.Join(r.pipeline.GetCommandTypes(), ","), "datasources", strings.Join(r.pipeline.GetDatasourceTypes(), ","))
|
logger.FromContext(ctx).Debug("Executing pipeline", "commands", strings.Join(r.pipeline.GetCommandTypes(), ","), "datasources", strings.Join(r.pipeline.GetDatasourceTypes(), ","))
|
||||||
return r.expressionService.ExecutePipeline(execCtx, now, r.pipeline)
|
result, err := r.expressionService.ExecutePipeline(execCtx, now, r.pipeline)
|
||||||
|
|
||||||
|
// Check if the result of the condition evaluation is too large
|
||||||
|
if err == nil && result != nil && r.evalResultLimit > 0 {
|
||||||
|
conditionResultLength := 0
|
||||||
|
if conditionResponse, ok := result.Responses[r.condition.Condition]; ok {
|
||||||
|
conditionResultLength = len(conditionResponse.Frames)
|
||||||
|
}
|
||||||
|
if conditionResultLength > r.evalResultLimit {
|
||||||
|
logger.FromContext(ctx).Error("Query evaluation returned too many results", "limit", r.evalResultLimit, "actual", conditionResultLength)
|
||||||
|
return nil, fmt.Errorf("query evaluation returned too many results: %d (limit: %d)", conditionResultLength, r.evalResultLimit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Evaluate evaluates the condition and converts the response to Results
|
// Evaluate evaluates the condition and converts the response to Results
|
||||||
@ -87,10 +102,11 @@ func (r *conditionEvaluator) Evaluate(ctx context.Context, now time.Time) (Resul
|
|||||||
}
|
}
|
||||||
|
|
||||||
type evaluatorImpl struct {
|
type evaluatorImpl struct {
|
||||||
evaluationTimeout time.Duration
|
evaluationTimeout time.Duration
|
||||||
dataSourceCache datasources.CacheService
|
evaluationResultLimit int
|
||||||
expressionService *expr.Service
|
dataSourceCache datasources.CacheService
|
||||||
pluginsStore pluginstore.Store
|
expressionService *expr.Service
|
||||||
|
pluginsStore pluginstore.Store
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewEvaluatorFactory(
|
func NewEvaluatorFactory(
|
||||||
@ -100,10 +116,11 @@ func NewEvaluatorFactory(
|
|||||||
pluginsStore pluginstore.Store,
|
pluginsStore pluginstore.Store,
|
||||||
) EvaluatorFactory {
|
) EvaluatorFactory {
|
||||||
return &evaluatorImpl{
|
return &evaluatorImpl{
|
||||||
evaluationTimeout: cfg.EvaluationTimeout,
|
evaluationTimeout: cfg.EvaluationTimeout,
|
||||||
dataSourceCache: datasourceCache,
|
evaluationResultLimit: cfg.EvaluationResultLimit,
|
||||||
expressionService: expressionService,
|
dataSourceCache: datasourceCache,
|
||||||
pluginsStore: pluginsStore,
|
expressionService: expressionService,
|
||||||
|
pluginsStore: pluginsStore,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -849,6 +866,7 @@ func (e *evaluatorImpl) create(condition models.Condition, req *expr.Request) (C
|
|||||||
expressionService: e.expressionService,
|
expressionService: e.expressionService,
|
||||||
condition: condition,
|
condition: condition,
|
||||||
evalTimeout: e.evaluationTimeout,
|
evalTimeout: e.evaluationTimeout,
|
||||||
|
evalResultLimit: e.evaluationResultLimit,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
conditions = append(conditions, node.RefID())
|
conditions = append(conditions, node.RefID())
|
||||||
|
@ -1029,6 +1029,146 @@ func TestEvaluateRaw(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEvaluateRawLimit(t *testing.T) {
|
||||||
|
t.Run("should apply the limit to the successful query evaluation", func(t *testing.T) {
|
||||||
|
resp := backend.QueryDataResponse{
|
||||||
|
Responses: backend.Responses{
|
||||||
|
"A": {
|
||||||
|
Frames: []*data.Frame{{
|
||||||
|
RefID: "A",
|
||||||
|
Fields: []*data.Field{
|
||||||
|
data.NewField(
|
||||||
|
"Value",
|
||||||
|
data.Labels{"foo": "bar"},
|
||||||
|
[]*float64{util.Pointer(10.0)},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
"B": {
|
||||||
|
Frames: []*data.Frame{
|
||||||
|
{
|
||||||
|
RefID: "B",
|
||||||
|
Fields: []*data.Field{
|
||||||
|
data.NewField(
|
||||||
|
"Value",
|
||||||
|
data.Labels{"foo": "bar"},
|
||||||
|
[]*float64{util.Pointer(10.0)},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
RefID: "B",
|
||||||
|
Fields: []*data.Field{
|
||||||
|
data.NewField(
|
||||||
|
"Value",
|
||||||
|
data.Labels{"foo": "baz"},
|
||||||
|
[]*float64{util.Pointer(10.0)},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
desc string
|
||||||
|
cond models.Condition
|
||||||
|
evalResultLimit int
|
||||||
|
error string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "too many results from the condition query results in an error",
|
||||||
|
cond: models.Condition{Condition: "B"},
|
||||||
|
evalResultLimit: 1,
|
||||||
|
error: "query evaluation returned too many results: 2 (limit: 1)",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "if the limit equals to the number of condition query frames, no error is returned",
|
||||||
|
cond: models.Condition{Condition: "B"},
|
||||||
|
evalResultLimit: len(resp.Responses["B"].Frames),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "if the limit is 0, no error is returned",
|
||||||
|
cond: models.Condition{Condition: "B"},
|
||||||
|
evalResultLimit: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "if the limit is -1, no error is returned",
|
||||||
|
cond: models.Condition{Condition: "B"},
|
||||||
|
evalResultLimit: -1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.desc, func(t *testing.T) {
|
||||||
|
e := conditionEvaluator{
|
||||||
|
pipeline: nil,
|
||||||
|
expressionService: &fakeExpressionService{
|
||||||
|
hook: func(ctx context.Context, now time.Time, pipeline expr.DataPipeline) (*backend.QueryDataResponse, error) {
|
||||||
|
return &resp, nil
|
||||||
|
},
|
||||||
|
},
|
||||||
|
condition: tc.cond,
|
||||||
|
evalResultLimit: tc.evalResultLimit,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := e.EvaluateRaw(context.Background(), time.Now())
|
||||||
|
|
||||||
|
if tc.error != "" {
|
||||||
|
require.Error(t, err)
|
||||||
|
require.EqualError(t, err, tc.error)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("should return the original error if the evaluation did not succeed", func(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
desc string
|
||||||
|
queryEvalResult *backend.QueryDataResponse
|
||||||
|
queryEvalError error
|
||||||
|
evalResultLimit int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "the original query evaluation result is preserved",
|
||||||
|
queryEvalResult: &backend.QueryDataResponse{},
|
||||||
|
queryEvalError: errors.New("some query error"),
|
||||||
|
evalResultLimit: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "the original query evaluation result is preserved (no evaluation result)",
|
||||||
|
queryEvalResult: nil,
|
||||||
|
queryEvalError: errors.New("some query error"),
|
||||||
|
evalResultLimit: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.desc, func(t *testing.T) {
|
||||||
|
e := conditionEvaluator{
|
||||||
|
pipeline: nil,
|
||||||
|
expressionService: &fakeExpressionService{
|
||||||
|
hook: func(ctx context.Context, now time.Time, pipeline expr.DataPipeline) (*backend.QueryDataResponse, error) {
|
||||||
|
return tc.queryEvalResult, tc.queryEvalError
|
||||||
|
},
|
||||||
|
},
|
||||||
|
evalResultLimit: tc.evalResultLimit,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := e.EvaluateRaw(context.Background(), time.Now())
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Equal(t, err, tc.queryEvalError)
|
||||||
|
require.Equal(t, result, tc.queryEvalResult)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestResults_HasNonRetryableErrors(t *testing.T) {
|
func TestResults_HasNonRetryableErrors(t *testing.T) {
|
||||||
tc := []struct {
|
tc := []struct {
|
||||||
name string
|
name string
|
||||||
|
@ -91,6 +91,7 @@ type UnifiedAlertingSettings struct {
|
|||||||
MaxAttempts int64
|
MaxAttempts int64
|
||||||
MinInterval time.Duration
|
MinInterval time.Duration
|
||||||
EvaluationTimeout time.Duration
|
EvaluationTimeout time.Duration
|
||||||
|
EvaluationResultLimit int
|
||||||
DisableJitter bool
|
DisableJitter bool
|
||||||
ExecuteAlerts bool
|
ExecuteAlerts bool
|
||||||
DefaultConfiguration string
|
DefaultConfiguration string
|
||||||
@ -355,6 +356,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
|
|||||||
|
|
||||||
quotas := iniFile.Section("quota")
|
quotas := iniFile.Section("quota")
|
||||||
uaCfg.RulesPerRuleGroupLimit = quotas.Key("alerting_rule_group_rules").MustInt64(100)
|
uaCfg.RulesPerRuleGroupLimit = quotas.Key("alerting_rule_group_rules").MustInt64(100)
|
||||||
|
uaCfg.EvaluationResultLimit = quotas.Key("alerting_rule_evaluation_results").MustInt(-1)
|
||||||
|
|
||||||
remoteAlertmanager := iniFile.Section("remote.alertmanager")
|
remoteAlertmanager := iniFile.Section("remote.alertmanager")
|
||||||
uaCfgRemoteAM := RemoteAlertmanagerSettings{
|
uaCfgRemoteAM := RemoteAlertmanagerSettings{
|
||||||
|
Loading…
Reference in New Issue
Block a user