3
0
mirror of https://github.com/grafana/grafana.git synced 2025-02-25 18:55:37 -06:00

Alerting: Create DatasourceError alert if evaluation returns error ()

* Alerting: Create DatasourceError alert if evaluation returns error

* Alerting: Add docs for DatasourceError alert

* Alerting: Fix DatasourceError alert does not have dashboard_uid label

* Alerting: Add break when datasource_uid found

* Alerting: Update TestProcessEvalResults
This commit is contained in:
George Robinson 2021-11-25 10:46:47 +00:00 committed by GitHub
parent 1e5b0e64ac
commit 1b26d4d88e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 221 additions and 12 deletions
docs/sources/alerting/unified-alerting/alerting-rules
pkg/services
public/app
features/alerting/unified/components/rule-editor
types

View File

@ -69,7 +69,8 @@ Configure alerting behavior in the absence of data using information in the foll
| Alerting | Set alert rule state to `Alerting`. |
| Ok | Set alert rule state to `Normal`. |
| Error or timeout option | Description |
| ----------------------- | ---------------------------------- |
| Alerting | Set alert rule state to `Alerting` |
| OK | Set alert rule state to `Normal` |
| Error or timeout option | Description |
| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| Alerting | Set alert rule state to `Alerting` |
| OK | Set alert rule state to `Normal` |
| Error | Create a new alert `DatasourceError` with the name and UID of the alert rule, and UID of the datasource that returned the error as labels. |

View File

@ -41,6 +41,7 @@ func (executionErrorState ExecutionErrorState) String() string {
// Possible values of ExecutionErrorState: the state an alert rule takes when
// its evaluation fails with an error.
const (
// AlertingErrState makes a failed evaluation put the alert into the Alerting state.
AlertingErrState ExecutionErrorState = "Alerting"
// ErrorErrState makes a failed evaluation put the alert into the Error state,
// which is reported as a separate DatasourceError alert.
ErrorErrState ExecutionErrorState = "Error"
)
const (

View File

@ -19,6 +19,7 @@ import (
const (
// NoDataAlertName is the alert name used for the special alert built by noDataAlert.
// NOTE(review): presumably written to the alertname label the same way errorAlert does — confirm.
NoDataAlertName = "DatasourceNoData"
// ErrorAlertName is the value errorAlert writes to the alertname label of the
// alert sent for a state that is in eval.Error.
ErrorAlertName = "DatasourceError"
// Rulename is the label key under which errorAlert keeps the original
// alertname value before overwriting it.
Rulename = "rulename"
)
@ -52,6 +53,10 @@ func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.Post
return noDataAlert(nL, nA, alertState, urlStr)
}
if alertState.State == eval.Error {
return errorAlert(nL, nA, alertState, urlStr)
}
return &models.PostableAlert{
Annotations: models.LabelSet(nA),
StartsAt: strfmt.DateTime(alertState.StartsAt),
@ -84,6 +89,25 @@ func noDataAlert(labels data.Labels, annotations data.Labels, alertState *state.
}
}
// errorAlert builds the special alert that is sent when an alert rule could not
// be evaluated because of an error. Like noDataAlert, it stands in for the old
// "Keep Last State" behaviour by emitting a separate alert named
// DatasourceError.
//
// The labels map is modified in place: if it carries an alertname label, that
// value is preserved under the Rulename key, and the alertname label is then
// set to ErrorAlertName. Annotations, start/end times and the generator URL
// are copied into the returned alert unchanged.
func errorAlert(labels, annotations data.Labels, alertState *state.State, urlStr string) *models.PostableAlert {
	// Remember the rule's own name before it is replaced below.
	ruleName, hasName := labels[model.AlertNameLabel]
	if hasName {
		labels[Rulename] = ruleName
	}
	labels[model.AlertNameLabel] = ErrorAlertName

	alert := models.Alert{
		GeneratorURL: strfmt.URI(urlStr),
		Labels:       models.LabelSet(labels),
	}
	postable := models.PostableAlert{
		Alert:       alert,
		Annotations: models.LabelSet(annotations),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
	}
	return &postable
}
func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager, appURL *url.URL) apimodels.PostableAlerts {
alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
var sentAlerts []*state.State

View File

@ -152,6 +152,35 @@ func Test_stateToPostableAlert(t *testing.T) {
require.NotContains(t, result.Labels[model.AlertNameLabel], Rulename)
})
})
case eval.Error:
t.Run("should keep existing labels and change name", func(t *testing.T) {
alertState := randomState(tc.state)
alertState.Labels = randomMapOfStrings()
alertName := util.GenerateShortUID()
alertState.Labels[model.AlertNameLabel] = alertName
result := stateToPostableAlert(alertState, appURL)
expected := make(models.LabelSet, len(alertState.Labels)+1)
for k, v := range alertState.Labels {
expected[k] = v
}
expected[model.AlertNameLabel] = ErrorAlertName
expected[Rulename] = alertName
require.Equal(t, expected, result.Labels)
t.Run("should not backup original alert name if it does not exist", func(t *testing.T) {
alertState := randomState(tc.state)
alertState.Labels = randomMapOfStrings()
delete(alertState.Labels, model.AlertNameLabel)
result := stateToPostableAlert(alertState, appURL)
require.Equal(t, ErrorAlertName, result.Labels[model.AlertNameLabel])
require.NotContains(t, result.Labels[model.AlertNameLabel], Rulename)
})
})
default:
t.Run("should copy labels as is", func(t *testing.T) {
alertState := randomState(tc.state)

View File

@ -2,11 +2,13 @@ package state_test
import (
"context"
"errors"
"testing"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@ -1175,6 +1177,83 @@ func TestProcessEvalResults(t *testing.T) {
},
},
},
{
desc: "normal -> error when result is Error and ExecErrState is Error",
alertRule: &models.AlertRule{
OrgID: 1,
Title: "test_title",
UID: "test_alert_rule_uid_2",
NamespaceUID: "test_namespace_uid",
Data: []models.AlertQuery{{
RefID: "A",
DatasourceUID: "datasource_uid_1",
}},
Annotations: map[string]string{"annotation": "test"},
Labels: map[string]string{"label": "test"},
IntervalSeconds: 10,
For: 1 * time.Minute,
ExecErrState: models.ErrorErrState,
},
evalResults: []eval.Results{
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Normal,
EvaluatedAt: evaluationTime,
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
Error: expr.QueryError{
RefID: "A",
Err: errors.New("this is an error"),
},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
},
expectedStates: map[string]*state.State{
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
AlertRuleUID: "test_alert_rule_uid_2",
OrgID: 1,
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
Labels: data.Labels{
"__alert_rule_namespace_uid__": "test_namespace_uid",
"__alert_rule_uid__": "test_alert_rule_uid_2",
"alertname": "test_title",
"label": "test",
"instance_label": "test",
"datasource_uid": "datasource_uid_1",
},
State: eval.Error,
Error: expr.QueryError{
RefID: "A",
Err: errors.New("this is an error"),
},
Results: []state.Evaluation{
{
EvaluationTime: evaluationTime,
EvaluationState: eval.Normal,
Values: make(map[string]state.EvaluationValue),
},
{
EvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]state.EvaluationValue),
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test", "Error": "failed to execute query A: this is an error"},
},
},
},
{
desc: "template is correctly expanded",
alertRule: &models.AlertRule{

View File

@ -1,10 +1,12 @@
package state
import (
"errors"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
@ -87,6 +89,7 @@ func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result
func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
a.Error = result.Error
if a.StartsAt.IsZero() {
a.StartsAt = result.EvaluatedAt
}
@ -94,6 +97,23 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
if alertRule.ExecErrState == ngModels.AlertingErrState {
a.State = eval.Alerting
} else if alertRule.ExecErrState == ngModels.ErrorErrState {
a.State = eval.Error
// If the evaluation failed because a query returned an error then
// update the state with the Datasource UID as a label and the error
// message as an annotation so other code can use this metadata to
// add context to alerts
var queryError expr.QueryError
if errors.As(a.Error, &queryError) {
for _, next := range alertRule.Data {
if next.RefID == queryError.RefID {
a.Labels["datasource_uid"] = next.DatasourceUID
break
}
}
a.Annotations["Error"] = queryError.Error()
}
}
}
@ -114,7 +134,7 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
}
func (a *State) NeedsSending(resendDelay time.Duration) bool {
if a.State == eval.Pending || a.State == eval.Error || a.State == eval.Normal && !a.Resolved {
if a.State == eval.Pending || a.State == eval.Normal && !a.Resolved {
return false
}
// if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again

View File

@ -122,7 +122,7 @@ func TestNeedsSending(t *testing.T) {
},
{
name: "state: error, needs to be re-sent",
expected: false,
expected: true,
resendDelay: 1 * time.Minute,
testState: &State{
State: eval.Error,

View File

@ -131,6 +131,10 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
m.mg.Logger.Error("alert migration error: failed to create silence", "rule_name", ar.Title, "err", err)
}
if err := m.addErrorSilence(da, ar); err != nil {
m.mg.Logger.Error("alert migration error: failed to create silence for Error", "rule_name", ar.Title, "err", err)
}
if err := m.addNoDataSilence(da, ar); err != nil {
m.mg.Logger.Error("alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err)
}
@ -215,7 +219,9 @@ func transExecErr(s string) (string, error) {
case "", "alerting":
return "Alerting", nil
case "keep_state":
return "Alerting", nil
// Keep last state is translated to error as we now emit a
// DatasourceError alert when the state is error
return "Error", nil
}
return "", fmt.Errorf("unrecognized Execution Error setting %v", s)
}

View File

@ -22,6 +22,8 @@ import (
const (
// Should be the same as 'NoDataAlertName' in pkg/services/schedule/compat.go.
NoDataAlertName = "DatasourceNoData"
// ErrorAlertName is the alertname value matched by the silences that
// addErrorSilence creates.
// NOTE(review): presumably must stay equal to the 'ErrorAlertName' constant
// declared next to 'NoDataAlertName' in the scheduler's compat.go — confirm.
ErrorAlertName = "DatasourceError"
)
func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
@ -61,6 +63,45 @@ func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
return nil
}
// addErrorSilence queues a one-year silence for the DatasourceError alert of a
// migrated rule when the legacy dashboard alert had its execution error state
// set to "keep_state". The silence matches on the alertname label being
// ErrorAlertName and on the "rule_uid" label being the UID of the migrated
// rule, and is appended to m.silences under the dashboard alert's org ID.
//
// It returns nil without doing anything for any other execution error state.
// It returns an error wrapping the underlying cause if a UUID for the silence
// could not be generated.
func (m *migration) addErrorSilence(da dashAlert, rule *alertRule) error {
	if da.ParsedSettings.ExecutionErrorState != "keep_state" {
		return nil
	}

	uid, err := uuid.NewV4()
	if err != nil {
		// Keep the cause so that the logged migration error is actionable.
		return fmt.Errorf("failed to create uuid for silence: %w", err)
	}

	// Read the clock once so that StartsAt, EndsAt and ExpiresAt are derived
	// from the same instant and EndsAt is exactly equal to ExpiresAt.
	now := time.Now()
	expiry := now.AddDate(1, 0, 0) // 1 year

	s := &pb.MeshSilence{
		Silence: &pb.Silence{
			Id: uid.String(),
			Matchers: []*pb.Matcher{
				{
					Type:    pb.Matcher_EQUAL,
					Name:    model.AlertNameLabel,
					Pattern: ErrorAlertName,
				},
				{
					Type:    pb.Matcher_EQUAL,
					Name:    "rule_uid",
					Pattern: rule.UID,
				},
			},
			StartsAt:  now,
			EndsAt:    expiry,
			CreatedBy: "Grafana Migration",
			Comment:   fmt.Sprintf("Created during migration to unified alerting to silence Error state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for Error state", rule.UID, rule.Title),
		},
		ExpiresAt: expiry,
	}

	// append allocates the per-org slice on first use, so no explicit
	// initialisation of the map entry is needed.
	m.silences[da.OrgId] = append(m.silences[da.OrgId], s)
	return nil
}
func (m *migration) addNoDataSilence(da dashAlert, rule *alertRule) error {
if da.ParsedSettings.NoDataState != "keep_state" {
return nil

View File

@ -6,20 +6,25 @@ import React, { FC, useMemo } from 'react';
type Props = Omit<SelectBaseProps<GrafanaAlertStateDecision>, 'options'> & {
includeNoData: boolean;
includeError: boolean;
};
const options: SelectableValue[] = [
{ value: GrafanaAlertStateDecision.Alerting, label: 'Alerting' },
{ value: GrafanaAlertStateDecision.NoData, label: 'No Data' },
{ value: GrafanaAlertStateDecision.OK, label: 'OK' },
{ value: GrafanaAlertStateDecision.Error, label: 'Error' },
];
export const GrafanaAlertStatePicker: FC<Props> = ({ includeNoData, ...props }) => {
export const GrafanaAlertStatePicker: FC<Props> = ({ includeNoData, includeError, ...props }) => {
const opts = useMemo(() => {
if (includeNoData) {
return options;
if (!includeNoData) {
return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.NoData);
}
return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.NoData);
}, [includeNoData]);
if (!includeError) {
return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.Error);
}
return options;
}, [includeNoData, includeError]);
return <Select menuShouldPortal options={opts} {...props} />;
};

View File

@ -108,6 +108,7 @@ export const GrafanaConditionsStep: FC = () => {
inputId="no-data-state-input"
width={42}
includeNoData={true}
includeError={false}
onChange={(value) => onChange(value?.value)}
/>
)}
@ -122,6 +123,7 @@ export const GrafanaConditionsStep: FC = () => {
inputId="exec-err-state-input"
width={42}
includeNoData={false}
includeError={true}
onChange={(value) => onChange(value?.value)}
/>
)}

View File

@ -99,6 +99,7 @@ export enum GrafanaAlertStateDecision {
NoData = 'NoData',
KeepLastState = 'KeepLastState',
OK = 'OK',
Error = 'Error',
}
interface AlertDataQuery extends DataQuery {