diff --git a/pkg/services/ngalert/schedule/compat.go b/pkg/services/ngalert/schedule/compat.go index 09c9fa05e9b..5892840a3e7 100644 --- a/pkg/services/ngalert/schedule/compat.go +++ b/pkg/services/ngalert/schedule/compat.go @@ -8,13 +8,82 @@ import ( "github.com/go-openapi/strfmt" "github.com/grafana/grafana-plugin-sdk-go/data" - apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/prometheus/alertmanager/api/v2/models" + "github.com/prometheus/common/model" + apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/eval" ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" ) +const ( + NoDataAlertName = "DatasourceNoData" + + Rulename = "rulename" +) + +// stateToPostableAlert converts a state to a model that is accepted by Alertmanager. Annotations and Labels are copied from the state. +// - if state has at least one result, the annotation '__value_string__' is set to the evaluation string of the first result +// - the alert's GeneratorURL points to the rule's edit page when both the rule UID label and appURL are set, falls back to appURL itself, and is empty when appURL is nil +// - if evaluation state is NoData, the resulting set of labels is changed (see noDataAlert): +// - original alert name (label: model.AlertNameLabel), if present, is backed up to the Rulename label +// - label model.AlertNameLabel is overwritten with NoDataAlertName +func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.PostableAlert { + nL := alertState.Labels.Copy() + nA := data.Labels(alertState.Annotations).Copy() + + if len(alertState.Results) > 0 { + nA["__value_string__"] = alertState.Results[0].EvaluationString + } + + var urlStr string + if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil { + u := *appURL + u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid)) + urlStr = u.String() + } else if appURL != nil { + urlStr = appURL.String() + } else { + urlStr =
"" + } + + if alertState.State == eval.NoData { + return noDataAlert(nL, nA, alertState, urlStr) + } + + return &models.PostableAlert{ + Annotations: models.LabelSet(nA), + StartsAt: strfmt.DateTime(alertState.StartsAt), + EndsAt: strfmt.DateTime(alertState.EndsAt), + Alert: models.Alert{ + Labels: models.LabelSet(nL), + GeneratorURL: strfmt.URI(urlStr), + }, + } +} + +// noDataAlert builds the special alert Grafana sends to the Alertmanager to indicate that no data was received from the datasource. +// It effectively replaces the legacy behavior of "Keep Last State" by splitting the no data scenario from the regular alerting flow into separate alerts. +// The labels map passed in is modified in place. The alert is defined as: +// { alertname=DatasourceNoData rulename=original_alertname } + { rule labelset } + { rule annotations } +func noDataAlert(labels data.Labels, annotations data.Labels, alertState *state.State, urlStr string) *models.PostableAlert { + if name, ok := labels[model.AlertNameLabel]; ok { + labels[Rulename] = name + } + labels[model.AlertNameLabel] = NoDataAlertName + + return &models.PostableAlert{ + Annotations: models.LabelSet(annotations), + StartsAt: strfmt.DateTime(alertState.StartsAt), + EndsAt: strfmt.DateTime(alertState.EndsAt), + Alert: models.Alert{ + Labels: models.LabelSet(labels), + GeneratorURL: strfmt.URI(urlStr), + }, + } +} + func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager, appURL *url.URL) apimodels.PostableAlerts { alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))} var sentAlerts []*state.State @@ -24,33 +93,8 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *s if !alertState.NeedsSending(stateManager.ResendDelay) { continue } - nL := alertState.Labels.Copy() - nA := data.Labels(alertState.Annotations).Copy() - - if len(alertState.Results) > 0 { - nA["__value_string__"] = alertState.Results[0].EvaluationString - } - - var urlStr string - if uid :=
nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil { - u := *appURL - u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid)) - urlStr = u.String() - } else if appURL != nil { - urlStr = appURL.String() - } else { - urlStr = "" - } - - alerts.PostableAlerts = append(alerts.PostableAlerts, models.PostableAlert{ - Annotations: models.LabelSet(nA), - StartsAt: strfmt.DateTime(alertState.StartsAt), - EndsAt: strfmt.DateTime(alertState.EndsAt), - Alert: models.Alert{ - Labels: models.LabelSet(nL), - GeneratorURL: strfmt.URI(urlStr), - }, - }) + alert := stateToPostableAlert(alertState, appURL) + alerts.PostableAlerts = append(alerts.PostableAlerts, *alert) alertState.LastSentAt = ts sentAlerts = append(sentAlerts, alertState) } diff --git a/pkg/services/ngalert/schedule/compat_test.go b/pkg/services/ngalert/schedule/compat_test.go new file mode 100644 index 00000000000..05af8c10031 --- /dev/null +++ b/pkg/services/ngalert/schedule/compat_test.go @@ -0,0 +1,200 @@ +package schedule + +import ( + "fmt" + "math/rand" + "net/url" + "testing" + "time" + + "github.com/go-openapi/strfmt" + "github.com/prometheus/alertmanager/api/v2/models" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" + ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" + "github.com/grafana/grafana/pkg/util" +) + +func Test_stateToPostableAlert(t *testing.T) { + appURL := &url.URL{ + Scheme: "http", + Host: fmt.Sprintf("host-%d", rand.Int()), + Path: fmt.Sprintf("path-%d", rand.Int()), + } + + testCases := []struct { + name string + state eval.State + }{ + { + name: "when state is Normal", + state: eval.Normal, + }, + { + name: "when state is Alerting", + state: eval.Alerting, + }, + { + name: "when state is Pending", + state: eval.Pending, + }, + { + name: "when state is NoData", + state: eval.NoData, + }, + { + name: "when state
is Error", + state: eval.Error, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Run("it generates proper URL", func(t *testing.T) { + t.Run("to alert rule", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID + result := stateToPostableAlert(alertState, appURL) + u := *appURL + u.Path = u.Path + "/alerting/" + alertState.AlertRuleUID + "/edit" + require.Equal(t, u.String(), result.Alert.GeneratorURL.String()) + }) + + t.Run("app URL as is if rule UID is not specified", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels[ngModels.RuleUIDLabel] = "" + result := stateToPostableAlert(alertState, appURL) + require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String()) + + delete(alertState.Labels, ngModels.RuleUIDLabel) + result = stateToPostableAlert(alertState, appURL) + require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String()) + }) + + t.Run("empty string if app URL is not provided", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID + result := stateToPostableAlert(alertState, nil) + require.Equal(t, "", result.Alert.GeneratorURL.String()) + }) + }) + + t.Run("Start and End timestamps should be the same", func(t *testing.T) { + alertState := randomState(tc.state) + result := stateToPostableAlert(alertState, appURL) + require.Equal(t, strfmt.DateTime(alertState.StartsAt), result.StartsAt) + require.Equal(t, strfmt.DateTime(alertState.EndsAt), result.EndsAt) + }) + + t.Run("should copy annotations", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Annotations = randomMapOfStrings() + result := stateToPostableAlert(alertState, appURL) + require.Equal(t, models.LabelSet(alertState.Annotations), result.Annotations) + + t.Run("add __value_string__ if it has results", func(t *testing.T) { + alertState := 
randomState(tc.state) + alertState.Annotations = randomMapOfStrings() + expectedValueString := util.GenerateShortUID() + alertState.Results = []state.Evaluation{ + { + EvaluationString: expectedValueString, + }, + } + + result := stateToPostableAlert(alertState, appURL) + + expected := make(models.LabelSet, len(alertState.Annotations)+1) + for k, v := range alertState.Annotations { + expected[k] = v + } + expected["__value_string__"] = expectedValueString + + require.Equal(t, expected, result.Annotations) + + // a pre-existing __value_string__ annotation is overwritten by the evaluation string + alertState.Annotations["__value_string__"] = util.GenerateShortUID() + result = stateToPostableAlert(alertState, appURL) + require.Equal(t, expected, result.Annotations) + }) + }) + + switch tc.state { + case eval.NoData: + t.Run("should keep existing labels and change name", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels = randomMapOfStrings() + alertName := util.GenerateShortUID() + alertState.Labels[model.AlertNameLabel] = alertName + + result := stateToPostableAlert(alertState, appURL) + + expected := make(models.LabelSet, len(alertState.Labels)+1) + for k, v := range alertState.Labels { + expected[k] = v + } + expected[model.AlertNameLabel] = NoDataAlertName + expected[Rulename] = alertName + + require.Equal(t, expected, result.Labels) + + t.Run("should not backup original alert name if it does not exist", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels = randomMapOfStrings() + delete(alertState.Labels, model.AlertNameLabel) + + result := stateToPostableAlert(alertState, appURL) + + require.Equal(t, NoDataAlertName, result.Labels[model.AlertNameLabel]) + require.NotContains(t, result.Labels, Rulename) + }) + }) + default: + t.Run("should copy labels as is", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels = randomMapOfStrings() + result := stateToPostableAlert(alertState, appURL) + require.Equal(t,
models.LabelSet(alertState.Labels), result.Labels) + }) + } + }) + } +} + +func randomMapOfStrings() map[string]string { + max := 5 + result := make(map[string]string, max) + for i := 0; i < max; i++ { + result[util.GenerateShortUID()] = util.GenerateShortUID() + } + return result +} + +func randomDuration() time.Duration { + return time.Duration(rand.Int63n(599)+1) * time.Second +} + +func randomTimeInFuture() time.Time { + return time.Now().Add(randomDuration()) +} + +func randomTimeInPast() time.Time { + return time.Now().Add(-randomDuration()) +} + +func randomState(evalState eval.State) *state.State { + return &state.State{ + State: evalState, + AlertRuleUID: util.GenerateShortUID(), + StartsAt: time.Now(), + EndsAt: randomTimeInFuture(), + LastEvaluationTime: randomTimeInPast(), + EvaluationDuration: randomDuration(), + LastSentAt: randomTimeInPast(), + Annotations: make(map[string]string), + Labels: make(map[string]string), + } +} diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go index 3f72ae58ca5..6fa9f93d780 100644 --- a/pkg/services/ngalert/state/state.go +++ b/pkg/services/ngalert/state/state.go @@ -4,6 +4,7 @@ import ( "time" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/services/ngalert/eval" ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" ) @@ -113,16 +114,12 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) } func (a *State) NeedsSending(resendDelay time.Duration) bool { - if a.State != eval.Alerting && a.State != eval.Normal { - return false - } - - if a.State == eval.Normal && !a.Resolved { + if a.State == eval.Pending || a.State == eval.Error || a.State == eval.Normal && !a.Resolved { return false } // if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again - return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) || - a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime) + nextSent := 
a.LastSentAt.Add(resendDelay) + return nextSent.Before(a.LastEvaluationTime) || nextSent.Equal(a.LastEvaluationTime) } func (a *State) Equals(b *State) bool { diff --git a/pkg/services/ngalert/state/state_test.go b/pkg/services/ngalert/state/state_test.go index e8d5cdd2bc0..a7d27ca7d5b 100644 --- a/pkg/services/ngalert/state/state_test.go +++ b/pkg/services/ngalert/state/state_test.go @@ -1,6 +1,7 @@ package state import ( + "math/rand" "testing" "time" @@ -99,6 +100,46 @@ func TestNeedsSending(t *testing.T) { LastSentAt: evaluationTime.Add(-1 * time.Minute), }, }, + { + name: "state: no-data, needs to be re-sent", + expected: true, + resendDelay: 1 * time.Minute, + testState: &State{ + State: eval.NoData, + LastEvaluationTime: evaluationTime, + LastSentAt: evaluationTime.Add(-1 * time.Minute), + }, + }, + { + name: "state: no-data, should not be re-sent", + expected: false, + resendDelay: 1 * time.Minute, + testState: &State{ + State: eval.NoData, + LastEvaluationTime: evaluationTime, + LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second), + }, + }, + { + name: "state: error, should not be sent even when resend delay has elapsed", + expected: false, + resendDelay: 1 * time.Minute, + testState: &State{ + State: eval.Error, + LastEvaluationTime: evaluationTime, + LastSentAt: evaluationTime.Add(-1 * time.Minute), + }, + }, + { + name: "state: error, should not be re-sent", + expected: false, + resendDelay: 1 * time.Minute, + testState: &State{ + State: eval.Error, + LastEvaluationTime: evaluationTime, + LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second), + }, + }, } for _, tc := range testCases { diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule.go b/pkg/services/sqlstore/migrations/ualert/alert_rule.go index b110395ab66..c33b3a2be2a 100644 --- a/pkg/services/sqlstore/migrations/ualert/alert_rule.go +++ b/pkg/services/sqlstore/migrations/ualert/alert_rule.go @@ -131,6 +131,10 @@ func (m *migration) makeAlertRule(cond condition, da
dashAlert, folderUID string m.mg.Logger.Error("alert migration error: failed to create silence", "rule_name", ar.Title, "err", err) } + if err := m.addNoDataSilence(da, ar); err != nil { + m.mg.Logger.Error("alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err) + } + return ar, nil } @@ -201,7 +205,7 @@ func transNoData(s string) (string, error) { case "alerting": return "Alerting", nil case "keep_state": - return "Alerting", nil + return "NoData", nil // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert. } return "", fmt.Errorf("unrecognized No Data setting %v", s) } diff --git a/pkg/services/sqlstore/migrations/ualert/silences.go b/pkg/services/sqlstore/migrations/ualert/silences.go index 9e47aa32088..648183d1c24 100644 --- a/pkg/services/sqlstore/migrations/ualert/silences.go +++ b/pkg/services/sqlstore/migrations/ualert/silences.go @@ -14,10 +14,16 @@ import ( "github.com/gofrs/uuid" "github.com/matttproud/golang_protobuf_extensions/pbutil" pb "github.com/prometheus/alertmanager/silence/silencepb" + "github.com/prometheus/common/model" "github.com/grafana/grafana/pkg/services/sqlstore/migrator" ) +const ( + // Must stay identical to 'NoDataAlertName' in pkg/services/ngalert/schedule/compat.go.
+ NoDataAlertName = "DatasourceNoData" +) + func (m *migration) addSilence(da dashAlert, rule *alertRule) error { if da.State != "paused" { return nil @@ -55,6 +61,46 @@ func (m *migration) addSilence(da dashAlert, rule *alertRule) error { return nil } +func (m *migration) addNoDataSilence(da dashAlert, rule *alertRule) error { + if da.ParsedSettings.NoDataState != "keep_state" { + return nil + } + + uid, err := uuid.NewV4() + if err != nil { + return errors.New("failed to create uuid for silence") + } + + s := &pb.MeshSilence{ + Silence: &pb.Silence{ + Id: uid.String(), + Matchers: []*pb.Matcher{ + { + Type: pb.Matcher_EQUAL, + Name: model.AlertNameLabel, + Pattern: NoDataAlertName, + }, + { + Type: pb.Matcher_EQUAL, + Name: "rule_uid", + Pattern: rule.UID, + }, + }, + StartsAt: time.Now(), + EndsAt: time.Now().AddDate(1, 0, 0), // 1 year. + CreatedBy: "Grafana Migration", + Comment: fmt.Sprintf("Created during migration to unified alerting to silence NoData state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for NoData state", rule.UID, rule.Title), + }, + ExpiresAt: time.Now().AddDate(1, 0, 0), // 1 year. + } + _, ok := m.silences[da.OrgId] + if !ok { + m.silences[da.OrgId] = make([]*pb.MeshSilence, 0) + } + m.silences[da.OrgId] = append(m.silences[da.OrgId], s) + return nil +} + func (m *migration) writeSilencesFile(orgID int64) error { var buf bytes.Buffer orgSilences, ok := m.silences[orgID]