mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Special alert instance if rule is in state NoData (#40540)
* do not suppress NoData state * extract conversion of state to postable alert + tests * create a special alert instance if nodata * use NoData when converting from Keep Last State instead of Alerting * add silence during migration if NoData is mapped to KeepLastState.
This commit is contained in:
parent
b2447d3956
commit
610643a668
@ -8,13 +8,82 @@ import (
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
const (
|
||||
// NoDataAlertName is the value written to the model.AlertNameLabel label of the special alert that noDataAlert builds for a state in eval.NoData.
NoDataAlertName = "DatasourceNoData"
|
||||
|
||||
// Rulename is the label that receives the original alert name when noDataAlert overwrites model.AlertNameLabel with NoDataAlertName.
Rulename = "rulename"
|
||||
)
|
||||
|
||||
// stateToPostableAlert converts a state to a model that is accepted by Alertmanager. Annotations and Labels are copied from the state.
|
||||
// - if state has at least one result, a new label '__value_string__' is added to the label set
|
||||
// - the alert's GeneratorURL is constructed to point to the alert edit page
|
||||
// - if evaluation state is either NoData or Error, the resulting set of labels is changed:
|
||||
// - original alert name (label: model.AlertNameLabel) is backed up to OriginalAlertName
|
||||
// - label model.AlertNameLabel is overwritten to either NoDataAlertName or ErrorAlertName
|
||||
func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.PostableAlert {
|
||||
nL := alertState.Labels.Copy()
|
||||
nA := data.Labels(alertState.Annotations).Copy()
|
||||
|
||||
if len(alertState.Results) > 0 {
|
||||
nA["__value_string__"] = alertState.Results[0].EvaluationString
|
||||
}
|
||||
|
||||
var urlStr string
|
||||
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
|
||||
u := *appURL
|
||||
u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid))
|
||||
urlStr = u.String()
|
||||
} else if appURL != nil {
|
||||
urlStr = appURL.String()
|
||||
} else {
|
||||
urlStr = ""
|
||||
}
|
||||
|
||||
if alertState.State == eval.NoData {
|
||||
return noDataAlert(nL, nA, alertState, urlStr)
|
||||
}
|
||||
|
||||
return &models.PostableAlert{
|
||||
Annotations: models.LabelSet(nA),
|
||||
StartsAt: strfmt.DateTime(alertState.StartsAt),
|
||||
EndsAt: strfmt.DateTime(alertState.EndsAt),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet(nL),
|
||||
GeneratorURL: strfmt.URI(urlStr),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// NoDataAlert is a special alert sent by Grafana to the Alertmanager, that indicates we received no data from the datasource.
|
||||
// It effectively replaces the legacy behavior of "Keep Last State" by separating the regular alerting flow from the no data scenario into a separate alerts.
|
||||
// The Alert is defined as:
|
||||
// { alertname=DatasourceNoData rulename=original_alertname } + { rule labelset } + { rule annotations }
|
||||
func noDataAlert(labels data.Labels, annotations data.Labels, alertState *state.State, urlStr string) *models.PostableAlert {
|
||||
if name, ok := labels[model.AlertNameLabel]; ok {
|
||||
labels[Rulename] = name
|
||||
}
|
||||
labels[model.AlertNameLabel] = NoDataAlertName
|
||||
|
||||
return &models.PostableAlert{
|
||||
Annotations: models.LabelSet(annotations),
|
||||
StartsAt: strfmt.DateTime(alertState.StartsAt),
|
||||
EndsAt: strfmt.DateTime(alertState.EndsAt),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet(labels),
|
||||
GeneratorURL: strfmt.URI(urlStr),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager, appURL *url.URL) apimodels.PostableAlerts {
|
||||
alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
|
||||
var sentAlerts []*state.State
|
||||
@ -24,33 +93,8 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *s
|
||||
if !alertState.NeedsSending(stateManager.ResendDelay) {
|
||||
continue
|
||||
}
|
||||
nL := alertState.Labels.Copy()
|
||||
nA := data.Labels(alertState.Annotations).Copy()
|
||||
|
||||
if len(alertState.Results) > 0 {
|
||||
nA["__value_string__"] = alertState.Results[0].EvaluationString
|
||||
}
|
||||
|
||||
var urlStr string
|
||||
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
|
||||
u := *appURL
|
||||
u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid))
|
||||
urlStr = u.String()
|
||||
} else if appURL != nil {
|
||||
urlStr = appURL.String()
|
||||
} else {
|
||||
urlStr = ""
|
||||
}
|
||||
|
||||
alerts.PostableAlerts = append(alerts.PostableAlerts, models.PostableAlert{
|
||||
Annotations: models.LabelSet(nA),
|
||||
StartsAt: strfmt.DateTime(alertState.StartsAt),
|
||||
EndsAt: strfmt.DateTime(alertState.EndsAt),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet(nL),
|
||||
GeneratorURL: strfmt.URI(urlStr),
|
||||
},
|
||||
})
|
||||
alert := stateToPostableAlert(alertState, appURL)
|
||||
alerts.PostableAlerts = append(alerts.PostableAlerts, *alert)
|
||||
alertState.LastSentAt = ts
|
||||
sentAlerts = append(sentAlerts, alertState)
|
||||
}
|
||||
|
200
pkg/services/ngalert/schedule/compat_test.go
Normal file
200
pkg/services/ngalert/schedule/compat_test.go
Normal file
@ -0,0 +1,200 @@
|
||||
package schedule
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
"github.com/grafana/grafana/pkg/util"
|
||||
)
|
||||
|
||||
func Test_stateToPostableAlert(t *testing.T) {
|
||||
appURL := &url.URL{
|
||||
Scheme: "http:",
|
||||
Host: fmt.Sprintf("host-%d", rand.Int()),
|
||||
Path: fmt.Sprintf("path-%d", rand.Int()),
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
state eval.State
|
||||
}{
|
||||
{
|
||||
name: "when state is Normal",
|
||||
state: eval.Normal,
|
||||
},
|
||||
{
|
||||
name: "when state is Alerting",
|
||||
state: eval.Alerting,
|
||||
},
|
||||
{
|
||||
name: "when state is Pending",
|
||||
state: eval.Pending,
|
||||
},
|
||||
{
|
||||
name: "when state is NoData",
|
||||
state: eval.NoData,
|
||||
},
|
||||
{
|
||||
name: "when state is Error",
|
||||
state: eval.Error,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Run("it generates proper URL", func(t *testing.T) {
|
||||
t.Run("to alert rule", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
u := *appURL
|
||||
u.Path = u.Path + "/alerting/" + alertState.AlertRuleUID + "/edit"
|
||||
require.Equal(t, u.String(), result.Alert.GeneratorURL.String())
|
||||
})
|
||||
|
||||
t.Run("app URL as is if rule UID is not specified", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels[ngModels.RuleUIDLabel] = ""
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())
|
||||
|
||||
delete(alertState.Labels, ngModels.RuleUIDLabel)
|
||||
result = stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())
|
||||
})
|
||||
|
||||
t.Run("empty string if app URL is not provided", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID
|
||||
result := stateToPostableAlert(alertState, nil)
|
||||
require.Equal(t, "", result.Alert.GeneratorURL.String())
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("Start and End timestamps should be the same", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, strfmt.DateTime(alertState.StartsAt), result.StartsAt)
|
||||
require.Equal(t, strfmt.DateTime(alertState.EndsAt), result.EndsAt)
|
||||
})
|
||||
|
||||
t.Run("should copy annotations", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Annotations = randomMapOfStrings()
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, models.LabelSet(alertState.Annotations), result.Annotations)
|
||||
|
||||
t.Run("add __value_string__ if it has results", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Annotations = randomMapOfStrings()
|
||||
expectedValueString := util.GenerateShortUID()
|
||||
alertState.Results = []state.Evaluation{
|
||||
{
|
||||
EvaluationString: expectedValueString,
|
||||
},
|
||||
}
|
||||
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
|
||||
expected := make(models.LabelSet, len(alertState.Annotations)+1)
|
||||
for k, v := range alertState.Annotations {
|
||||
expected[k] = v
|
||||
}
|
||||
expected["__value_string__"] = expectedValueString
|
||||
|
||||
require.Equal(t, expected, result.Annotations)
|
||||
|
||||
// even overwrites
|
||||
alertState.Annotations["__value_string__"] = util.GenerateShortUID()
|
||||
result = stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, expected, result.Annotations)
|
||||
})
|
||||
})
|
||||
|
||||
switch tc.state {
|
||||
case eval.NoData:
|
||||
t.Run("should keep existing labels and change name", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels = randomMapOfStrings()
|
||||
alertName := util.GenerateShortUID()
|
||||
alertState.Labels[model.AlertNameLabel] = alertName
|
||||
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
|
||||
expected := make(models.LabelSet, len(alertState.Labels)+1)
|
||||
for k, v := range alertState.Labels {
|
||||
expected[k] = v
|
||||
}
|
||||
expected[model.AlertNameLabel] = NoDataAlertName
|
||||
expected[Rulename] = alertName
|
||||
|
||||
require.Equal(t, expected, result.Labels)
|
||||
|
||||
t.Run("should not backup original alert name if it does not exist", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels = randomMapOfStrings()
|
||||
delete(alertState.Labels, model.AlertNameLabel)
|
||||
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
|
||||
require.Equal(t, NoDataAlertName, result.Labels[model.AlertNameLabel])
|
||||
require.NotContains(t, result.Labels[model.AlertNameLabel], Rulename)
|
||||
})
|
||||
})
|
||||
default:
|
||||
t.Run("should copy labels as is", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Labels = randomMapOfStrings()
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, models.LabelSet(alertState.Labels), result.Labels)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func randomMapOfStrings() map[string]string {
|
||||
max := 5
|
||||
result := make(map[string]string, max)
|
||||
for i := 0; i < max; i++ {
|
||||
result[util.GenerateShortUID()] = util.GenerateShortUID()
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// randomDuration returns a random whole number of seconds between 1 and 599 inclusive.
func randomDuration() time.Duration {
	seconds := rand.Int63n(599) + 1
	return time.Duration(seconds) * time.Second
}
|
||||
|
||||
func randomTimeInFuture() time.Time {
|
||||
return time.Now().Add(randomDuration())
|
||||
}
|
||||
|
||||
func randomTimeInPast() time.Time {
|
||||
return time.Now().Add(-randomDuration())
|
||||
}
|
||||
|
||||
func randomState(evalState eval.State) *state.State {
|
||||
return &state.State{
|
||||
State: evalState,
|
||||
AlertRuleUID: util.GenerateShortUID(),
|
||||
StartsAt: time.Now(),
|
||||
EndsAt: randomTimeInFuture(),
|
||||
LastEvaluationTime: randomTimeInPast(),
|
||||
EvaluationDuration: randomDuration(),
|
||||
LastSentAt: randomTimeInPast(),
|
||||
Annotations: make(map[string]string),
|
||||
Labels: make(map[string]string),
|
||||
}
|
||||
}
|
@ -4,6 +4,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
@ -113,16 +114,12 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
|
||||
}
|
||||
|
||||
func (a *State) NeedsSending(resendDelay time.Duration) bool {
|
||||
if a.State != eval.Alerting && a.State != eval.Normal {
|
||||
return false
|
||||
}
|
||||
|
||||
if a.State == eval.Normal && !a.Resolved {
|
||||
if a.State == eval.Pending || a.State == eval.Error || a.State == eval.Normal && !a.Resolved {
|
||||
return false
|
||||
}
|
||||
// if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again
|
||||
return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
|
||||
a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
|
||||
nextSent := a.LastSentAt.Add(resendDelay)
|
||||
return nextSent.Before(a.LastEvaluationTime) || nextSent.Equal(a.LastEvaluationTime)
|
||||
}
|
||||
|
||||
func (a *State) Equals(b *State) bool {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -99,6 +100,46 @@ func TestNeedsSending(t *testing.T) {
|
||||
LastSentAt: evaluationTime.Add(-1 * time.Minute),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: no-data, needs to be re-sent",
|
||||
expected: true,
|
||||
resendDelay: 1 * time.Minute,
|
||||
testState: &State{
|
||||
State: eval.NoData,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-1 * time.Minute),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: no-data, should not be re-sent",
|
||||
expected: false,
|
||||
resendDelay: 1 * time.Minute,
|
||||
testState: &State{
|
||||
State: eval.NoData,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: error, needs to be re-sent",
|
||||
expected: false,
|
||||
resendDelay: 1 * time.Minute,
|
||||
testState: &State{
|
||||
State: eval.Error,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-1 * time.Minute),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: error, should not be re-sent",
|
||||
expected: false,
|
||||
resendDelay: 1 * time.Minute,
|
||||
testState: &State{
|
||||
State: eval.Error,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
|
@ -131,6 +131,10 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
|
||||
m.mg.Logger.Error("alert migration error: failed to create silence", "rule_name", ar.Title, "err", err)
|
||||
}
|
||||
|
||||
if err := m.addNoDataSilence(da, ar); err != nil {
|
||||
m.mg.Logger.Error("alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err)
|
||||
}
|
||||
|
||||
return ar, nil
|
||||
}
|
||||
|
||||
@ -201,7 +205,7 @@ func transNoData(s string) (string, error) {
|
||||
case "alerting":
|
||||
return "Alerting", nil
|
||||
case "keep_state":
|
||||
return "Alerting", nil
|
||||
return "NoData", nil // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert.
|
||||
}
|
||||
return "", fmt.Errorf("unrecognized No Data setting %v", s)
|
||||
}
|
||||
|
@ -14,10 +14,16 @@ import (
|
||||
"github.com/gofrs/uuid"
|
||||
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
|
||||
)
|
||||
|
||||
const (
|
||||
// Should be the same as 'NoDataAlertName' in pkg/services/ngalert/schedule/compat.go.
|
||||
NoDataAlertName = "DatasourceNoData"
|
||||
)
|
||||
|
||||
func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
|
||||
if da.State != "paused" {
|
||||
return nil
|
||||
@ -55,6 +61,46 @@ func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *migration) addNoDataSilence(da dashAlert, rule *alertRule) error {
|
||||
if da.ParsedSettings.NoDataState != "keep_state" {
|
||||
return nil
|
||||
}
|
||||
|
||||
uid, err := uuid.NewV4()
|
||||
if err != nil {
|
||||
return errors.New("failed to create uuid for silence")
|
||||
}
|
||||
|
||||
s := &pb.MeshSilence{
|
||||
Silence: &pb.Silence{
|
||||
Id: uid.String(),
|
||||
Matchers: []*pb.Matcher{
|
||||
{
|
||||
Type: pb.Matcher_EQUAL,
|
||||
Name: model.AlertNameLabel,
|
||||
Pattern: NoDataAlertName,
|
||||
},
|
||||
{
|
||||
Type: pb.Matcher_EQUAL,
|
||||
Name: "rule_uid",
|
||||
Pattern: rule.UID,
|
||||
},
|
||||
},
|
||||
StartsAt: time.Now(),
|
||||
EndsAt: time.Now().AddDate(1, 0, 0), // 1 year.
|
||||
CreatedBy: "Grafana Migration",
|
||||
Comment: fmt.Sprintf("Created during migration to unified alerting to silence NoData state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for NoData state", rule.UID, rule.Title),
|
||||
},
|
||||
ExpiresAt: time.Now().AddDate(1, 0, 0), // 1 year.
|
||||
}
|
||||
_, ok := m.silences[da.OrgId]
|
||||
if !ok {
|
||||
m.silences[da.OrgId] = make([]*pb.MeshSilence, 0)
|
||||
}
|
||||
m.silences[da.OrgId] = append(m.silences[da.OrgId], s)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *migration) writeSilencesFile(orgID int64) error {
|
||||
var buf bytes.Buffer
|
||||
orgSilences, ok := m.silences[orgID]
|
||||
|
Loading…
Reference in New Issue
Block a user