Alerting: Special alert instance if rule is in state NoData (#40540)

* do not suppress NoData state
* extract conversion of state to postable alert + tests
* create a special alert instance if the state is NoData
* use NoData when converting from Keep Last State instead of Alerting
* add silence during migration if NoData is mapped to KeepLastState.
This commit is contained in:
Yuriy Tseretyan 2021-11-04 16:42:34 -04:00 committed by GitHub
parent b2447d3956
commit 610643a668
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 368 additions and 36 deletions

View File

@ -8,13 +8,82 @@ import (
"github.com/go-openapi/strfmt"
"github.com/grafana/grafana-plugin-sdk-go/data"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
const (
// NoDataAlertName is the value that noDataAlert writes into the alert name label (model.AlertNameLabel) of alerts built for the NoData state.
NoDataAlertName = "DatasourceNoData"
// Rulename is the label under which noDataAlert stores the original alert name before overwriting the alert name label.
Rulename = "rulename"
)
// stateToPostableAlert converts a state to a model that is accepted by Alertmanager. Annotations and Labels are copied from the state.
// - if state has at least one result, the annotation '__value_string__' is set to the evaluation string of the first result,
//   replacing any annotation of the same name
// - the alert's GeneratorURL points to the alert edit page when both appURL and a non-empty rule UID label are present,
//   equals appURL when only appURL is present, and is empty when appURL is nil
// - if evaluation state is NoData, the alert is built by noDataAlert and the resulting set of labels is changed:
//   - original alert name (label: model.AlertNameLabel) is backed up to the label Rulename
//   - label model.AlertNameLabel is overwritten with NoDataAlertName
func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.PostableAlert {
	nL := alertState.Labels.Copy()
	nA := data.Labels(alertState.Annotations).Copy()

	if len(alertState.Results) > 0 {
		nA["__value_string__"] = alertState.Results[0].EvaluationString
	}

	// urlStr keeps its zero value (empty string) when no application URL is provided.
	var urlStr string
	if appURL != nil {
		if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 {
			// Work on a copy so the caller's URL is left untouched.
			u := *appURL
			u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid))
			urlStr = u.String()
		} else {
			urlStr = appURL.String()
		}
	}

	if alertState.State == eval.NoData {
		return noDataAlert(nL, nA, alertState, urlStr)
	}

	return &models.PostableAlert{
		Annotations: models.LabelSet(nA),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
		Alert: models.Alert{
			Labels:       models.LabelSet(nL),
			GeneratorURL: strfmt.URI(urlStr),
		},
	}
}
// noDataAlert builds the special alert that Grafana sends to the Alertmanager to indicate that no data was received from the datasource.
// It effectively replaces the legacy behavior of "Keep Last State" by moving the no data scenario out of the regular alerting flow and into a separate alert.
// The alert is defined as:
// { alertname=DatasourceNoData rulename=original_alertname } + { rule labelset } + { rule annotations }
// The labels map that is passed in is modified in place.
func noDataAlert(labels data.Labels, annotations data.Labels, alertState *state.State, urlStr string) *models.PostableAlert {
	// Preserve the rule's own name, if it has one, before the alert name is replaced.
	originalName, hasName := labels[model.AlertNameLabel]
	if hasName {
		labels[Rulename] = originalName
	}
	labels[model.AlertNameLabel] = NoDataAlertName

	alert := models.PostableAlert{
		Annotations: models.LabelSet(annotations),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
	}
	alert.Alert = models.Alert{
		Labels:       models.LabelSet(labels),
		GeneratorURL: strfmt.URI(urlStr),
	}
	return &alert
}
func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager, appURL *url.URL) apimodels.PostableAlerts {
alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
var sentAlerts []*state.State
@ -24,33 +93,8 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *s
if !alertState.NeedsSending(stateManager.ResendDelay) {
continue
}
nL := alertState.Labels.Copy()
nA := data.Labels(alertState.Annotations).Copy()
if len(alertState.Results) > 0 {
nA["__value_string__"] = alertState.Results[0].EvaluationString
}
var urlStr string
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
u := *appURL
u.Path = path.Join(u.Path, fmt.Sprintf("/alerting/%s/edit", uid))
urlStr = u.String()
} else if appURL != nil {
urlStr = appURL.String()
} else {
urlStr = ""
}
alerts.PostableAlerts = append(alerts.PostableAlerts, models.PostableAlert{
Annotations: models.LabelSet(nA),
StartsAt: strfmt.DateTime(alertState.StartsAt),
EndsAt: strfmt.DateTime(alertState.EndsAt),
Alert: models.Alert{
Labels: models.LabelSet(nL),
GeneratorURL: strfmt.URI(urlStr),
},
})
alert := stateToPostableAlert(alertState, appURL)
alerts.PostableAlerts = append(alerts.PostableAlerts, *alert)
alertState.LastSentAt = ts
sentAlerts = append(sentAlerts, alertState)
}

View File

@ -0,0 +1,200 @@
package schedule
import (
"fmt"
"math/rand"
"net/url"
"testing"
"time"
"github.com/go-openapi/strfmt"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/util"
)
// Test_stateToPostableAlert verifies, for every evaluation state, how stateToPostableAlert builds the
// generator URL, timestamps, annotations and labels of the resulting alert.
func Test_stateToPostableAlert(t *testing.T) {
	appURL := &url.URL{
		// The scheme must not include the colon: URL.String adds it when rendering.
		Scheme: "http",
		Host:   fmt.Sprintf("host-%d", rand.Int()),
		Path:   fmt.Sprintf("path-%d", rand.Int()),
	}

	testCases := []struct {
		name  string
		state eval.State
	}{
		{
			name:  "when state is Normal",
			state: eval.Normal,
		},
		{
			name:  "when state is Alerting",
			state: eval.Alerting,
		},
		{
			name:  "when state is Pending",
			state: eval.Pending,
		},
		{
			name:  "when state is NoData",
			state: eval.NoData,
		},
		{
			name:  "when state is Error",
			state: eval.Error,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Run("it generates proper URL", func(t *testing.T) {
				t.Run("to alert rule", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID
					result := stateToPostableAlert(alertState, appURL)
					u := *appURL
					u.Path = u.Path + "/alerting/" + alertState.AlertRuleUID + "/edit"
					require.Equal(t, u.String(), result.Alert.GeneratorURL.String())
				})

				t.Run("app URL as is if rule UID is not specified", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels[ngModels.RuleUIDLabel] = ""
					result := stateToPostableAlert(alertState, appURL)
					require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())

					delete(alertState.Labels, ngModels.RuleUIDLabel)
					result = stateToPostableAlert(alertState, appURL)
					require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())
				})

				t.Run("empty string if app URL is not provided", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels[ngModels.RuleUIDLabel] = alertState.AlertRuleUID
					result := stateToPostableAlert(alertState, nil)
					require.Equal(t, "", result.Alert.GeneratorURL.String())
				})
			})

			t.Run("Start and End timestamps should be the same", func(t *testing.T) {
				alertState := randomState(tc.state)
				result := stateToPostableAlert(alertState, appURL)
				require.Equal(t, strfmt.DateTime(alertState.StartsAt), result.StartsAt)
				require.Equal(t, strfmt.DateTime(alertState.EndsAt), result.EndsAt)
			})

			t.Run("should copy annotations", func(t *testing.T) {
				alertState := randomState(tc.state)
				alertState.Annotations = randomMapOfStrings()
				result := stateToPostableAlert(alertState, appURL)
				require.Equal(t, models.LabelSet(alertState.Annotations), result.Annotations)

				t.Run("add __value_string__ if it has results", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Annotations = randomMapOfStrings()
					expectedValueString := util.GenerateShortUID()
					alertState.Results = []state.Evaluation{
						{
							EvaluationString: expectedValueString,
						},
					}

					result := stateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Annotations)+1)
					for k, v := range alertState.Annotations {
						expected[k] = v
					}
					expected["__value_string__"] = expectedValueString

					require.Equal(t, expected, result.Annotations)

					// even overwrites
					alertState.Annotations["__value_string__"] = util.GenerateShortUID()
					result = stateToPostableAlert(alertState, appURL)
					require.Equal(t, expected, result.Annotations)
				})
			})

			switch tc.state {
			case eval.NoData:
				t.Run("should keep existing labels and change name", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels = randomMapOfStrings()
					alertName := util.GenerateShortUID()
					alertState.Labels[model.AlertNameLabel] = alertName

					result := stateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Labels)+1)
					for k, v := range alertState.Labels {
						expected[k] = v
					}
					expected[model.AlertNameLabel] = NoDataAlertName
					expected[Rulename] = alertName

					require.Equal(t, expected, result.Labels)

					t.Run("should not backup original alert name if it does not exist", func(t *testing.T) {
						alertState := randomState(tc.state)
						alertState.Labels = randomMapOfStrings()
						delete(alertState.Labels, model.AlertNameLabel)

						result := stateToPostableAlert(alertState, appURL)

						require.Equal(t, NoDataAlertName, result.Labels[model.AlertNameLabel])
						// Check the label set itself: the backup label must be absent.
						// Checking the alert name string would only test for a substring and always pass.
						require.NotContains(t, result.Labels, Rulename)
					})
				})
			default:
				t.Run("should copy labels as is", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels = randomMapOfStrings()
					result := stateToPostableAlert(alertState, appURL)
					require.Equal(t, models.LabelSet(alertState.Labels), result.Labels)
				})
			}
		})
	}
}
// randomMapOfStrings returns a map filled by five insertions of keys and values produced by util.GenerateShortUID.
func randomMapOfStrings() map[string]string {
	const size = 5
	m := make(map[string]string, size)
	for remaining := size; remaining > 0; remaining-- {
		m[util.GenerateShortUID()] = util.GenerateShortUID()
	}
	return m
}
// randomDuration returns a random whole number of seconds between 1 and 599 inclusive.
func randomDuration() time.Duration {
	seconds := 1 + rand.Int63n(599)
	return time.Second * time.Duration(seconds)
}
// randomTimeInFuture returns the current time moved forward by a random duration.
func randomTimeInFuture() time.Time {
	offset := randomDuration()
	return time.Now().Add(offset)
}
// randomTimeInPast returns the current time moved backward by a random duration.
func randomTimeInPast() time.Time {
	offset := randomDuration()
	return time.Now().Add(-offset)
}
// randomState creates a state with the given evaluation state, a generated rule UID, randomized
// timestamps and durations, and empty (non-nil) annotation and label maps.
func randomState(evalState eval.State) *state.State {
	s := new(state.State)
	s.State = evalState
	s.AlertRuleUID = util.GenerateShortUID()
	s.StartsAt = time.Now()
	s.EndsAt = randomTimeInFuture()
	s.LastEvaluationTime = randomTimeInPast()
	s.EvaluationDuration = randomDuration()
	s.LastSentAt = randomTimeInPast()
	s.Annotations = map[string]string{}
	s.Labels = map[string]string{}
	return s
}

View File

@ -4,6 +4,7 @@ import (
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
@ -113,16 +114,12 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
}
// NeedsSending reports whether the state is due to be sent, based on its evaluation state and on resendDelay.
// NOTE(review): this span comes from a diff hunk and shows two variants of the body one after another
// (two state checks and a two-line return, then one state check and a nextSent-based return) — confirm which one is current.
func (a *State) NeedsSending(resendDelay time.Duration) bool {
// Two-check variant: only Alerting and Normal states pass the first check...
if a.State != eval.Alerting && a.State != eval.Normal {
return false
}
// ...and a Normal state is rejected unless it is marked as Resolved.
if a.State == eval.Normal && !a.Resolved {
// Single-check variant: Pending and Error are rejected, Normal is rejected unless Resolved
// (&& binds tighter than ||); every other state continues to the resend-delay check.
if a.State == eval.Pending || a.State == eval.Error || a.State == eval.Normal && !a.Resolved {
return false
}
// send again if LastSentAt + resendDelay is before or equal to LastEvaluationTime
return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
// Same comparison with the shifted timestamp computed once.
nextSent := a.LastSentAt.Add(resendDelay)
return nextSent.Before(a.LastEvaluationTime) || nextSent.Equal(a.LastEvaluationTime)
}
func (a *State) Equals(b *State) bool {

View File

@ -1,6 +1,7 @@
package state
import (
"math/rand"
"testing"
"time"
@ -99,6 +100,46 @@ func TestNeedsSending(t *testing.T) {
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: no-data, needs to be re-sent",
expected: true,
resendDelay: 1 * time.Minute,
testState: &State{
State: eval.NoData,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: no-data, should not be re-sent",
expected: false,
resendDelay: 1 * time.Minute,
testState: &State{
State: eval.NoData,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second),
},
},
{
name: "state: error, needs to be re-sent",
expected: false,
resendDelay: 1 * time.Minute,
testState: &State{
State: eval.Error,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: error, should not be re-sent",
expected: false,
resendDelay: 1 * time.Minute,
testState: &State{
State: eval.Error,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-time.Duration(rand.Int63n(59)+1) * time.Second),
},
},
}
for _, tc := range testCases {

View File

@ -131,6 +131,10 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
m.mg.Logger.Error("alert migration error: failed to create silence", "rule_name", ar.Title, "err", err)
}
if err := m.addNoDataSilence(da, ar); err != nil {
m.mg.Logger.Error("alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err)
}
return ar, nil
}
@ -201,7 +205,7 @@ func transNoData(s string) (string, error) {
case "alerting":
return "Alerting", nil
case "keep_state":
return "Alerting", nil
return "NoData", nil // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert.
}
return "", fmt.Errorf("unrecognized No Data setting %v", s)
}

View File

@ -14,10 +14,16 @@ import (
"github.com/gofrs/uuid"
"github.com/matttproud/golang_protobuf_extensions/pbutil"
pb "github.com/prometheus/alertmanager/silence/silencepb"
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
)
const (
// NoDataAlertName is the alert name (label model.AlertNameLabel) that the silence created by addNoDataSilence matches on.
// Should be the same as 'NoDataAlertName' in pkg/services/schedule/compat.go.
// NOTE(review): the related packages in this change are imported from pkg/services/ngalert/... — confirm the path above.
NoDataAlertName = "DatasourceNoData"
)
func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
if da.State != "paused" {
return nil
@ -55,6 +61,46 @@ func (m *migration) addSilence(da dashAlert, rule *alertRule) error {
return nil
}
// addNoDataSilence records a silence for the NoData alert of the given rule when the dashboard alert's
// NoData setting is "keep_state" ('Keep Last State'); for any other setting it does nothing.
// The silence matches alerts whose alert name is NoDataAlertName and whose "rule_uid" label equals rule.UID,
// starts at the time of the call, ends one year later, and is appended to the silences collected for da.OrgId.
// It returns an error only when a UUID for the silence cannot be generated.
func (m *migration) addNoDataSilence(da dashAlert, rule *alertRule) error {
	if da.ParsedSettings.NoDataState != "keep_state" {
		return nil
	}

	uid, err := uuid.NewV4()
	if err != nil {
		// Keep the underlying cause so the caller's log entry explains why generation failed.
		return fmt.Errorf("failed to create uuid for silence: %w", err)
	}

	// Read the clock once so that the end of the silence and its expiry are the same instant.
	now := time.Now()
	oneYearLater := now.AddDate(1, 0, 0)

	s := &pb.MeshSilence{
		Silence: &pb.Silence{
			Id: uid.String(),
			Matchers: []*pb.Matcher{
				{
					Type:    pb.Matcher_EQUAL,
					Name:    model.AlertNameLabel,
					Pattern: NoDataAlertName,
				},
				{
					Type:    pb.Matcher_EQUAL,
					Name:    "rule_uid",
					Pattern: rule.UID,
				},
			},
			StartsAt:  now,
			EndsAt:    oneYearLater,
			CreatedBy: "Grafana Migration",
			Comment:   fmt.Sprintf("Created during migration to unified alerting to silence NoData state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for NoData state", rule.UID, rule.Title),
		},
		ExpiresAt: oneYearLater,
	}

	// append creates the slice on first use, so an organisation without an entry needs no special handling.
	m.silences[da.OrgId] = append(m.silences[da.OrgId], s)
	return nil
}
func (m *migration) writeSilencesFile(orgID int64) error {
var buf bytes.Buffer
orgSilences, ok := m.silences[orgID]