mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: During legacy migration reduce the number of created silences (#78505)
* Alerting: During legacy migration reduce the number of created silences

During legacy migration, every migrated rule was given a label rule_uid=<uid>. This label was used to silence DatasourceError/DatasourceNoData alerts for migrated rules that had ExecutionErrorState/NoDataState, respectively, set to keep_state. This could potentially create a large number of silences and a high-cardinality label. Both of these scenarios have poor outcomes for CPU load and latency in unified alerting.

Instead, this change creates one label per ExecutionErrorState/NoDataState when it is set to keep_state, as well as up to two silence rules, each created only if rules with the corresponding label were created during migration. These silence rules match:
- __legacy_silence_error_keep_state__ = true
- __legacy_silence_nodata_keep_state__ = true

In most cases this drastically reduces the number of created silence rules, and it avoids creating the potentially high-cardinality label `rule_uid`.
This commit is contained in:
@@ -94,21 +94,7 @@ func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *l
|
|||||||
ExecErrState: transExecErr(l, parsedSettings.ExecutionErrorState),
|
ExecErrState: transExecErr(l, parsedSettings.ExecutionErrorState),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Label for routing and silences.
|
om.silences.handleSilenceLabels(ar, parsedSettings)
|
||||||
n, v := getLabelForSilenceMatching(ar.UID)
|
|
||||||
ar.Labels[n] = v
|
|
||||||
|
|
||||||
if parsedSettings.ExecutionErrorState == string(legacymodels.ExecutionErrorKeepState) {
|
|
||||||
if err := om.addErrorSilence(ar); err != nil {
|
|
||||||
om.log.Error("Alert migration error: failed to create silence for Error", "rule_name", ar.Title, "err", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if parsedSettings.NoDataState == string(legacymodels.NoDataKeepState) {
|
|
||||||
if err := om.addNoDataSilence(ar); err != nil {
|
|
||||||
om.log.Error("Alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We do some validation and pre-save operations early in order to track these errors as part of the migration state.
|
// We do some validation and pre-save operations early in order to track these errors as part of the migration state.
|
||||||
if err := ar.ValidateAlertRule(om.cfg.UnifiedAlerting); err != nil {
|
if err := ar.ValidateAlertRule(om.cfg.UnifiedAlerting); err != nil {
|
||||||
|
|||||||
@@ -265,6 +265,30 @@ func TestMakeAlertRule(t *testing.T) {
|
|||||||
suffix := fmt.Sprintf(" - %ds", ar.IntervalSeconds)
|
suffix := fmt.Sprintf(" - %ds", ar.IntervalSeconds)
|
||||||
require.Equal(t, fmt.Sprintf("%s%s", strings.Repeat("a", store.AlertRuleMaxRuleGroupNameLength-len(suffix)), suffix), ar.RuleGroup)
|
require.Equal(t, fmt.Sprintf("%s%s", strings.Repeat("a", store.AlertRuleMaxRuleGroupNameLength-len(suffix)), suffix), ar.RuleGroup)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("keep last state error dash alert is silenced", func(t *testing.T) {
|
||||||
|
service := NewTestMigrationService(t, sqlStore, nil)
|
||||||
|
m := service.newOrgMigration(1)
|
||||||
|
da := createTestDashAlert()
|
||||||
|
da.Settings.Set("executionErrorState", "keep_state")
|
||||||
|
|
||||||
|
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Equal(t, ar.Labels[models.MigratedSilenceLabelErrorKeepState], "true")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("keep last state nodata dash alert is silenced", func(t *testing.T) {
|
||||||
|
service := NewTestMigrationService(t, sqlStore, nil)
|
||||||
|
m := service.newOrgMigration(1)
|
||||||
|
da := createTestDashAlert()
|
||||||
|
da.Settings.Set("noDataState", "keep_state")
|
||||||
|
|
||||||
|
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Equal(t, ar.Labels[models.MigratedSilenceLabelNodataKeepState], "true")
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func createTestDashAlert() *legacymodels.Alert {
|
func createTestDashAlert() *legacymodels.Alert {
|
||||||
|
|||||||
@@ -550,7 +550,6 @@ func TestDashAlertMigration(t *testing.T) {
|
|||||||
expectedRulesMap := expected[orgId]
|
expectedRulesMap := expected[orgId]
|
||||||
require.Len(t, rules, len(expectedRulesMap))
|
require.Len(t, rules, len(expectedRulesMap))
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
delete(r.Labels, "rule_uid") // Not checking this here.
|
|
||||||
exp := expectedRulesMap[r.Title].Labels
|
exp := expectedRulesMap[r.Title].Labels
|
||||||
require.Lenf(t, r.Labels, len(exp), "rule doesn't have correct number of labels: %s", r.Title)
|
require.Lenf(t, r.Labels, len(exp), "rule doesn't have correct number of labels: %s", r.Title)
|
||||||
for l := range r.Labels {
|
for l := range r.Labels {
|
||||||
@@ -657,7 +656,6 @@ func TestDashAlertMigration(t *testing.T) {
|
|||||||
expectedRulesMap := expected[orgId]
|
expectedRulesMap := expected[orgId]
|
||||||
require.Len(t, rules, len(expectedRulesMap))
|
require.Len(t, rules, len(expectedRulesMap))
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
delete(r.Labels, "rule_uid") // Not checking this here.
|
|
||||||
exp := expectedRulesMap[r.Title].Labels
|
exp := expectedRulesMap[r.Title].Labels
|
||||||
require.Lenf(t, r.Labels, len(exp), "rule doesn't have correct number of labels: %s", r.Title)
|
require.Lenf(t, r.Labels, len(exp), "rule doesn't have correct number of labels: %s", r.Title)
|
||||||
for l := range r.Labels {
|
for l := range r.Labels {
|
||||||
@@ -707,7 +705,6 @@ func TestDashAlertMigration(t *testing.T) {
|
|||||||
expectedRulesMap := expected[orgId]
|
expectedRulesMap := expected[orgId]
|
||||||
require.Len(t, rules, len(expectedRulesMap))
|
require.Len(t, rules, len(expectedRulesMap))
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
delete(r.Labels, "rule_uid") // Not checking this here.
|
|
||||||
exp := expectedRulesMap[*r.PanelID]
|
exp := expectedRulesMap[*r.PanelID]
|
||||||
require.Equal(t, exp, r.Title)
|
require.Equal(t, exp, r.Title)
|
||||||
}
|
}
|
||||||
@@ -743,7 +740,6 @@ func TestDashAlertMigration(t *testing.T) {
|
|||||||
expectedRulesMap := expected[orgId]
|
expectedRulesMap := expected[orgId]
|
||||||
require.Len(t, rules, len(expectedRulesMap))
|
require.Len(t, rules, len(expectedRulesMap))
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
delete(r.Labels, "rule_uid") // Not checking this here.
|
|
||||||
exp := expectedRulesMap[*r.PanelID]
|
exp := expectedRulesMap[*r.PanelID]
|
||||||
require.Equal(t, exp, r.Title)
|
require.Equal(t, exp, r.Title)
|
||||||
}
|
}
|
||||||
@@ -1216,8 +1212,6 @@ func TestDashAlertQueryMigration(t *testing.T) {
|
|||||||
|
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
// Remove generated fields.
|
// Remove generated fields.
|
||||||
require.NotEqual(t, r.Labels["rule_uid"], "")
|
|
||||||
delete(r.Labels, "rule_uid")
|
|
||||||
require.NotEqual(t, r.Annotations[ngModels.MigratedAlertIdAnnotation], "")
|
require.NotEqual(t, r.Annotations[ngModels.MigratedAlertIdAnnotation], "")
|
||||||
delete(r.Annotations, ngModels.MigratedAlertIdAnnotation)
|
delete(r.Annotations, ngModels.MigratedAlertIdAnnotation)
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
|
||||||
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/infra/log"
|
"github.com/grafana/grafana/pkg/infra/log"
|
||||||
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
|
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
|
||||||
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
|
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
|
||||||
@@ -22,7 +20,8 @@ type OrgMigration struct {
|
|||||||
encryptionService secrets.Service
|
encryptionService secrets.Service
|
||||||
|
|
||||||
orgID int64
|
orgID int64
|
||||||
silences []*pb.MeshSilence
|
|
||||||
|
silences *silenceHandler
|
||||||
}
|
}
|
||||||
|
|
||||||
// newOrgMigration creates a new OrgMigration for the given orgID.
|
// newOrgMigration creates a new OrgMigration for the given orgID.
|
||||||
@@ -33,9 +32,9 @@ func (ms *migrationService) newOrgMigration(orgID int64) *OrgMigration {
|
|||||||
|
|
||||||
migrationStore: ms.migrationStore,
|
migrationStore: ms.migrationStore,
|
||||||
encryptionService: ms.encryptionService,
|
encryptionService: ms.encryptionService,
|
||||||
|
silences: ms.silences,
|
||||||
|
|
||||||
orgID: orgID,
|
orgID: orgID,
|
||||||
silences: make([]*pb.MeshSilence, 0),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -683,8 +683,6 @@ func TestDashAlertPermissionMigration(t *testing.T) {
|
|||||||
actual := make([]expectedAlertMigration, 0, len(rules))
|
actual := make([]expectedAlertMigration, 0, len(rules))
|
||||||
for i, r := range rules {
|
for i, r := range rules {
|
||||||
// Remove generated fields.
|
// Remove generated fields.
|
||||||
require.NotEqual(t, r.Labels["rule_uid"], "")
|
|
||||||
delete(r.Labels, "rule_uid")
|
|
||||||
require.NotEqual(t, r.Annotations[ngModels.MigratedAlertIdAnnotation], "")
|
require.NotEqual(t, r.Annotations[ngModels.MigratedAlertIdAnnotation], "")
|
||||||
delete(r.Annotations, ngModels.MigratedAlertIdAnnotation)
|
delete(r.Annotations, ngModels.MigratedAlertIdAnnotation)
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ type migrationService struct {
|
|||||||
migrationStore migrationStore.Store
|
migrationStore migrationStore.Store
|
||||||
|
|
||||||
encryptionService secrets.Service
|
encryptionService secrets.Service
|
||||||
|
silences *silenceHandler
|
||||||
}
|
}
|
||||||
|
|
||||||
func ProvideService(
|
func ProvideService(
|
||||||
@@ -63,6 +64,10 @@ func ProvideService(
|
|||||||
store: store,
|
store: store,
|
||||||
migrationStore: migrationStore,
|
migrationStore: migrationStore,
|
||||||
encryptionService: encryptionService,
|
encryptionService: encryptionService,
|
||||||
|
silences: &silenceHandler{
|
||||||
|
dataPath: cfg.DataPath,
|
||||||
|
createSilenceFile: openReplace,
|
||||||
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -486,11 +491,9 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(om.silences) > 0 {
|
err = ms.silences.createSilences(o.ID, om.log)
|
||||||
om.log.Debug("Writing silences file", "silences", len(om.silences))
|
if err != nil {
|
||||||
if err := writeSilencesFile(ms.cfg.DataPath, o.ID, om.silences); err != nil {
|
return fmt.Errorf("create silences for org %d: %w", o.ID, err)
|
||||||
return fmt.Errorf("write silence file for org %d: %w", o.ID, err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ms.migrationStore.SetMigrated(ctx, o.ID, true)
|
err = ms.migrationStore.SetMigrated(ctx, o.ID, true)
|
||||||
|
|||||||
@@ -1617,7 +1617,6 @@ func compareRules(t *testing.T, x *xorm.Engine, orgId int64, expectedRules []*mo
|
|||||||
}),
|
}),
|
||||||
cmpopts.IgnoreUnexported(models.AlertRule{}, models.AlertQuery{}),
|
cmpopts.IgnoreUnexported(models.AlertRule{}, models.AlertQuery{}),
|
||||||
cmpopts.IgnoreFields(models.AlertRule{}, "Updated", "UID", "ID", "Version"),
|
cmpopts.IgnoreFields(models.AlertRule{}, "Updated", "UID", "ID", "Version"),
|
||||||
cmpopts.IgnoreMapEntries(func(k string, v string) bool { return k == "rule_uid" }),
|
|
||||||
}
|
}
|
||||||
if !cmp.Equal(expectedRules, rules, cOpt...) {
|
if !cmp.Equal(expectedRules, rules, cOpt...) {
|
||||||
t.Errorf("Unexpected Rule: %v", cmp.Diff(expectedRules, rules, cOpt...))
|
t.Errorf("Unexpected Rule: %v", cmp.Diff(expectedRules, rules, cOpt...))
|
||||||
@@ -1708,7 +1707,6 @@ func compareState(t *testing.T, x *xorm.Engine, service *migrationService, orgId
|
|||||||
cmpopts.SortSlices(func(a, b *definitions.DashboardUpgrade) bool { return a.DashboardID < b.DashboardID }),
|
cmpopts.SortSlices(func(a, b *definitions.DashboardUpgrade) bool { return a.DashboardID < b.DashboardID }),
|
||||||
cmpopts.SortSlices(func(a, b *definitions.AlertPair) bool { return a.LegacyAlert.ID < b.LegacyAlert.ID }),
|
cmpopts.SortSlices(func(a, b *definitions.AlertPair) bool { return a.LegacyAlert.ID < b.LegacyAlert.ID }),
|
||||||
cmpopts.SortSlices(func(a, b *definitions.ContactPair) bool { return a.LegacyChannel.ID < b.LegacyChannel.ID }),
|
cmpopts.SortSlices(func(a, b *definitions.ContactPair) bool { return a.LegacyChannel.ID < b.LegacyChannel.ID }),
|
||||||
cmpopts.IgnoreMapEntries(func(k string, v string) bool { return k == "rule_uid" }),
|
|
||||||
cmpopts.IgnoreUnexported(labels.Matcher{}),
|
cmpopts.IgnoreUnexported(labels.Matcher{}),
|
||||||
cmpopts.EquateEmpty(),
|
cmpopts.EquateEmpty(),
|
||||||
}
|
}
|
||||||
@@ -1880,7 +1878,6 @@ func (h *serviceHelper) genAlertPairs(f *dashboards.Dashboard, d *dashboards.Das
|
|||||||
},
|
},
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
models.MigratedUseLegacyChannelsLabel: "true",
|
models.MigratedUseLegacyChannelsLabel: "true",
|
||||||
"rule_uid": uid,
|
|
||||||
},
|
},
|
||||||
IsPaused: false,
|
IsPaused: false,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package migration
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
@@ -11,88 +10,113 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||||
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/log"
|
||||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||||
|
ngstate "github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||||
|
"github.com/grafana/grafana/pkg/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
// TimeNow makes it possible to test usage of time
|
||||||
// Should be the same as 'NoDataAlertName' in pkg/services/schedule/compat.go.
|
var TimeNow = time.Now
|
||||||
NoDataAlertName = "DatasourceNoData"
|
|
||||||
|
|
||||||
ErrorAlertName = "DatasourceError"
|
// silenceHandler is a helper for managing and writing migration silences.
|
||||||
)
|
type silenceHandler struct {
|
||||||
|
rulesWithErrorSilenceLabels int
|
||||||
|
rulesWithNoDataSilenceLabels int
|
||||||
|
createSilenceFile func(filename string) (io.WriteCloser, error)
|
||||||
|
|
||||||
// addErrorSilence adds a silence for the given rule to the orgMigration if the ExecutionErrorState was set to keep_state.
|
dataPath string
|
||||||
func (om *OrgMigration) addErrorSilence(rule *models.AlertRule) error {
|
|
||||||
uid, err := uuid.NewRandom()
|
|
||||||
if err != nil {
|
|
||||||
return errors.New("create uuid for silence")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s := &pb.MeshSilence{
|
// handleSilenceLabels adds labels to the alert rule if the rule requires silence labels for error/nodata keep_state.
|
||||||
|
func (sh *silenceHandler) handleSilenceLabels(ar *models.AlertRule, parsedSettings dashAlertSettings) {
|
||||||
|
if parsedSettings.ExecutionErrorState == "keep_state" {
|
||||||
|
sh.rulesWithErrorSilenceLabels++
|
||||||
|
ar.Labels[models.MigratedSilenceLabelErrorKeepState] = "true"
|
||||||
|
}
|
||||||
|
if parsedSettings.NoDataState == "keep_state" {
|
||||||
|
sh.rulesWithNoDataSilenceLabels++
|
||||||
|
ar.Labels[models.MigratedSilenceLabelNodataKeepState] = "true"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createSilences creates silences and writes them to a file.
|
||||||
|
func (sh *silenceHandler) createSilences(orgID int64, log log.Logger) error {
|
||||||
|
var silences []*pb.MeshSilence
|
||||||
|
if sh.rulesWithErrorSilenceLabels > 0 {
|
||||||
|
log.Info("Creating silence for rules with ExecutionErrorState = keep_state", "rules", sh.rulesWithErrorSilenceLabels)
|
||||||
|
silences = append(silences, errorSilence())
|
||||||
|
}
|
||||||
|
if sh.rulesWithNoDataSilenceLabels > 0 {
|
||||||
|
log.Info("Creating silence for rules with NoDataState = keep_state", "rules", sh.rulesWithNoDataSilenceLabels)
|
||||||
|
silences = append(silences, noDataSilence())
|
||||||
|
}
|
||||||
|
if len(silences) > 0 {
|
||||||
|
log.Debug("Writing silences file", "silences", len(silences))
|
||||||
|
if err := sh.writeSilencesFile(orgID, silences); err != nil {
|
||||||
|
return fmt.Errorf("write silence file: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorSilence creates a silence that matches DatasourceError alerts for rules which have a label attached when ExecutionErrorState was set to keep_state.
|
||||||
|
func errorSilence() *pb.MeshSilence {
|
||||||
|
return &pb.MeshSilence{
|
||||||
Silence: &pb.Silence{
|
Silence: &pb.Silence{
|
||||||
Id: uid.String(),
|
Id: util.GenerateShortUID(),
|
||||||
Matchers: []*pb.Matcher{
|
Matchers: []*pb.Matcher{
|
||||||
{
|
{
|
||||||
Type: pb.Matcher_EQUAL,
|
Type: pb.Matcher_EQUAL,
|
||||||
Name: model.AlertNameLabel,
|
Name: model.AlertNameLabel,
|
||||||
Pattern: ErrorAlertName,
|
Pattern: ngstate.ErrorAlertName,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Type: pb.Matcher_EQUAL,
|
Type: pb.Matcher_EQUAL,
|
||||||
Name: "rule_uid",
|
Name: models.MigratedSilenceLabelErrorKeepState,
|
||||||
Pattern: rule.UID,
|
Pattern: "true",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
StartsAt: time.Now(),
|
StartsAt: TimeNow(),
|
||||||
EndsAt: time.Now().AddDate(1, 0, 0), // 1 year
|
EndsAt: TimeNow().AddDate(1, 0, 0), // 1 year
|
||||||
CreatedBy: "Grafana Migration",
|
CreatedBy: "Grafana Migration",
|
||||||
Comment: fmt.Sprintf("Created during migration to unified alerting to silence Error state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for Error state", rule.UID, rule.Title),
|
Comment: "Created during migration to unified alerting to silence Error state when the option 'Keep Last State' was selected for Error state",
|
||||||
},
|
},
|
||||||
ExpiresAt: time.Now().AddDate(1, 0, 0), // 1 year
|
ExpiresAt: TimeNow().AddDate(1, 0, 0), // 1 year
|
||||||
}
|
}
|
||||||
om.silences = append(om.silences, s)
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// addNoDataSilence adds a silence for the given rule to the orgMigration if the NoDataState was set to keep_state.
|
// noDataSilence creates a silence that matches DatasourceNoData alerts for rules which have a label attached when NoDataState was set to keep_state.
|
||||||
func (om *OrgMigration) addNoDataSilence(rule *models.AlertRule) error {
|
func noDataSilence() *pb.MeshSilence {
|
||||||
uid, err := uuid.NewRandom()
|
return &pb.MeshSilence{
|
||||||
if err != nil {
|
|
||||||
return errors.New("create uuid for silence")
|
|
||||||
}
|
|
||||||
|
|
||||||
s := &pb.MeshSilence{
|
|
||||||
Silence: &pb.Silence{
|
Silence: &pb.Silence{
|
||||||
Id: uid.String(),
|
Id: util.GenerateShortUID(),
|
||||||
Matchers: []*pb.Matcher{
|
Matchers: []*pb.Matcher{
|
||||||
{
|
{
|
||||||
Type: pb.Matcher_EQUAL,
|
Type: pb.Matcher_EQUAL,
|
||||||
Name: model.AlertNameLabel,
|
Name: model.AlertNameLabel,
|
||||||
Pattern: NoDataAlertName,
|
Pattern: ngstate.NoDataAlertName,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Type: pb.Matcher_EQUAL,
|
Type: pb.Matcher_EQUAL,
|
||||||
Name: "rule_uid",
|
Name: models.MigratedSilenceLabelNodataKeepState,
|
||||||
Pattern: rule.UID,
|
Pattern: "true",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
StartsAt: time.Now(),
|
StartsAt: TimeNow(),
|
||||||
EndsAt: time.Now().AddDate(1, 0, 0), // 1 year.
|
EndsAt: TimeNow().AddDate(1, 0, 0), // 1 year.
|
||||||
CreatedBy: "Grafana Migration",
|
CreatedBy: "Grafana Migration",
|
||||||
Comment: fmt.Sprintf("Created during migration to unified alerting to silence NoData state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for NoData state", rule.UID, rule.Title),
|
Comment: "Created during migration to unified alerting to silence NoData state when the option 'Keep Last State' was selected for NoData state",
|
||||||
},
|
},
|
||||||
ExpiresAt: time.Now().AddDate(1, 0, 0), // 1 year.
|
ExpiresAt: TimeNow().AddDate(1, 0, 0), // 1 year.
|
||||||
}
|
}
|
||||||
om.silences = append(om.silences, s)
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeSilencesFile(dataPath string, orgID int64, silences []*pb.MeshSilence) error {
|
func (sh *silenceHandler) writeSilencesFile(orgId int64, silences []*pb.MeshSilence) error {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
for _, e := range silences {
|
for _, e := range silences {
|
||||||
if _, err := pbutil.WriteDelimited(&buf, e); err != nil {
|
if _, err := pbutil.WriteDelimited(&buf, e); err != nil {
|
||||||
@@ -100,7 +124,7 @@ func writeSilencesFile(dataPath string, orgID int64, silences []*pb.MeshSilence)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := openReplace(silencesFileNameForOrg(dataPath, orgID))
|
f, err := sh.createSilenceFile(silencesFileNameForOrg(sh.dataPath, orgId))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -133,7 +157,7 @@ func (f *replaceFile) Close() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// openReplace opens a new temporary file that is moved to filename on closing.
|
// openReplace opens a new temporary file that is moved to filename on closing.
|
||||||
func openReplace(filename string) (*replaceFile, error) {
|
func openReplace(filename string) (io.WriteCloser, error) {
|
||||||
tmpFilename := fmt.Sprintf("%s.%x", filename, uint64(rand.Int63()))
|
tmpFilename := fmt.Sprintf("%s.%x", filename, uint64(rand.Int63()))
|
||||||
|
|
||||||
if err := os.MkdirAll(filepath.Dir(tmpFilename), os.ModePerm); err != nil {
|
if err := os.MkdirAll(filepath.Dir(tmpFilename), os.ModePerm); err != nil {
|
||||||
@@ -152,7 +176,3 @@ func openReplace(filename string) (*replaceFile, error) {
|
|||||||
}
|
}
|
||||||
return rf, nil
|
return rf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getLabelForSilenceMatching(ruleUID string) (string, string) {
|
|
||||||
return "rule_uid", ruleUID
|
|
||||||
}
|
|
||||||
|
|||||||
161
pkg/services/ngalert/migration/silences_test.go
Normal file
161
pkg/services/ngalert/migration/silences_test.go
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
package migration
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||||
|
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/db"
|
||||||
|
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
|
||||||
|
"github.com/grafana/grafana/pkg/setting"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSilences(t *testing.T) {
|
||||||
|
t.Run("when some alerts have executionErrorState, create and write silence", func(t *testing.T) {
|
||||||
|
withSetting := func(alert *legacymodels.Alert, key, val string) *legacymodels.Alert {
|
||||||
|
alert.Settings.Set(key, val)
|
||||||
|
return alert
|
||||||
|
}
|
||||||
|
|
||||||
|
now = time.Now()
|
||||||
|
TimeNow = func() time.Time {
|
||||||
|
return now
|
||||||
|
}
|
||||||
|
|
||||||
|
o := createOrg(t, 1)
|
||||||
|
folder1 := createFolder(t, 1, o.ID, "folder-1")
|
||||||
|
dash1 := createDashboard(t, 3, o.ID, "dash1", folder1.ID, nil)
|
||||||
|
|
||||||
|
silenceTests := []struct {
|
||||||
|
name string
|
||||||
|
alerts []*legacymodels.Alert
|
||||||
|
expectedSilences []*pb.MeshSilence
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "single alert with executionErrorState",
|
||||||
|
alerts: []*legacymodels.Alert{withSetting(createAlert(t, int(o.ID), int(dash1.ID), 1, "alert-1", []string{}), "executionErrorState", "keep_state")},
|
||||||
|
expectedSilences: []*pb.MeshSilence{errorSilence()},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single alert with noDataState",
|
||||||
|
alerts: []*legacymodels.Alert{withSetting(createAlert(t, int(o.ID), int(dash1.ID), 1, "alert-1", []string{}), "noDataState", "keep_state")},
|
||||||
|
expectedSilences: []*pb.MeshSilence{noDataSilence()},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple alerts with both executionErrorState and noDataState",
|
||||||
|
alerts: []*legacymodels.Alert{
|
||||||
|
withSetting(createAlert(t, int(o.ID), int(dash1.ID), 1, "alert-1", []string{}), "executionErrorState", "keep_state"),
|
||||||
|
withSetting(createAlert(t, int(o.ID), int(dash1.ID), 2, "alert-2", []string{}), "noDataState", "keep_state"),
|
||||||
|
},
|
||||||
|
expectedSilences: []*pb.MeshSilence{errorSilence(), noDataSilence()},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no alerts with keep_state, no silences",
|
||||||
|
alerts: []*legacymodels.Alert{
|
||||||
|
createAlert(t, int(o.ID), int(dash1.ID), 1, "alert-1", []string{}),
|
||||||
|
createAlert(t, int(o.ID), int(dash1.ID), 2, "alert-2", []string{}),
|
||||||
|
},
|
||||||
|
expectedSilences: []*pb.MeshSilence{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range silenceTests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
sqlStore := db.InitTestDB(t)
|
||||||
|
x := sqlStore.GetEngine()
|
||||||
|
|
||||||
|
_, err := x.Insert(o, folder1, dash1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
_, err = x.Insert(test.alerts)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
cfg := setting.NewCfg()
|
||||||
|
cfg.DataPath = "/a/b/c"
|
||||||
|
service := NewTestMigrationService(t, sqlStore, cfg)
|
||||||
|
|
||||||
|
sb := stringsBuilderCloser{
|
||||||
|
Builder: &strings.Builder{},
|
||||||
|
}
|
||||||
|
silenceFileAsString := func(filename string) (io.WriteCloser, error) {
|
||||||
|
_, err := sb.WriteString(filename)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return sb, nil
|
||||||
|
}
|
||||||
|
service.silences.createSilenceFile = silenceFileAsString
|
||||||
|
|
||||||
|
require.NoError(t, service.migrateAllOrgs(context.Background()))
|
||||||
|
|
||||||
|
expectedFilename := ""
|
||||||
|
if len(test.expectedSilences) > 0 {
|
||||||
|
expectedFilename = cfg.DataPath + "/alerting/1/silences"
|
||||||
|
filename := sb.String()[:len(expectedFilename)]
|
||||||
|
require.Equal(t, expectedFilename, filename)
|
||||||
|
}
|
||||||
|
|
||||||
|
contents := sb.String()[len(expectedFilename):]
|
||||||
|
st, err := decodeState(strings.NewReader(contents))
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, st, len(test.expectedSilences))
|
||||||
|
|
||||||
|
silences := make([]*pb.MeshSilence, 0, len(st))
|
||||||
|
for _, s := range st {
|
||||||
|
silences = append(silences, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
cOpt := []cmp.Option{
|
||||||
|
cmpopts.SortSlices(func(a, b *pb.MeshSilence) bool { return a.Silence.Comment < b.Silence.Comment }),
|
||||||
|
cmpopts.IgnoreUnexported(pb.MeshSilence{}),
|
||||||
|
cmpopts.IgnoreFields(pb.Silence{}, "Id"),
|
||||||
|
}
|
||||||
|
if !cmp.Equal(silences, test.expectedSilences, cOpt...) {
|
||||||
|
t.Errorf("Unexpected Silence: %v", cmp.Diff(silences, test.expectedSilences, cOpt...))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type stringsBuilderCloser struct {
|
||||||
|
*strings.Builder
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s stringsBuilderCloser) Close() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// state copied from prometheus-alertmanager/silence/silence.go.
|
||||||
|
type state map[string]*pb.MeshSilence
|
||||||
|
|
||||||
|
// decodeState copied from prometheus-alertmanager/silence/silence.go.
|
||||||
|
func decodeState(r io.Reader) (state, error) {
|
||||||
|
st := state{}
|
||||||
|
for {
|
||||||
|
var s pb.MeshSilence
|
||||||
|
_, err := pbutil.ReadDelimited(r, &s)
|
||||||
|
if err == nil {
|
||||||
|
if s.Silence == nil {
|
||||||
|
return nil, ErrInvalidState
|
||||||
|
}
|
||||||
|
st[s.Silence.Id] = &s
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
//nolint:errorlint
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return st, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var ErrInvalidState = fmt.Errorf("invalid state")
|
||||||
@@ -4,7 +4,8 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/infra/log/logtest"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/infra/serverlock"
|
"github.com/grafana/grafana/pkg/infra/serverlock"
|
||||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||||
@@ -19,14 +20,16 @@ func NewTestMigrationService(t *testing.T, sqlStore *sqlstore.SQLStore, cfg *set
|
|||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
cfg = setting.NewCfg()
|
cfg = setting.NewCfg()
|
||||||
}
|
}
|
||||||
return &migrationService{
|
|
||||||
lock: serverlock.ProvideService(sqlStore, tracing.InitializeTracerForTest()),
|
svc, err := ProvideService(
|
||||||
log: &logtest.Fake{},
|
serverlock.ProvideService(sqlStore, tracing.InitializeTracerForTest()),
|
||||||
cfg: cfg,
|
cfg,
|
||||||
store: sqlStore,
|
sqlStore,
|
||||||
migrationStore: migrationStore.NewTestMigrationStore(t, sqlStore, cfg),
|
migrationStore.NewTestMigrationStore(t, sqlStore, cfg),
|
||||||
encryptionService: fake_secrets.NewFakeSecretsService(),
|
fake_secrets.NewFakeSecretsService(),
|
||||||
}
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return svc.(*migrationService)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewFakeMigrationService(t testing.TB) *fakeMigrationService {
|
func NewFakeMigrationService(t testing.TB) *fakeMigrationService {
|
||||||
|
|||||||
@@ -113,6 +113,10 @@ const (
|
|||||||
MigratedUseLegacyChannelsLabel = MigratedLabelPrefix + "use_channels__"
|
MigratedUseLegacyChannelsLabel = MigratedLabelPrefix + "use_channels__"
|
||||||
// MigratedContactLabelPrefix is created during legacy migration to route a migrated alert rule to a specific migrated channel.
|
// MigratedContactLabelPrefix is created during legacy migration to route a migrated alert rule to a specific migrated channel.
|
||||||
MigratedContactLabelPrefix = MigratedLabelPrefix + "c_"
|
MigratedContactLabelPrefix = MigratedLabelPrefix + "c_"
|
||||||
|
// MigratedSilenceLabelErrorKeepState is a label that will match a silence rule intended for legacy alerts with error state = keep_state.
|
||||||
|
MigratedSilenceLabelErrorKeepState = MigratedLabelPrefix + "silence_error_keep_state__"
|
||||||
|
// MigratedSilenceLabelNodataKeepState is a label that will match a silence rule intended for legacy alerts with nodata state = keep_state.
|
||||||
|
MigratedSilenceLabelNodataKeepState = MigratedLabelPrefix + "silence_nodata_keep_state__"
|
||||||
// MigratedAlertIdAnnotation is created during legacy migration to store the ID of the migrated legacy alert rule.
|
// MigratedAlertIdAnnotation is created during legacy migration to store the ID of the migrated legacy alert rule.
|
||||||
MigratedAlertIdAnnotation = "__alertId__"
|
MigratedAlertIdAnnotation = "__alertId__"
|
||||||
// MigratedMessageAnnotation is created during legacy migration to store the migrated alert message.
|
// MigratedMessageAnnotation is created during legacy migration to store the migrated alert message.
|
||||||
|
|||||||
Reference in New Issue
Block a user