diff --git a/pkg/services/ngalert/migration/alert_rule.go b/pkg/services/ngalert/migration/alert_rule.go index 8ea089c3724..1d813863eb6 100644 --- a/pkg/services/ngalert/migration/alert_rule.go +++ b/pkg/services/ngalert/migration/alert_rule.go @@ -9,19 +9,20 @@ import ( "github.com/prometheus/common/model" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/infra/log" legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" + "github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/datasources" - migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/tsdb/graphite" "github.com/grafana/grafana/pkg/util" ) -func addLabelsAndAnnotations(l log.Logger, alert *legacymodels.Alert, dashboardUID string, channels []*legacymodels.AlertNotification) (data.Labels, data.Labels) { +func addLabelsAndAnnotations(l log.Logger, alert *legacymodels.Alert, dashboardUID string) (data.Labels, data.Labels) { tags := alert.GetTagsFromSettings() - lbls := make(data.Labels, len(tags)+len(channels)+1) + lbls := make(data.Labels, len(tags)+1) for _, t := range tags { lbls[t.Key] = t.Value @@ -29,9 +30,6 @@ func addLabelsAndAnnotations(l log.Logger, alert *legacymodels.Alert, dashboardU // Add a label for routing lbls[ngmodels.MigratedUseLegacyChannelsLabel] = "true" - for _, c := range channels { - lbls[contactLabel(c.Name)] = "true" - } annotations := make(data.Labels, 4) annotations[ngmodels.DashboardUIDAnnotation] = dashboardUID @@ -45,7 +43,7 @@ func addLabelsAndAnnotations(l log.Logger, alert *legacymodels.Alert, dashboardU } // migrateAlert migrates a single dashboard alert from legacy alerting to unified alerting. -func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *legacymodels.Alert, info migmodels.DashboardUpgradeInfo) (*ngmodels.AlertRule, error) { +func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *legacymodels.Alert, dashboard *dashboards.Dashboard) (*ngmodels.AlertRule, error) { l.Debug("Migrating alert rule to Unified Alerting") rawSettings, err := json.Marshal(alert.Settings) if err != nil { @@ -61,9 +59,7 @@ func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *l return nil, fmt.Errorf("transform conditions: %w", err) } - channels := om.extractChannels(l, parsedSettings) - - lbls, annotations := addLabelsAndAnnotations(l, alert, info.DashboardUID, channels) + lbls, annotations := addLabelsAndAnnotations(l, alert, dashboard.UID) data, err := migrateAlertRuleQueries(l, cond.Data) if err != nil { @@ -75,29 +71,19 @@ func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *l isPaused = true } - // Here we ensure that the alert rule title is unique within the folder. - titleDeduplicator := om.titleDeduplicatorForFolder(info.NewFolderUID) - name, err := titleDeduplicator.Deduplicate(alert.Name) - if err != nil { - return nil, err - } - if name != alert.Name { - l.Info(fmt.Sprintf("Alert rule title modified to be unique within the folder and fit within the maximum length of %d", store.AlertDefinitionMaxTitleLength), "old", alert.Name, "new", name) - } - - dashUID := info.DashboardUID + dashUID := dashboard.UID ar := &ngmodels.AlertRule{ OrgID: alert.OrgID, - Title: name, + Title: alert.Name, // Title will be deduplicated on persist. UID: util.GenerateShortUID(), Condition: cond.Condition, Data: data, IntervalSeconds: ruleAdjustInterval(alert.Frequency), Version: 1, - NamespaceUID: info.NewFolderUID, + NamespaceUID: "", // The folder for this alert is determined later. DashboardUID: &dashUID, PanelID: &alert.PanelID, - RuleGroup: groupName(ruleAdjustInterval(alert.Frequency), info.DashboardName), + RuleGroup: groupName(ruleAdjustInterval(alert.Frequency), dashboard.Title), For: alert.For, Updated: time.Now().UTC(), Annotations: annotations, @@ -124,6 +110,14 @@ func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *l } } + // We do some validation and pre-save operations early in order to track these errors as part of the migration state. + if err := ar.ValidateAlertRule(om.cfg.UnifiedAlerting); err != nil { + return nil, err + } + if err := ar.PreSave(time.Now); err != nil { + return nil, err + } + return ar, nil } @@ -286,31 +280,6 @@ func truncate(daName string, length int) string { return daName } -// extractChannels extracts notification channels from the given legacy dashboard alert parsed settings. -func (om *OrgMigration) extractChannels(l log.Logger, parsedSettings dashAlertSettings) []*legacymodels.AlertNotification { - // Extracting channels. - channels := make([]*legacymodels.AlertNotification, 0, len(parsedSettings.Notifications)) - for _, key := range parsedSettings.Notifications { - // Either id or uid can be defined in the dashboard alert notification settings. See alerting.NewRuleFromDBAlert. - if key.ID > 0 { - if c, ok := om.channelCache.GetChannelByID(key.ID); ok { - channels = append(channels, c) - continue - } - } - - if key.UID != "" { - if c, ok := om.channelCache.GetChannelByUID(key.UID); ok { - channels = append(channels, c) - continue - } - } - - l.Warn("Failed to get alert notification, skipping", "notificationKey", key) - } - return channels -} - // groupName constructs a group name from the dashboard title and the interval. It truncates the dashboard title // if necessary to ensure that the group name is not longer than the maximum allowed length. func groupName(interval int64, dashboardTitle string) string { diff --git a/pkg/services/ngalert/migration/alert_rule_test.go b/pkg/services/ngalert/migration/alert_rule_test.go index a45a0595dc7..b22e7a45684 100644 --- a/pkg/services/ngalert/migration/alert_rule_test.go +++ b/pkg/services/ngalert/migration/alert_rule_test.go @@ -14,7 +14,7 @@ import ( "github.com/grafana/grafana/pkg/infra/db" "github.com/grafana/grafana/pkg/infra/log/logtest" legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" - migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" + "github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/store" ) @@ -123,7 +123,7 @@ func TestAddMigrationInfo(t *testing.T) { for _, tc := range tt { t.Run(tc.name, func(t *testing.T) { - labels, annotations := addLabelsAndAnnotations(&logtest.Fake{}, tc.alert, tc.dashboard, nil) + labels, annotations := addLabelsAndAnnotations(&logtest.Fake{}, tc.alert, tc.dashboard) require.Equal(t, tc.expectedLabels, labels) require.Equal(t, tc.expectedAnnotations, annotations) }) @@ -132,56 +132,18 @@ func TestAddMigrationInfo(t *testing.T) { func TestMakeAlertRule(t *testing.T) { sqlStore := db.InitTestDB(t) - info := migmodels.DashboardUpgradeInfo{ - DashboardUID: "dashboarduid", - DashboardName: "dashboardname", - NewFolderUID: "newfolderuid", - NewFolderName: "newfoldername", - } + dashboard := dashboards.Dashboard{ID: 1, UID: "dashboarduid", Title: "dashboardname"} t.Run("when mapping rule names", func(t *testing.T) { t.Run("leaves basic names untouched", func(t *testing.T) { service := NewTestMigrationService(t, sqlStore, nil) m := service.newOrgMigration(1) da := createTestDashAlert() - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.NoError(t, err) require.Equal(t, da.Name, ar.Title) }) - - t.Run("truncates very long names to max length", func(t *testing.T) { - service := NewTestMigrationService(t, sqlStore, nil) - m := service.newOrgMigration(1) - da := createTestDashAlert() - da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1) - - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) - - require.NoError(t, err) - require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength) - }) - - t.Run("deduplicate names in same org and folder", func(t *testing.T) { - service := NewTestMigrationService(t, sqlStore, nil) - m := service.newOrgMigration(1) - da := createTestDashAlert() - da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1) - - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) - - require.NoError(t, err) - require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength) - - da = createTestDashAlert() - da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1) - - ar, err = m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) - - require.NoError(t, err) - require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength) - require.Equal(t, ar.Title, fmt.Sprintf("%s #2", strings.Repeat("a", store.AlertDefinitionMaxTitleLength-3))) - }) }) t.Run("alert is not paused", func(t *testing.T) { @@ -189,7 +151,7 @@ func TestMakeAlertRule(t *testing.T) { m := service.newOrgMigration(1) da := createTestDashAlert() - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.NoError(t, err) require.False(t, ar.IsPaused) }) @@ -200,7 +162,7 @@ func TestMakeAlertRule(t *testing.T) { da := createTestDashAlert() da.State = "paused" - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.NoError(t, err) require.True(t, ar.IsPaused) }) @@ -211,7 +173,7 @@ func TestMakeAlertRule(t *testing.T) { da := createTestDashAlert() da.Settings.Set("noDataState", uuid.NewString()) - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.Nil(t, err) require.Equal(t, models.NoData, ar.NoDataState) }) @@ -222,7 +184,7 @@ func TestMakeAlertRule(t *testing.T) { da := createTestDashAlert() da.Settings.Set("executionErrorState", uuid.NewString()) - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.Nil(t, err) require.Equal(t, models.ErrorErrState, ar.ExecErrState) }) @@ -233,7 +195,7 @@ func TestMakeAlertRule(t *testing.T) { da := createTestDashAlert() da.Message = "Instance ${instance} is down" - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.Nil(t, err) expected := "{{- $mergedLabels := mergeLabelValues $values -}}\n" + @@ -282,10 +244,10 @@ func TestMakeAlertRule(t *testing.T) { t.Run(fmt.Sprintf("interval %ds should be %s", test.interval, test.expected), func(t *testing.T) { da.Frequency = test.interval - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &dashboard) require.NoError(t, err) - require.Equal(t, fmt.Sprintf("%s - %s", info.DashboardName, test.expected), ar.RuleGroup) + require.Equal(t, fmt.Sprintf("%s - %s", dashboard.Title, test.expected), ar.RuleGroup) }) } }) @@ -294,14 +256,9 @@ func TestMakeAlertRule(t *testing.T) { service := NewTestMigrationService(t, sqlStore, nil) m := service.newOrgMigration(1) da := createTestDashAlert() - info := migmodels.DashboardUpgradeInfo{ - DashboardUID: "dashboarduid", - DashboardName: strings.Repeat("a", store.AlertRuleMaxRuleGroupNameLength-1), - NewFolderUID: "newfolderuid", - NewFolderName: "newfoldername", - } + longNamedDashboard := dashboards.Dashboard{UID: "dashboarduid", Title: strings.Repeat("a", store.AlertRuleMaxRuleGroupNameLength-1)} - ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info) + ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, &longNamedDashboard) require.NoError(t, err) require.Len(t, ar.RuleGroup, store.AlertRuleMaxRuleGroupNameLength) @@ -312,6 +269,7 @@ func TestMakeAlertRule(t *testing.T) { func createTestDashAlert() *legacymodels.Alert { return &legacymodels.Alert{ + OrgID: 1, ID: 1, Name: "test", Settings: simplejson.New(), diff --git a/pkg/services/ngalert/migration/channel.go b/pkg/services/ngalert/migration/channel.go index 78dc2c9c57d..770711c2c68 100644 --- a/pkg/services/ngalert/migration/channel.go +++ b/pkg/services/ngalert/migration/channel.go @@ -7,8 +7,6 @@ import ( "fmt" "time" - alertingNotify "github.com/grafana/alerting/notify" - "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" @@ -25,69 +23,45 @@ const ( DisabledRepeatInterval = model.Duration(time.Duration(8736) * time.Hour) // 1y ) +// ErrDiscontinued is used for channels that are no longer supported after migration. +var ErrDiscontinued = errors.New("discontinued") + // migrateChannels creates Alertmanager configs with migrated receivers and routes. -func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification) (*migmodels.Alertmanager, error) { - amConfig := migmodels.NewAlertmanager() - empty := true +func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification) ([]*migmodels.ContactPair, error) { // Create all newly migrated receivers from legacy notification channels. + pairs := make([]*migmodels.ContactPair, 0, len(channels)) for _, c := range channels { + pair := &migmodels.ContactPair{ + Channel: c, + } receiver, err := om.createReceiver(c) if err != nil { - if errors.Is(err, ErrDiscontinued) { - om.log.Error("Alert migration error: discontinued notification channel found", "type", c.Type, "name", c.Name, "uid", c.UID) - continue - } - return nil, fmt.Errorf("channel '%s': %w", c.Name, err) + om.log.Warn("Failed to create receiver", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err) + pair.Error = err + pairs = append(pairs, pair) + continue } + pair.ContactPoint = receiver - empty = false route, err := createRoute(c, receiver.Name) if err != nil { - return nil, fmt.Errorf("channel '%s': %w", c.Name, err) + om.log.Warn("Failed to create route", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err) + pair.Error = err + pairs = append(pairs, pair) + continue } - amConfig.AddRoute(route) - amConfig.AddReceiver(receiver) - } - if empty { - return nil, nil + pair.Route = route + pairs = append(pairs, pair) } - return amConfig, nil -} - -// validateAlertmanagerConfig validates the alertmanager configuration produced by the migration against the receivers. -func (om *OrgMigration) validateAlertmanagerConfig(config *apimodels.PostableUserConfig) error { - for _, r := range config.AlertmanagerConfig.Receivers { - for _, gr := range r.GrafanaManagedReceivers { - data, err := gr.Settings.MarshalJSON() - if err != nil { - return err - } - var ( - cfg = &alertingNotify.GrafanaIntegrationConfig{ - UID: gr.UID, - Name: gr.Name, - Type: gr.Type, - DisableResolveMessage: gr.DisableResolveMessage, - Settings: data, - SecureSettings: gr.SecureSettings, - } - ) - - _, err = alertingNotify.BuildReceiverConfiguration(context.Background(), &alertingNotify.APIReceiver{ - GrafanaIntegrations: alertingNotify.GrafanaIntegrations{Integrations: []*alertingNotify.GrafanaIntegrationConfig{cfg}}, - }, om.encryptionService.GetDecryptedValue) - if err != nil { - return err - } - } - } - - return nil + return pairs, nil } // createNotifier creates a PostableGrafanaReceiver from a legacy notification channel. -func (om *OrgMigration) createNotifier(c *legacymodels.AlertNotification) (*apimodels.PostableGrafanaReceiver, error) { +func (om *OrgMigration) createReceiver(c *legacymodels.AlertNotification) (*apimodels.PostableGrafanaReceiver, error) { + if c.Type == "hipchat" || c.Type == "sensu" { + return nil, fmt.Errorf("'%s': %w", c.Type, ErrDiscontinued) + } settings, secureSettings, err := om.migrateSettingsToSecureSettings(c.Type, c.Settings, c.SecureSettings) if err != nil { return nil, err @@ -108,29 +82,6 @@ func (om *OrgMigration) createNotifier(c *legacymodels.AlertNotification) (*apim }, nil } -var ErrDiscontinued = errors.New("discontinued") - -// createReceiver creates a receiver from a legacy notification channel. -func (om *OrgMigration) createReceiver(channel *legacymodels.AlertNotification) (*apimodels.PostableApiReceiver, error) { - if channel.Type == "hipchat" || channel.Type == "sensu" { - return nil, fmt.Errorf("'%s': %w", channel.Type, ErrDiscontinued) - } - - notifier, err := om.createNotifier(channel) - if err != nil { - return nil, err - } - - return &apimodels.PostableApiReceiver{ - Receiver: config.Receiver{ - Name: channel.Name, // Channel name is unique within an Org. - }, - PostableGrafanaReceivers: apimodels.PostableGrafanaReceivers{ - GrafanaManagedReceivers: []*apimodels.PostableGrafanaReceiver{notifier}, - }, - }, nil -} - // createRoute creates a route from a legacy notification channel, and matches using a label based on the channel UID. func createRoute(channel *legacymodels.AlertNotification, receiverName string) (*apimodels.Route, error) { // We create a matchers based on channel name so that we only need a single route per channel. diff --git a/pkg/services/ngalert/migration/channel_test.go b/pkg/services/ngalert/migration/channel_test.go index 8bb5713f96f..eac2672dc92 100644 --- a/pkg/services/ngalert/migration/channel_test.go +++ b/pkg/services/ngalert/migration/channel_test.go @@ -10,7 +10,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -19,7 +18,7 @@ import ( "github.com/grafana/grafana/pkg/infra/db" legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" - ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" + migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" "github.com/grafana/grafana/pkg/services/ngalert/notifier/channels_config" "github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/util" @@ -29,13 +28,13 @@ func TestCreateRoute(t *testing.T) { tc := []struct { name string channel *legacymodels.AlertNotification - recv *apimodels.PostableApiReceiver + recv *apimodels.PostableGrafanaReceiver expected *apimodels.Route }{ { name: "when a receiver is passed in, the route should exact match based on channel uid with continue=true", channel: &legacymodels.AlertNotification{UID: "uid1", Name: "recv1"}, - recv: createPostableApiReceiver("uid1", "recv1"), + recv: createPostableGrafanaReceiver("uid1", "recv1"), expected: &apimodels.Route{ Receiver: "recv1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("recv1"), Value: "true"}}, @@ -48,7 +47,7 @@ func TestCreateRoute(t *testing.T) { { name: "notification channel labels matcher should work with special characters", channel: &legacymodels.AlertNotification{UID: "uid1", Name: `. ^ $ * + - ? ( ) [ ] { } \ |`}, - recv: createPostableApiReceiver("uid1", `. ^ $ * + - ? ( ) [ ] { } \ |`), + recv: createPostableGrafanaReceiver("uid1", `. ^ $ * + - ? ( ) [ ] { } \ |`), expected: &apimodels.Route{ Receiver: `. ^ $ * + - ? ( ) [ ] { } \ |`, ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel(`. ^ $ * + - ? ( ) [ ] { } \ |`), Value: "true"}}, @@ -61,7 +60,7 @@ func TestCreateRoute(t *testing.T) { { name: "when a channel has sendReminder=true, the route should use the frequency in repeat interval", channel: &legacymodels.AlertNotification{SendReminder: true, Frequency: time.Duration(42) * time.Hour, UID: "uid1", Name: "recv1"}, - recv: createPostableApiReceiver("uid1", "recv1"), + recv: createPostableGrafanaReceiver("uid1", "recv1"), expected: &apimodels.Route{ Receiver: "recv1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("recv1"), Value: "true"}}, @@ -74,7 +73,7 @@ func TestCreateRoute(t *testing.T) { { name: "when a channel has sendReminder=false, the route should ignore the frequency in repeat interval and use DisabledRepeatInterval", channel: &legacymodels.AlertNotification{SendReminder: false, Frequency: time.Duration(42) * time.Hour, UID: "uid1", Name: "recv1"}, - recv: createPostableApiReceiver("uid1", "recv1"), + recv: createPostableGrafanaReceiver("uid1", "recv1"), expected: &apimodels.Route{ Receiver: "recv1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("recv1"), Value: "true"}}, @@ -137,13 +136,13 @@ func TestCreateReceivers(t *testing.T) { tc := []struct { name string channel *legacymodels.AlertNotification - expRecv *apimodels.PostableApiReceiver + expRecv *apimodels.PostableGrafanaReceiver expErr error }{ { name: "when given notification channels migrate them to receivers", channel: createNotChannel(t, "uid1", int64(1), "name1", false, 0), - expRecv: createPostableApiReceiver("uid1", "name1"), + expRecv: createPostableGrafanaReceiver("uid1", "name1"), }, { name: "when given hipchat return discontinued error", @@ -180,7 +179,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { require.NoError(t, err) return string(raw) } - decryptFn := func(data string, m *OrgMigration) string { + decryptFn := func(data string, m *migrationService) string { decoded, err := base64.StdEncoding.DecodeString(data) require.NoError(t, err) raw, err := m.encryptionService.Decrypt(context.Background(), decoded) @@ -266,7 +265,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { t.Run(tt.name, func(t *testing.T) { service := NewTestMigrationService(t, sqlStore, nil) m := service.newOrgMigration(1) - recv, err := m.createNotifier(tt.channel) + recv, err := m.createReceiver(tt.channel) if tt.expErr != nil { require.Error(t, err) require.EqualError(t, err, tt.expErr.Error()) @@ -278,7 +277,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { require.NotEqual(t, tt.expRecv, recv) // Make sure they were actually encrypted at first. } for k, v := range recv.SecureSettings { - recv.SecureSettings[k] = decryptFn(v, m) + recv.SecureSettings[k] = decryptFn(v, service) } require.Equal(t, tt.expRecv, recv) }) @@ -300,7 +299,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { channel.SecureSettings[key] = []byte(legacyEncryptFn("secure " + key)) } }) - recv, err := m.createNotifier(channel) + recv, err := m.createReceiver(channel) require.NoError(t, err) require.Equal(t, nType, recv.Type) @@ -311,7 +310,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { } require.Len(t, recv.SecureSettings, len(secureSettings)) for _, key := range secureSettings { - require.Equal(t, "secure "+key, decryptFn(recv.SecureSettings[key], m)) + require.Equal(t, "secure "+key, decryptFn(recv.SecureSettings[key], service)) } }) } @@ -335,7 +334,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { channel.Settings.Set(key, "secure "+key) } }) - recv, err := m.createNotifier(channel) + recv, err := m.createReceiver(channel) require.NoError(t, err) require.Equal(t, nType, recv.Type) @@ -346,7 +345,7 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { } require.Len(t, recv.SecureSettings, len(secureSettings)) for _, key := range secureSettings { - require.Equal(t, "secure "+key, decryptFn(recv.SecureSettings[key], m)) + require.Equal(t, "secure "+key, decryptFn(recv.SecureSettings[key], service)) } }) } @@ -356,88 +355,56 @@ func TestMigrateNotificationChannelSecureSettings(t *testing.T) { func TestSetupAlertmanagerConfig(t *testing.T) { tc := []struct { - name string - channels []*legacymodels.AlertNotification - amConfig *apimodels.PostableUserConfig - expErr error + name string + channels []*legacymodels.AlertNotification + expContactPairs []*migmodels.ContactPair + expErr error }{ { name: "when given multiple notification channels migrate them to receivers", channels: []*legacymodels.AlertNotification{createNotChannel(t, "uid1", int64(1), "notifier1", false, 0), createNotChannel(t, "uid2", int64(2), "notifier2", false, 0)}, - amConfig: &apimodels.PostableUserConfig{ - AlertmanagerConfig: apimodels.PostableApiAlertingConfig{ - Config: apimodels.Config{Route: &apimodels.Route{ - Receiver: "autogen-contact-point-default", - GroupByStr: []string{ngModels.FolderTitleLabel, model.AlertNameLabel}, - Routes: []*apimodels.Route{ - { - ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: ngModels.MigratedUseLegacyChannelsLabel, Value: "true"}}, - Continue: true, - Routes: []*apimodels.Route{ - {Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, - {Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier2"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, - }, - }, - }, - }}, - Receivers: []*apimodels.PostableApiReceiver{ - {Receiver: config.Receiver{Name: "autogen-contact-point-default"}, PostableGrafanaReceivers: apimodels.PostableGrafanaReceivers{GrafanaManagedReceivers: []*apimodels.PostableGrafanaReceiver{}}}, - createPostableApiReceiver("uid1", "notifier1"), - createPostableApiReceiver("uid2", "notifier2"), - }, + expContactPairs: []*migmodels.ContactPair{ + { + Channel: createNotChannel(t, "uid1", int64(1), "notifier1", false, 0), + ContactPoint: createPostableGrafanaReceiver("uid1", "notifier1"), + Route: &apimodels.Route{Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, + }, + { + Channel: createNotChannel(t, "uid2", int64(2), "notifier2", false, 0), + ContactPoint: createPostableGrafanaReceiver("uid2", "notifier2"), + Route: &apimodels.Route{Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier2"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, }, }, }, { name: "when given default notification channels migrate them to a routes with catchall matcher", channels: []*legacymodels.AlertNotification{createNotChannel(t, "uid1", int64(1), "notifier1", false, 0), createNotChannel(t, "uid2", int64(2), "notifier2", true, 0)}, - amConfig: &apimodels.PostableUserConfig{ - AlertmanagerConfig: apimodels.PostableApiAlertingConfig{ - Config: apimodels.Config{Route: &apimodels.Route{ - Receiver: "autogen-contact-point-default", - GroupByStr: []string{ngModels.FolderTitleLabel, model.AlertNameLabel}, - Routes: []*apimodels.Route{ - { - ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: ngModels.MigratedUseLegacyChannelsLabel, Value: "true"}}, - Continue: true, - Routes: []*apimodels.Route{ - {Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchRegexp, Name: model.AlertNameLabel, Value: ".+"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, - {Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, - }, - }, - }, - }}, - Receivers: []*apimodels.PostableApiReceiver{ - {Receiver: config.Receiver{Name: "autogen-contact-point-default"}, PostableGrafanaReceivers: apimodels.PostableGrafanaReceivers{GrafanaManagedReceivers: []*apimodels.PostableGrafanaReceiver{}}}, - createPostableApiReceiver("uid1", "notifier1"), - createPostableApiReceiver("uid2", "notifier2"), - }, + expContactPairs: []*migmodels.ContactPair{ + { + Channel: createNotChannel(t, "uid1", int64(1), "notifier1", false, 0), + ContactPoint: createPostableGrafanaReceiver("uid1", "notifier1"), + Route: &apimodels.Route{Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, + }, + { + Channel: createNotChannel(t, "uid2", int64(2), "notifier2", true, 0), + ContactPoint: createPostableGrafanaReceiver("uid2", "notifier2"), + Route: &apimodels.Route{Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchRegexp, Name: model.AlertNameLabel, Value: ".+"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(DisabledRepeatInterval)}, }, }, }, { name: "when given notification channels with SendReminder true migrate them to a route with frequency set", channels: []*legacymodels.AlertNotification{createNotChannel(t, "uid1", int64(1), "notifier1", false, time.Duration(42)), createNotChannel(t, "uid2", int64(2), "notifier2", false, time.Duration(43))}, - amConfig: &apimodels.PostableUserConfig{ - AlertmanagerConfig: apimodels.PostableApiAlertingConfig{ - Config: apimodels.Config{Route: &apimodels.Route{ - Receiver: "autogen-contact-point-default", - GroupByStr: []string{ngModels.FolderTitleLabel, model.AlertNameLabel}, - Routes: []*apimodels.Route{ - { - ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: ngModels.MigratedUseLegacyChannelsLabel, Value: "true"}}, - Continue: true, - Routes: []*apimodels.Route{ - {Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(42)}, - {Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier2"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(43)}, - }, - }, - }, - }}, - Receivers: []*apimodels.PostableApiReceiver{ - {Receiver: config.Receiver{Name: "autogen-contact-point-default"}, PostableGrafanaReceivers: apimodels.PostableGrafanaReceivers{GrafanaManagedReceivers: []*apimodels.PostableGrafanaReceiver{}}}, - createPostableApiReceiver("uid1", "notifier1"), - createPostableApiReceiver("uid2", "notifier2")}, + expContactPairs: []*migmodels.ContactPair{ + { + Channel: createNotChannel(t, "uid1", int64(1), "notifier1", false, time.Duration(42)), + ContactPoint: createPostableGrafanaReceiver("uid1", "notifier1"), + Route: &apimodels.Route{Receiver: "notifier1", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier1"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(42)}, + }, + { + Channel: createNotChannel(t, "uid2", int64(2), "notifier2", false, time.Duration(43)), + ContactPoint: createPostableGrafanaReceiver("uid2", "notifier2"), + Route: &apimodels.Route{Receiver: "notifier2", ObjectMatchers: apimodels.ObjectMatchers{{Type: labels.MatchEqual, Name: contactLabel("notifier2"), Value: "true"}}, Routes: nil, Continue: true, RepeatInterval: durationPointer(43)}, }, }, }, @@ -449,7 +416,7 @@ func TestSetupAlertmanagerConfig(t *testing.T) { service := NewTestMigrationService(t, sqlStore, nil) m := service.newOrgMigration(1) - am, err := m.migrateChannels(tt.channels) + pairs, err := m.migrateChannels(tt.channels) if tt.expErr != nil { require.Error(t, err) require.EqualError(t, err, tt.expErr.Error()) @@ -457,34 +424,27 @@ func TestSetupAlertmanagerConfig(t *testing.T) { } require.NoError(t, err) - amConfig := am.Config + require.Lenf(t, pairs, len(tt.expContactPairs), "Unexpected number of migrated channels: %v", len(pairs)) + opts := []cmp.Option{ - cmpopts.IgnoreUnexported(apimodels.PostableUserConfig{}, labels.Matcher{}), - cmpopts.SortSlices(func(a, b *apimodels.Route) bool { return a.Receiver < b.Receiver }), + cmpopts.IgnoreUnexported(labels.Matcher{}), + cmpopts.IgnoreFields(legacymodels.AlertNotification{}, "Settings"), + cmpopts.SortSlices(func(a, b *migmodels.ContactPair) bool { return a.Channel.ID < b.Channel.ID }), } - if !cmp.Equal(tt.amConfig, amConfig, opts...) { - t.Errorf("Unexpected Config: %v", cmp.Diff(tt.amConfig, amConfig, opts...)) + if !cmp.Equal(pairs, tt.expContactPairs, opts...) { + t.Errorf("Unexpected Config: %v", cmp.Diff(pairs, tt.expContactPairs, opts...)) } }) } } -func createPostableApiReceiver(uid string, name string) *apimodels.PostableApiReceiver { - return &apimodels.PostableApiReceiver{ - Receiver: config.Receiver{ - Name: name, - }, - PostableGrafanaReceivers: apimodels.PostableGrafanaReceivers{ - GrafanaManagedReceivers: []*apimodels.PostableGrafanaReceiver{ - { - UID: uid, - Type: "email", - Name: name, - Settings: apimodels.RawMessage("{}"), - SecureSettings: map[string]string{}, - }, - }, - }, +func createPostableGrafanaReceiver(uid string, name string) *apimodels.PostableGrafanaReceiver { + return &apimodels.PostableGrafanaReceiver{ + UID: uid, + Type: "email", + Name: name, + Settings: apimodels.RawMessage("{}"), + SecureSettings: map[string]string{}, } } diff --git a/pkg/services/ngalert/migration/cond_trans.go b/pkg/services/ngalert/migration/cond_trans.go index 0f5af561325..dc169bcaffa 100644 --- a/pkg/services/ngalert/migration/cond_trans.go +++ b/pkg/services/ngalert/migration/cond_trans.go @@ -66,7 +66,7 @@ type evaluator struct { } //nolint:gocyclo -func transConditions(ctx context.Context, l log.Logger, set dashAlertSettings, orgID int64, store migrationStore.Store) (*condition, error) { +func transConditions(ctx context.Context, l log.Logger, set dashAlertSettings, orgID int64, store migrationStore.ReadStore) (*condition, error) { // TODO: needs a significant refactor to reduce complexity. usr := getMigrationUser(orgID) diff --git a/pkg/services/ngalert/migration/migration_test.go b/pkg/services/ngalert/migration/migration_test.go index 9806746559c..bee2082311c 100644 --- a/pkg/services/ngalert/migration/migration_test.go +++ b/pkg/services/ngalert/migration/migration_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "testing" "time" @@ -25,6 +26,7 @@ import ( apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store" ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/tsdb/legacydata" @@ -664,6 +666,89 @@ func TestDashAlertMigration(t *testing.T) { } } }) + + t.Run("when migrated rules contain duplicate titles", func(t *testing.T) { + sqlStore := db.InitTestDB(t) + x := sqlStore.GetEngine() + service := NewTestMigrationService(t, sqlStore, &setting.Cfg{}) + alerts := []*models.Alert{ + createAlert(t, 1, 1, 1, "alert1", []string{}), + createAlert(t, 1, 1, 2, "alert1", []string{}), + createAlert(t, 1, 2, 3, "alert1", []string{}), + createAlert(t, 1, 3, 4, "alert1", []string{}), + createAlert(t, 1, 3, 5, "alert1", []string{}), + createAlert(t, 1, 3, 6, "alert1", []string{}), + } + expected := map[int64]map[int64]string{ + int64(1): { + 1: "alert1", + 2: "alert1 #2", + 3: "alert1 #3", + 4: "alert1", + 5: "alert1 #2", + 6: "alert1 #3", + }, + } + dashes := []*dashboards.Dashboard{ + createDashboard(t, 1, 1, "dash1-1", 5, nil), + createDashboard(t, 2, 1, "dash2-1", 5, nil), + createDashboard(t, 3, 1, "dash3-1", 6, nil), + } + folders := []*dashboards.Dashboard{ + createFolder(t, 5, 1, "folder5-1"), + createFolder(t, 6, 1, "folder6-1"), + } + setupLegacyAlertsTables(t, x, nil, alerts, folders, dashes) + err := service.Run(context.Background()) + require.NoError(t, err) + + for orgId := range expected { + rules := getAlertRules(t, x, orgId) + expectedRulesMap := expected[orgId] + require.Len(t, rules, len(expectedRulesMap)) + for _, r := range rules { + delete(r.Labels, "rule_uid") // Not checking this here. + exp := expectedRulesMap[*r.PanelID] + require.Equal(t, exp, r.Title) + } + } + }) + + t.Run("when migrated rules contain titles that are too long", func(t *testing.T) { + sqlStore := db.InitTestDB(t) + x := sqlStore.GetEngine() + service := NewTestMigrationService(t, sqlStore, &setting.Cfg{}) + alerts := []*models.Alert{ + createAlert(t, 1, 1, 1, strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1), []string{}), + createAlert(t, 1, 1, 2, strings.Repeat("a", store.AlertDefinitionMaxTitleLength+2), []string{}), + } + expected := map[int64]map[int64]string{ + int64(1): { + 1: strings.Repeat("a", store.AlertDefinitionMaxTitleLength), + 2: strings.Repeat("a", store.AlertDefinitionMaxTitleLength-3) + " #2", + }, + } + dashes := []*dashboards.Dashboard{ + createDashboard(t, 1, 1, "dash1-1", 5, nil), + } + folders := []*dashboards.Dashboard{ + createFolder(t, 5, 1, "folder5-1"), + } + setupLegacyAlertsTables(t, x, nil, alerts, folders, dashes) + err := service.Run(context.Background()) + require.NoError(t, err) + + for orgId := range expected { + rules := getAlertRules(t, x, orgId) + expectedRulesMap := expected[orgId] + require.Len(t, rules, len(expectedRulesMap)) + for _, r := range rules { + delete(r.Labels, "rule_uid") // Not checking this here. + exp := expectedRulesMap[*r.PanelID] + require.Equal(t, exp, r.Title) + } + } + }) } // TestDashAlertQueryMigration tests the execution of the migration specifically for alert rule queries. @@ -744,7 +829,7 @@ func TestDashAlertQueryMigration(t *testing.T) { ExecErrState: ngModels.AlertingErrState, For: 60 * time.Second, Annotations: map[string]string{ - "message": "message", + ngModels.MigratedMessageAnnotation: "message", }, Labels: map[string]string{ngModels.MigratedUseLegacyChannelsLabel: "true"}, IsPaused: false, @@ -756,8 +841,8 @@ func TestDashAlertQueryMigration(t *testing.T) { rule.RuleGroup = fmt.Sprintf("%s - 1m", *rule.DashboardUID) - rule.Annotations["__dashboardUid__"] = *rule.DashboardUID - rule.Annotations["__panelId__"] = strconv.FormatInt(*rule.PanelID, 10) + rule.Annotations[ngModels.DashboardUIDAnnotation] = *rule.DashboardUID + rule.Annotations[ngModels.PanelIDAnnotation] = strconv.FormatInt(*rule.PanelID, 10) return rule } diff --git a/pkg/services/ngalert/migration/models.go b/pkg/services/ngalert/migration/models.go index 4bb05b2561d..42827ed8e3a 100644 --- a/pkg/services/ngalert/migration/models.go +++ b/pkg/services/ngalert/migration/models.go @@ -1,15 +1,14 @@ package migration import ( + "context" + "errors" + pb "github.com/prometheus/alertmanager/silence/silencepb" "github.com/grafana/grafana/pkg/infra/log" - "github.com/grafana/grafana/pkg/services/accesscontrol" legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" - "github.com/grafana/grafana/pkg/services/folder" - migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store" - "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/secrets" "github.com/grafana/grafana/pkg/setting" ) @@ -22,23 +21,12 @@ type OrgMigration struct { migrationStore migrationStore.Store encryptionService secrets.Service - orgID int64 - silences []*pb.MeshSilence - titleDeduplicatorForFolder func(folderUID string) *migmodels.Deduplicator - channelCache *ChannelCache - - // Migrated folder for a dashboard based on permissions. Parent Folder ID -> unique dashboard permission -> custom folder. - permissionsMap map[int64]map[permissionHash]*folder.Folder - folderCache map[int64]*folder.Folder // Folder ID -> Folder. - folderPermissionCache map[string][]accesscontrol.ResourcePermission // Folder UID -> Folder Permissions. - generalAlertingFolder *folder.Folder - - state *migmodels.OrgMigrationState + orgID int64 + silences []*pb.MeshSilence } // newOrgMigration creates a new OrgMigration for the given orgID. func (ms *migrationService) newOrgMigration(orgID int64) *OrgMigration { - titlededuplicatorPerFolder := make(map[string]*migmodels.Deduplicator) return &OrgMigration{ cfg: ms.cfg, log: ms.log.New("orgID", orgID), @@ -48,43 +36,58 @@ func (ms *migrationService) newOrgMigration(orgID int64) *OrgMigration { orgID: orgID, silences: make([]*pb.MeshSilence, 0), - titleDeduplicatorForFolder: func(folderUID string) *migmodels.Deduplicator { - if _, ok := titlededuplicatorPerFolder[folderUID]; !ok { - titlededuplicatorPerFolder[folderUID] = migmodels.NewDeduplicator(ms.migrationStore.CaseInsensitive(), store.AlertDefinitionMaxTitleLength) - } - return titlededuplicatorPerFolder[folderUID] - }, - channelCache: &ChannelCache{cache: make(map[any]*legacymodels.AlertNotification)}, - - permissionsMap: make(map[int64]map[permissionHash]*folder.Folder), - folderCache: make(map[int64]*folder.Folder), - folderPermissionCache: make(map[string][]accesscontrol.ResourcePermission), - - state: &migmodels.OrgMigrationState{ - OrgID: orgID, - CreatedFolders: make([]string, 0), - }, } } // ChannelCache caches channels by ID and UID. type ChannelCache struct { - cache map[any]*legacymodels.AlertNotification + channels []*legacymodels.AlertNotification + cache map[any]*legacymodels.AlertNotification + fetch func(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error) } -func (c *ChannelCache) LoadChannels(channels []*legacymodels.AlertNotification) { - for _, channel := range channels { - c.cache[channel.ID] = channel - c.cache[channel.UID] = channel +func (c *ChannelCache) Get(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error) { + if key.ID > 0 { + if channel, ok := c.cache[key.ID]; ok { + return channel, nil + } + } + if key.UID != "" { + if channel, ok := c.cache[key.UID]; ok { + return channel, nil + } + } + + channel, err := c.fetch(ctx, key) + if err != nil { + if errors.Is(err, migrationStore.ErrNotFound) { + if key.ID > 0 { + c.cache[key.ID] = nil + } + if key.UID != "" { + c.cache[key.UID] = nil + } + return nil, nil + } + return nil, err + } + + c.cache[channel.ID] = channel + c.cache[channel.UID] = channel + c.channels = append(c.channels, channel) + + return channel, nil +} + +func (ms *migrationService) newChannelCache(orgID int64) *ChannelCache { + return &ChannelCache{ + cache: make(map[any]*legacymodels.AlertNotification), + fetch: func(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error) { + c, err := ms.migrationStore.GetNotificationChannel(ctx, migrationStore.GetNotificationChannelQuery{OrgID: orgID, ID: key.ID, UID: key.UID}) + if err != nil { + return nil, err + } + return c, nil + }, } } - -func (c *ChannelCache) GetChannelByID(id int64) (*legacymodels.AlertNotification, bool) { - channel, ok := c.cache[id] - return channel, ok -} - -func (c *ChannelCache) GetChannelByUID(uid string) (*legacymodels.AlertNotification, bool) { - channel, ok := c.cache[uid] - return channel, ok -} diff --git a/pkg/services/ngalert/migration/models/alertmanager.go b/pkg/services/ngalert/migration/models/alertmanager.go index 3d0869e8fd9..5fcac6ee1e7 100644 --- a/pkg/services/ngalert/migration/models/alertmanager.go +++ b/pkg/services/ngalert/migration/models/alertmanager.go @@ -26,12 +26,25 @@ func NewAlertmanager() *Alertmanager { // AddRoute adds a route to the alertmanager config. func (am *Alertmanager) AddRoute(route *apiModels.Route) { + if route == nil { + return + } am.legacyRoute.Routes = append(am.legacyRoute.Routes, route) } // AddReceiver adds a receiver to the alertmanager config. -func (am *Alertmanager) AddReceiver(recv *apiModels.PostableApiReceiver) { - am.Config.AlertmanagerConfig.Receivers = append(am.Config.AlertmanagerConfig.Receivers, recv) +func (am *Alertmanager) AddReceiver(recv *apiModels.PostableGrafanaReceiver) { + if recv == nil { + return + } + am.Config.AlertmanagerConfig.Receivers = append(am.Config.AlertmanagerConfig.Receivers, &apiModels.PostableApiReceiver{ + Receiver: config.Receiver{ + Name: recv.Name, // Channel name is unique within an Org. + }, + PostableGrafanaReceivers: apiModels.PostableGrafanaReceivers{ + GrafanaManagedReceivers: []*apiModels.PostableGrafanaReceiver{recv}, + }, + }) } // createBaseConfig creates an alertmanager config with the root-level route, default receiver, and nested route diff --git a/pkg/services/ngalert/migration/models/models.go b/pkg/services/ngalert/migration/models/models.go index 8764d77ee3f..74368f09c27 100644 --- a/pkg/services/ngalert/migration/models/models.go +++ b/pkg/services/ngalert/migration/models/models.go @@ -28,13 +28,19 @@ type Deduplicator struct { // caseInsensitive determines whether the string comparison should be case-insensitive. // maxLen determines the maximum length of deduplicated strings. If the deduplicated string would be longer than // maxLen, it will be truncated. -func NewDeduplicator(caseInsensitive bool, maxLen int) *Deduplicator { - return &Deduplicator{ - set: make(map[string]int), +func NewDeduplicator(caseInsensitive bool, maxLen int, initial ...string) *Deduplicator { + d := &Deduplicator{ + set: make(map[string]int, len(initial)), caseInsensitive: caseInsensitive, maxLen: maxLen, uidGenerator: util.GenerateShortUID, } + if len(initial) > 0 { + for _, u := range initial { + d.add(u, 0) + } + } + return d } // Deduplicate returns a unique string based on the given base string. If the base string has not already been seen by diff --git a/pkg/services/ngalert/migration/models/state.go b/pkg/services/ngalert/migration/models/state.go index c965443ba67..a64bad26496 100644 --- a/pkg/services/ngalert/migration/models/state.go +++ b/pkg/services/ngalert/migration/models/state.go @@ -1,15 +1,69 @@ package models -// OrgMigrationState contains information about the state of an org migration. -type OrgMigrationState struct { - OrgID int64 `json:"orgId"` - CreatedFolders []string `json:"createdFolders"` +import ( + "fmt" + + legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" + apiModels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" +) + +type DashboardUpgrade struct { + ID int64 + UID string + Title string + FolderID int64 + MigratedAlerts map[int64]*AlertPair } -// DashboardUpgradeInfo contains information about a dashboard that was upgraded. -type DashboardUpgradeInfo struct { - DashboardUID string - DashboardName string - NewFolderUID string - NewFolderName string +type AlertPair struct { + LegacyRule *legacymodels.Alert + Rule *ngmodels.AlertRule + Error error +} + +type ContactPair struct { + Channel *legacymodels.AlertNotification + ContactPoint *apiModels.PostableGrafanaReceiver + Route *apiModels.Route + Error error +} + +func NewAlertPair(da *legacymodels.Alert, err error) *AlertPair { + return &AlertPair{ + LegacyRule: da, + Error: err, + } +} + +func NewDashboardUpgrade(id int64) *DashboardUpgrade { + return &DashboardUpgrade{ + ID: id, + MigratedAlerts: make(map[int64]*AlertPair), + } +} + +func (du *DashboardUpgrade) AddAlertErrors(err error, alerts ...*legacymodels.Alert) { + for _, da := range alerts { + pair := NewAlertPair(da, err) + du.MigratedAlerts[da.PanelID] = pair + } +} + +// ExtractErrors extracts errors from migrated dashboards and channels. +func ExtractErrors(dus []*DashboardUpgrade, contactPairs []*ContactPair) []error { + errs := make([]error, 0) + for _, du := range dus { + for _, pair := range du.MigratedAlerts { + if pair.Error != nil { + errs = append(errs, fmt.Errorf("migrate alert '%s': %w", pair.LegacyRule.Name, pair.Error)) + } + } + } + for _, pair := range contactPairs { + if pair.Error != nil { + errs = append(errs, fmt.Errorf("migrate channel '%s': %w", pair.Channel.Name, pair.Error)) + } + } + return errs } diff --git a/pkg/services/ngalert/migration/permissions.go b/pkg/services/ngalert/migration/permissions.go index 5013f02ac60..45424161b56 100644 --- a/pkg/services/ngalert/migration/permissions.go +++ b/pkg/services/ngalert/migration/permissions.go @@ -16,7 +16,6 @@ import ( "github.com/grafana/grafana/pkg/services/dashboards/dashboardaccess" "github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/folder" - migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" "github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/services/user" ) @@ -57,45 +56,52 @@ func getMigrationUser(orgID int64) identity.Requester { return accesscontrol.BackgroundUser("ngalert_migration", orgID, org.RoleAdmin, migratorPermissions) } -func (om *OrgMigration) migratedFolder(ctx context.Context, log log.Logger, dashID int64) (*migmodels.DashboardUpgradeInfo, error) { - dash, err := om.migrationStore.GetDashboard(ctx, om.orgID, dashID) - if err != nil { - return nil, err - } - l := log.New("dashboardTitle", dash.Title, "dashboardUid", dash.UID) +type migrationFolder struct { + uid string + created bool + warning string +} - dashFolder, err := om.getFolder(ctx, dash) +func (sync *sync) migratedFolder(ctx context.Context, l log.Logger, dashboardUID string, folderID int64) (*migrationFolder, error) { + dashFolder, err := sync.getFolder(ctx, folderID) if err != nil { // nolint:staticcheck - l.Warn("Failed to find folder for dashboard", "missing_folder_id", dash.FolderID, "error", err) + l.Warn("Failed to find folder for dashboard", "missing_folder_id", folderID, "error", err) } if dashFolder != nil { l = l.New("folderUid", dashFolder.UID, "folderName", dashFolder.Title) } - migratedFolder, err := om.getOrCreateMigratedFolder(ctx, l, dash, dashFolder) + migratedFolder, err := sync.getOrCreateMigratedFolder(ctx, l, dashboardUID, dashFolder) if err != nil { return nil, err } - return &migmodels.DashboardUpgradeInfo{ - DashboardUID: dash.UID, - DashboardName: dash.Title, - NewFolderUID: migratedFolder.UID, - NewFolderName: migratedFolder.Title, - }, nil + du := migrationFolder{ + uid: migratedFolder.UID, + created: migratedFolder.CreatedBy == newFolder, + } + if dashFolder == nil && migratedFolder.Title == generalAlertingFolderTitle { + du.warning = "dashboard alerts moved to general alerting folder during upgrade: original folder not found" + } else if folderID <= 0 && strings.HasPrefix(migratedFolder.Title, generalAlertingFolderTitle) { + du.warning = "dashboard alerts moved to general alerting folder during upgrade: general folder not supported" + } else if migratedFolder.ID != folderID { // nolint:staticcheck + du.warning = "dashboard alerts moved to new folder during upgrade: folder permission changes were needed" + } + + return &du, nil } // getOrCreateMigratedFolder returns the folder that alerts in a given dashboard should migrate to. // If the dashboard has no custom permissions, this should be the same folder as dash.FolderID. // If the dashboard has custom permissions that affect access, this should be a new folder with migrated permissions relating to both the dashboard and parent folder. // Any dashboard that has greater read/write permissions for an orgRole/team/user compared to its folder will necessitate creating a new folder with the same permissions as the dashboard. -func (om *OrgMigration) getOrCreateMigratedFolder(ctx context.Context, l log.Logger, dash *dashboards.Dashboard, parentFolder *folder.Folder) (*folder.Folder, error) { +func (sync *sync) getOrCreateMigratedFolder(ctx context.Context, l log.Logger, dashboardUID string, parentFolder *folder.Folder) (*folder.Folder, error) { // If parentFolder does not exist then the dashboard is an orphan. We migrate the alert to the general alerting folder. // The general alerting folder is only accessible to admins. if parentFolder == nil { - l.Warn("Migrating alert to the general alerting folder: original folder not found") - f, err := om.getOrCreateGeneralAlertingFolder(ctx, om.orgID) + l.Info("Migrating alert to the general alerting folder") + f, err := sync.getOrCreateGeneralAlertingFolder(ctx, sync.orgID) if err != nil { return nil, fmt.Errorf("general alerting folder: %w", err) } @@ -105,17 +111,17 @@ func (om *OrgMigration) getOrCreateMigratedFolder(ctx context.Context, l log.Log // Check if the dashboard has custom permissions. If it does, we need to create a new folder for it. // This folder will be cached for re-use for each dashboard in the folder with the same permissions. // nolint:staticcheck - permissionsToFolder, ok := om.permissionsMap[parentFolder.ID] + permissionsToFolder, ok := sync.permissionsMap[parentFolder.ID] if !ok { permissionsToFolder = make(map[permissionHash]*folder.Folder) // nolint:staticcheck - om.permissionsMap[parentFolder.ID] = permissionsToFolder + sync.permissionsMap[parentFolder.ID] = permissionsToFolder - folderPerms, err := om.getFolderPermissions(ctx, parentFolder) + folderPerms, err := sync.getFolderPermissions(ctx, parentFolder) if err != nil { return nil, fmt.Errorf("folder permissions: %w", err) } - newFolderPerms, _ := om.convertResourcePerms(folderPerms) + newFolderPerms, _ := sync.convertResourcePerms(folderPerms) // We assign the folder to the cache so that any dashboards with identical equivalent permissions will use the parent folder instead of creating a new one. folderPermsHash, err := createHash(newFolderPerms) @@ -126,11 +132,11 @@ func (om *OrgMigration) getOrCreateMigratedFolder(ctx context.Context, l log.Log } // Now we compute the hash of the dashboard permissions and check if we have a folder for it. If not, we create a new one. - perms, err := om.getDashboardPermissions(ctx, dash) + perms, err := sync.getDashboardPermissions(ctx, dashboardUID) if err != nil { return nil, fmt.Errorf("dashboard permissions: %w", err) } - newPerms, unusedPerms := om.convertResourcePerms(perms) + newPerms, unusedPerms := sync.convertResourcePerms(perms) hash, err := createHash(newPerms) if err != nil { return nil, fmt.Errorf("hash of dashboard permissions: %w", err) @@ -140,7 +146,7 @@ func (om *OrgMigration) getOrCreateMigratedFolder(ctx context.Context, l log.Log if !ok { folderName := generateAlertFolderName(parentFolder, hash) l.Info("Dashboard has custom permissions, create a new folder for alerts.", "newFolder", folderName) - f, err := om.createFolder(ctx, om.orgID, folderName, newPerms) + f, err := sync.createFolder(ctx, sync.orgID, folderName, newPerms) if err != nil { return nil, err } @@ -205,12 +211,12 @@ func isBasic(roleName string) bool { // // For now, we choose the simpler approach of handling managed and basic roles. Fixed and custom roles will not // be taken into account, but we will log a warning if they had the potential to override the folder permissions. -func (om *OrgMigration) convertResourcePerms(rperms []accesscontrol.ResourcePermission) ([]accesscontrol.SetResourcePermissionCommand, []accesscontrol.ResourcePermission) { +func (sync *sync) convertResourcePerms(rperms []accesscontrol.ResourcePermission) ([]accesscontrol.SetResourcePermissionCommand, []accesscontrol.ResourcePermission) { keep := make(map[accesscontrol.SetResourcePermissionCommand]dashboardaccess.PermissionType) unusedPerms := make([]accesscontrol.ResourcePermission, 0) for _, p := range rperms { if p.IsManaged || p.IsInherited || isBasic(p.RoleName) { - if permission := om.migrationStore.MapActions(p); permission != "" { + if permission := sync.migrationStore.MapActions(p); permission != "" { sp := accesscontrol.SetResourcePermissionCommand{ UserID: p.UserId, TeamID: p.TeamId, @@ -309,21 +315,21 @@ func createHash(setResourcePermissionCommands []accesscontrol.SetResourcePermiss } // getFolderPermissions Get permissions for folder. -func (om *OrgMigration) getFolderPermissions(ctx context.Context, f *folder.Folder) ([]accesscontrol.ResourcePermission, error) { - if p, ok := om.folderPermissionCache[f.UID]; ok { +func (sync *sync) getFolderPermissions(ctx context.Context, f *folder.Folder) ([]accesscontrol.ResourcePermission, error) { + if p, ok := sync.folderPermissionCache[f.UID]; ok { return p, nil } - p, err := om.migrationStore.GetFolderPermissions(ctx, getMigrationUser(f.OrgID), f.UID) + p, err := sync.migrationStore.GetFolderPermissions(ctx, getMigrationUser(f.OrgID), f.UID) if err != nil { return nil, err } - om.folderPermissionCache[f.UID] = p + sync.folderPermissionCache[f.UID] = p return p, nil } // getDashboardPermissions Get permissions for dashboard. -func (om *OrgMigration) getDashboardPermissions(ctx context.Context, d *dashboards.Dashboard) ([]accesscontrol.ResourcePermission, error) { - p, err := om.migrationStore.GetDashboardPermissions(ctx, getMigrationUser(om.orgID), d.UID) +func (sync *sync) getDashboardPermissions(ctx context.Context, uid string) ([]accesscontrol.ResourcePermission, error) { + p, err := sync.migrationStore.GetDashboardPermissions(ctx, getMigrationUser(sync.orgID), uid) if err != nil { return nil, err } @@ -331,63 +337,61 @@ func (om *OrgMigration) getDashboardPermissions(ctx context.Context, d *dashboar } // getFolder returns the parent folder for the given dashboard. If the dashboard is in the general folder, it returns the general alerting folder. -func (om *OrgMigration) getFolder(ctx context.Context, dash *dashboards.Dashboard) (*folder.Folder, error) { - // nolint:staticcheck - if f, ok := om.folderCache[dash.FolderID]; ok { +func (sync *sync) getFolder(ctx context.Context, folderId int64) (*folder.Folder, error) { + if f, ok := sync.folderCache[folderId]; ok { return f, nil } - // nolint:staticcheck - if dash.FolderID <= 0 { + if folderId <= 0 { // Don't use general folder since it has no uid, instead we use a new "General Alerting" folder. - migratedFolder, err := om.getOrCreateGeneralAlertingFolder(ctx, om.orgID) + migratedFolder, err := sync.getOrCreateGeneralAlertingFolder(ctx, sync.orgID) if err != nil { return nil, fmt.Errorf("get or create general folder: %w", err) } - return migratedFolder, err + return migratedFolder, nil } - // nolint:staticcheck - f, err := om.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{ID: &dash.FolderID, OrgID: om.orgID, SignedInUser: getMigrationUser(om.orgID)}) + f, err := sync.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{ID: &folderId, OrgID: sync.orgID, SignedInUser: getMigrationUser(sync.orgID)}) if err != nil { if errors.Is(err, dashboards.ErrFolderNotFound) { - // nolint:staticcheck - return nil, fmt.Errorf("folder with id %v not found", dash.FolderID) + return nil, fmt.Errorf("folder with id %v not found", folderId) } - // nolint:staticcheck - return nil, fmt.Errorf("get folder %d: %w", dash.FolderID, err) + return nil, fmt.Errorf("get folder %d: %w", folderId, err) } - // nolint:staticcheck - om.folderCache[dash.FolderID] = f + sync.folderCache[folderId] = f return f, nil } // getOrCreateGeneralAlertingFolder returns the general alerting folder under the specific organisation // If the general alerting folder does not exist it creates it. -func (om *OrgMigration) getOrCreateGeneralAlertingFolder(ctx context.Context, orgID int64) (*folder.Folder, error) { - if om.generalAlertingFolder != nil { - return om.generalAlertingFolder, nil +func (sync *sync) getOrCreateGeneralAlertingFolder(ctx context.Context, orgID int64) (*folder.Folder, error) { + if sync.generalAlertingFolder != nil { + return sync.generalAlertingFolder, nil } - f, err := om.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &generalAlertingFolderTitle, SignedInUser: getMigrationUser(orgID)}) + f, err := sync.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &generalAlertingFolderTitle, SignedInUser: getMigrationUser(orgID)}) if err != nil { if errors.Is(err, dashboards.ErrFolderNotFound) { // create general alerting folder without permissions to mimic the general folder. - f, err := om.createFolder(ctx, orgID, generalAlertingFolderTitle, nil) + f, err := sync.createFolder(ctx, orgID, generalAlertingFolderTitle, nil) if err != nil { return nil, fmt.Errorf("create general alerting folder: %w", err) } - om.generalAlertingFolder = f + sync.generalAlertingFolder = f return f, err } return nil, fmt.Errorf("get folder '%s': %w", generalAlertingFolderTitle, err) } - om.generalAlertingFolder = f + sync.generalAlertingFolder = f return f, nil } +// newFolder is used to as the value of createdBy when the folder has been created by this migration. It is not persisted +// to the database. -8 is chosen as it's the same value that was used in the original version of migration. +const newFolder = -8 + // createFolder creates a new folder with given permissions. -func (om *OrgMigration) createFolder(ctx context.Context, orgID int64, title string, newPerms []accesscontrol.SetResourcePermissionCommand) (*folder.Folder, error) { - f, err := om.migrationStore.CreateFolder(ctx, &folder.CreateFolderCommand{ +func (sync *sync) createFolder(ctx context.Context, orgID int64, title string, newPerms []accesscontrol.SetResourcePermissionCommand) (*folder.Folder, error) { + f, err := sync.migrationStore.CreateFolder(ctx, &folder.CreateFolderCommand{ OrgID: orgID, Title: title, SignedInUser: getMigrationUser(orgID).(*user.SignedInUser), @@ -399,8 +403,8 @@ func (om *OrgMigration) createFolder(ctx context.Context, orgID int64, title str // but the only folders we should be creating here are ones with permission // hash suffix or general alerting. Neither of which is likely to spuriously // conflict with an existing folder. - om.log.Warn("Folder already exists, using existing folder", "title", title) - f, err := om.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &title, SignedInUser: getMigrationUser(orgID)}) + sync.log.Warn("Folder already exists, using existing folder", "title", title) + f, err := sync.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &title, SignedInUser: getMigrationUser(orgID)}) if err != nil { return nil, err } @@ -410,13 +414,13 @@ func (om *OrgMigration) createFolder(ctx context.Context, orgID int64, title str } if len(newPerms) > 0 { - _, err = om.migrationStore.SetFolderPermissions(ctx, orgID, f.UID, newPerms...) + _, err = sync.migrationStore.SetFolderPermissions(ctx, orgID, f.UID, newPerms...) if err != nil { return nil, fmt.Errorf("set permissions: %w", err) } } - om.state.CreatedFolders = append(om.state.CreatedFolders, f.UID) + f.CreatedBy = newFolder // We don't persist this, it's just to let callers know this is a newly created folder. return f, nil } diff --git a/pkg/services/ngalert/migration/persist.go b/pkg/services/ngalert/migration/persist.go new file mode 100644 index 00000000000..297b2136737 --- /dev/null +++ b/pkg/services/ngalert/migration/persist.go @@ -0,0 +1,318 @@ +package migration + +import ( + "context" + "encoding/json" + "fmt" + "sort" + + alertingNotify "github.com/grafana/alerting/notify" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/accesscontrol" + legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" + "github.com/grafana/grafana/pkg/services/folder" + apiModels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" + migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/store" +) + +// sync is a helper struct for persisting migration changes to the database. +type sync struct { + log log.Logger + orgID int64 + + migrationStore migrationStore.Store + getDecryptedValue func(ctx context.Context, sjd map[string][]byte, key, fallback string) string + channelCache *ChannelCache + + // Caches used during custom folder creation. + permissionsMap map[int64]map[permissionHash]*folder.Folder // Parent Folder ID -> unique dashboard permission -> custom folder. + folderCache map[int64]*folder.Folder // Folder ID -> Folder. + folderPermissionCache map[string][]accesscontrol.ResourcePermission // Folder UID -> Folder Permissions. + generalAlertingFolder *folder.Folder +} + +// newSync creates a new migrationService for the given orgID. +func (ms *migrationService) newSync(orgID int64) *sync { + return &sync{ + orgID: orgID, + log: ms.log.New("orgID", orgID), + + migrationStore: ms.migrationStore, + getDecryptedValue: func(ctx context.Context, sjd map[string][]byte, key, fallback string) string { + return ms.encryptionService.GetDecryptedValue(ctx, sjd, key, fallback) + }, + channelCache: ms.newChannelCache(orgID), + + permissionsMap: make(map[int64]map[permissionHash]*folder.Folder), + folderCache: make(map[int64]*folder.Folder), + folderPermissionCache: make(map[string][]accesscontrol.ResourcePermission), + } +} + +// syncAndSaveState persists the given dashboardUpgrades and contactPairs to the database. +func (sync *sync) syncAndSaveState( + ctx context.Context, + dashboardUpgrades []*migmodels.DashboardUpgrade, + contactPairs []*migmodels.ContactPair, +) error { + delta := createDelta(dashboardUpgrades, contactPairs) + state, err := sync.syncDelta(ctx, delta) + if err != nil { + return fmt.Errorf("sync state: %w", err) + } + + err = sync.migrationStore.SetOrgMigrationState(ctx, sync.orgID, state) + if err != nil { + return fmt.Errorf("save state: %w", err) + } + + return nil +} + +// StateDelta contains the changes to be made to the database based on the difference between +// existing migration state and new migration state. +type StateDelta struct { + DashboardsToAdd []*migmodels.DashboardUpgrade + ChannelsToAdd []*migmodels.ContactPair +} + +// createDelta creates a StateDelta from the new dashboards upgrades and contact pairs. +func createDelta( + dashboardUpgrades []*migmodels.DashboardUpgrade, + contactPairs []*migmodels.ContactPair, +) StateDelta { + return StateDelta{ + DashboardsToAdd: dashboardUpgrades, + ChannelsToAdd: contactPairs, + } +} + +// syncDelta persists the given delta to the state and database. +func (sync *sync) syncDelta(ctx context.Context, delta StateDelta) (*migrationStore.OrgMigrationState, error) { + state := &migrationStore.OrgMigrationState{ + OrgID: sync.orgID, + CreatedFolders: make([]string, 0), + } + + amConfig, err := sync.handleAlertmanager(ctx, delta) + if err != nil { + return nil, err + } + + err = sync.handleAddRules(ctx, state, delta, amConfig) + if err != nil { + return nil, err + } + + return state, nil +} + +// handleAlertmanager persists the given channel delta to the state and database. +func (sync *sync) handleAlertmanager(ctx context.Context, delta StateDelta) (*migmodels.Alertmanager, error) { + amConfig := migmodels.NewAlertmanager() + + if len(delta.ChannelsToAdd) == 0 { + return amConfig, nil + } + + for _, pair := range delta.ChannelsToAdd { + amConfig.AddReceiver(pair.ContactPoint) + amConfig.AddRoute(pair.Route) + } + + // Validate the alertmanager configuration produced, this gives a chance to catch bad configuration at migration time. + // Validation between legacy and unified alerting can be different (e.g. due to bug fixes) so this would fail the migration in that case. + if err := sync.validateAlertmanagerConfig(amConfig.Config); err != nil { + return nil, fmt.Errorf("validate AlertmanagerConfig: %w", err) + } + + sync.log.Info("Writing alertmanager config", "receivers", len(amConfig.Config.AlertmanagerConfig.Receivers), "routes", len(amConfig.Config.AlertmanagerConfig.Route.Routes)) + if err := sync.migrationStore.SaveAlertmanagerConfiguration(ctx, sync.orgID, amConfig.Config); err != nil { + return nil, fmt.Errorf("write AlertmanagerConfig: %w", err) + } + + return amConfig, nil +} + +// handleAddRules persists the given add rule delta to the state and database. +func (sync *sync) handleAddRules(ctx context.Context, state *migrationStore.OrgMigrationState, delta StateDelta, amConfig *migmodels.Alertmanager) error { + pairs := make([]*migmodels.AlertPair, 0) + createdFolderUIDs := make(map[string]struct{}) + for _, duToAdd := range delta.DashboardsToAdd { + pairsWithRules := make([]*migmodels.AlertPair, 0, len(duToAdd.MigratedAlerts)) + for _, pair := range duToAdd.MigratedAlerts { + if pair.Rule != nil { + pairsWithRules = append(pairsWithRules, pair) + } + } + + if len(pairsWithRules) > 0 { + l := sync.log.New("dashboardTitle", duToAdd.Title, "dashboardUid", duToAdd.UID) + migratedFolder, err := sync.migratedFolder(ctx, l, duToAdd.UID, duToAdd.FolderID) + if err != nil { + return err + } + + // Keep track of folders created by the migration. + if _, exists := createdFolderUIDs[migratedFolder.uid]; migratedFolder.created && !exists { + createdFolderUIDs[migratedFolder.uid] = struct{}{} + state.CreatedFolders = append(state.CreatedFolders, migratedFolder.uid) + } + + for _, pair := range pairsWithRules { + pair.Rule.NamespaceUID = migratedFolder.uid + pairs = append(pairs, pair) + } + } + } + + if len(pairs) > 0 { + sync.log.Debug("Inserting migrated alert rules", "count", len(pairs)) + + // We ensure consistency in title deduplication as well as insertions by sorting pairs first. + sort.SliceStable(pairs, func(i, j int) bool { + return pairs[i].LegacyRule.ID < pairs[j].LegacyRule.ID + }) + + err := sync.deduplicateTitles(ctx, pairs) + if err != nil { + return fmt.Errorf("deduplicate titles: %w", err) + } + rules, err := sync.attachContactPointLabels(ctx, pairs, amConfig) + if err != nil { + return fmt.Errorf("attach contact point labels: %w", err) + } + + err = sync.migrationStore.InsertAlertRules(ctx, rules...) + if err != nil { + return fmt.Errorf("insert alert rules: %w", err) + } + } + return nil +} + +// deduplicateTitles ensures that the alert rule titles are unique within the folder. +func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.AlertPair) error { + // First pass to find namespaces. + seen := make(map[string]struct{}) + namespaces := make([]string, 0) + for _, pair := range pairs { + if _, ok := seen[pair.Rule.NamespaceUID]; !ok { + namespaces = append(namespaces, pair.Rule.NamespaceUID) + seen[pair.Rule.NamespaceUID] = struct{}{} + } + } + + // Populate deduplicators from database. + titles, err := sync.migrationStore.GetAlertRuleTitles(ctx, sync.orgID, namespaces...) + if err != nil { + sync.log.Warn("Failed to get alert rule titles for title deduplication", "error", err) + } + + titleDedups := make(map[string]*migmodels.Deduplicator, len(namespaces)) + for _, ns := range namespaces { + titleDedups[ns] = migmodels.NewDeduplicator(sync.migrationStore.CaseInsensitive(), store.AlertDefinitionMaxTitleLength, titles[ns]...) + } + + for _, pair := range pairs { + l := sync.log.New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID) + + // Here we ensure that the alert rule title is unique within the folder. + titleDeduplicator := titleDedups[pair.Rule.NamespaceUID] + name, err := titleDeduplicator.Deduplicate(pair.Rule.Title) + if err != nil { + return err + } + if name != pair.Rule.Title { + l.Info("Alert rule title modified to be unique within folder", "old", pair.Rule.Title, "new", name) + pair.Rule.Title = name + } + } + + return nil +} + +// attachContactPointLabels attaches contact point labels to the given alert rules. +func (sync *sync) attachContactPointLabels(ctx context.Context, pairs []*migmodels.AlertPair, amConfig *migmodels.Alertmanager) ([]models.AlertRule, error) { + rules := make([]models.AlertRule, 0, len(pairs)) + for _, pair := range pairs { + alertChannels, err := sync.extractChannels(ctx, pair.LegacyRule) + if err != nil { + return nil, fmt.Errorf("extract channel IDs: %w", err) + } + + for _, c := range alertChannels { + pair.Rule.Labels[contactLabel(c.Name)] = "true" + } + + rules = append(rules, *pair.Rule) + } + return rules, nil +} + +// extractChannels extracts notification channels from the given legacy dashboard alert parsed settings. +func (sync *sync) extractChannels(ctx context.Context, alert *legacymodels.Alert) ([]*legacymodels.AlertNotification, error) { + l := sync.log.New("ruleId", alert.ID, "ruleName", alert.Name) + rawSettings, err := json.Marshal(alert.Settings) + if err != nil { + return nil, fmt.Errorf("get settings: %w", err) + } + var parsedSettings dashAlertSettings + err = json.Unmarshal(rawSettings, &parsedSettings) + if err != nil { + return nil, fmt.Errorf("parse settings: %w", err) + } + + // Extracting channels. + channels := make([]*legacymodels.AlertNotification, 0, len(parsedSettings.Notifications)) + for _, key := range parsedSettings.Notifications { + // Either id or uid can be defined in the dashboard alert notification settings. See alerting.NewRuleFromDBAlert. + if key.ID == 0 && key.UID == "" { + l.Warn("Alert notification has no ID or UID, skipping", "notificationKey", key) + continue + } + if c, err := sync.channelCache.Get(ctx, key); err != nil { + return nil, fmt.Errorf("get alert notification: %w", err) + } else if c != nil { + channels = append(channels, c) + continue + } + l.Warn("Failed to get alert notification, skipping", "notificationKey", key) + } + return channels, nil +} + +// validateAlertmanagerConfig validates the alertmanager configuration produced by the migration against the receivers. +func (sync *sync) validateAlertmanagerConfig(config *apiModels.PostableUserConfig) error { + for _, r := range config.AlertmanagerConfig.Receivers { + for _, gr := range r.GrafanaManagedReceivers { + data, err := gr.Settings.MarshalJSON() + if err != nil { + return err + } + var ( + cfg = &alertingNotify.GrafanaIntegrationConfig{ + UID: gr.UID, + Name: gr.Name, + Type: gr.Type, + DisableResolveMessage: gr.DisableResolveMessage, + Settings: data, + SecureSettings: gr.SecureSettings, + } + ) + + _, err = alertingNotify.BuildReceiverConfiguration(context.Background(), &alertingNotify.APIReceiver{ + GrafanaIntegrations: alertingNotify.GrafanaIntegrations{Integrations: []*alertingNotify.GrafanaIntegrationConfig{cfg}}, + }, sync.getDecryptedValue) + if err != nil { + return err + } + } + } + + return nil +} diff --git a/pkg/services/ngalert/migration/service.go b/pkg/services/ngalert/migration/service.go index 6cd36953f74..96f0de7ab80 100644 --- a/pkg/services/ngalert/migration/service.go +++ b/pkg/services/ngalert/migration/service.go @@ -2,12 +2,14 @@ package migration import ( "context" + "errors" "fmt" "time" "github.com/grafana/grafana/pkg/infra/db" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/serverlock" + migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store" "github.com/grafana/grafana/pkg/services/secrets" "github.com/grafana/grafana/pkg/setting" @@ -171,13 +173,40 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error { continue } - if err := om.migrateOrg(ctx); err != nil { + dashboardUpgrades, contactPairs, err := om.migrateOrg(ctx) + if err != nil { return fmt.Errorf("migrate org %d: %w", o.ID, err) } - err = om.migrationStore.SetOrgMigrationState(ctx, o.ID, om.state) + // Check for errors, if any exist log and fail the migration. + errs := migmodels.ExtractErrors(dashboardUpgrades, contactPairs) + var migrationErr error + for _, e := range errs { + // Skip certain errors as historically they are not fatal to the migration. We can revisit these if necessary. + if errors.Is(e, ErrDiscontinued) { + // Discontinued notification type. + continue + } + if errors.Is(e, ErrOrphanedAlert) { + // Orphaned alerts. + continue + } + migrationErr = errors.Join(migrationErr, e) + } + if migrationErr != nil { + return fmt.Errorf("migrate org %d: %w", o.ID, migrationErr) + } + + err = ms.newSync(o.ID).syncAndSaveState(ctx, dashboardUpgrades, contactPairs) if err != nil { - return fmt.Errorf("set org migration state: %w", err) + return err + } + + if len(om.silences) > 0 { + om.log.Debug("Writing silences file", "silences", len(om.silences)) + if err := writeSilencesFile(ms.cfg.DataPath, o.ID, om.silences); err != nil { + return fmt.Errorf("write silence file for org %d: %w", o.ID, err) + } } err = ms.migrationStore.SetMigrated(ctx, o.ID, true) diff --git a/pkg/services/ngalert/migration/silences.go b/pkg/services/ngalert/migration/silences.go index b5431e854f8..6a2b001d2d0 100644 --- a/pkg/services/ngalert/migration/silences.go +++ b/pkg/services/ngalert/migration/silences.go @@ -92,16 +92,15 @@ func (om *OrgMigration) addNoDataSilence(rule *models.AlertRule) error { return nil } -func (om *OrgMigration) writeSilencesFile() error { +func writeSilencesFile(dataPath string, orgID int64, silences []*pb.MeshSilence) error { var buf bytes.Buffer - om.log.Debug("Writing silences file", "silences", len(om.silences)) - for _, e := range om.silences { + for _, e := range silences { if _, err := pbutil.WriteDelimited(&buf, e); err != nil { return err } } - f, err := openReplace(silencesFileNameForOrg(om.cfg.DataPath, om.orgID)) + f, err := openReplace(silencesFileNameForOrg(dataPath, orgID)) if err != nil { return err } diff --git a/pkg/services/ngalert/migration/store/database.go b/pkg/services/ngalert/migration/store/database.go index b82a96b3cba..1e2240e8da7 100644 --- a/pkg/services/ngalert/migration/store/database.go +++ b/pkg/services/ngalert/migration/store/database.go @@ -21,52 +21,65 @@ import ( "github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/folder" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" - migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/notifier" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/org" - "github.com/grafana/grafana/pkg/services/sqlstore/migrator" "github.com/grafana/grafana/pkg/services/user" "github.com/grafana/grafana/pkg/setting" ) // Store is the database abstraction for migration persistence. type Store interface { - InsertAlertRules(ctx context.Context, rules ...models.AlertRule) error - - SaveAlertmanagerConfiguration(ctx context.Context, orgID int64, amConfig *apimodels.PostableUserConfig) error + ReadStore + WriteStore +} +// ReadStore is the database abstraction for read-only migration persistence. +type ReadStore interface { GetAllOrgs(ctx context.Context) ([]*org.OrgDTO, error) GetDatasource(ctx context.Context, datasourceID int64, user identity.Requester) (*datasources.DataSource, error) GetNotificationChannels(ctx context.Context, orgID int64) ([]*legacymodels.AlertNotification, error) + GetNotificationChannel(ctx context.Context, q GetNotificationChannelQuery) (*legacymodels.AlertNotification, error) GetOrgDashboardAlerts(ctx context.Context, orgID int64) (map[int64][]*legacymodels.Alert, int, error) GetDashboardPermissions(ctx context.Context, user identity.Requester, resourceID string) ([]accesscontrol.ResourcePermission, error) GetFolderPermissions(ctx context.Context, user identity.Requester, resourceID string) ([]accesscontrol.ResourcePermission, error) - SetDashboardPermissions(ctx context.Context, orgID int64, resourceID string, commands ...accesscontrol.SetResourcePermissionCommand) ([]accesscontrol.ResourcePermission, error) - SetFolderPermissions(ctx context.Context, orgID int64, resourceID string, commands ...accesscontrol.SetResourcePermissionCommand) ([]accesscontrol.ResourcePermission, error) MapActions(permission accesscontrol.ResourcePermission) string GetDashboard(ctx context.Context, orgID int64, id int64) (*dashboards.Dashboard, error) GetFolder(ctx context.Context, cmd *folder.GetFolderQuery) (*folder.Folder, error) - CreateFolder(ctx context.Context, cmd *folder.CreateFolderCommand) (*folder.Folder, error) IsMigrated(ctx context.Context, orgID int64) (bool, error) - SetMigrated(ctx context.Context, orgID int64, migrated bool) error GetCurrentAlertingType(ctx context.Context) (AlertingType, error) - SetCurrentAlertingType(ctx context.Context, t AlertingType) error - GetOrgMigrationState(ctx context.Context, orgID int64) (*migmodels.OrgMigrationState, error) - SetOrgMigrationState(ctx context.Context, orgID int64, summary *migmodels.OrgMigrationState) error + GetOrgMigrationState(ctx context.Context, orgID int64) (*OrgMigrationState, error) - RevertAllOrgs(ctx context.Context) error + GetAlertRuleTitles(ctx context.Context, orgID int64, namespaceUIDs ...string) (map[string][]string, error) // NamespaceUID -> Titles CaseInsensitive() bool } +// WriteStore is the database abstraction for write migration persistence. +type WriteStore interface { + InsertAlertRules(ctx context.Context, rules ...models.AlertRule) error + + SaveAlertmanagerConfiguration(ctx context.Context, orgID int64, amConfig *apimodels.PostableUserConfig) error + + SetDashboardPermissions(ctx context.Context, orgID int64, resourceID string, commands ...accesscontrol.SetResourcePermissionCommand) ([]accesscontrol.ResourcePermission, error) + SetFolderPermissions(ctx context.Context, orgID int64, resourceID string, commands ...accesscontrol.SetResourcePermissionCommand) ([]accesscontrol.ResourcePermission, error) + + CreateFolder(ctx context.Context, cmd *folder.CreateFolderCommand) (*folder.Folder, error) + + SetMigrated(ctx context.Context, orgID int64, migrated bool) error + SetCurrentAlertingType(ctx context.Context, t AlertingType) error + SetOrgMigrationState(ctx context.Context, orgID int64, summary *OrgMigrationState) error + + RevertAllOrgs(ctx context.Context) error +} + type migrationStore struct { store db.DB cfg *setting.Cfg @@ -197,8 +210,8 @@ func (ms *migrationStore) SetCurrentAlertingType(ctx context.Context, t Alerting return kv.Set(ctx, typeKey, string(t)) } -// GetOrgMigrationState returns a summary of a previous migration. -func (ms *migrationStore) GetOrgMigrationState(ctx context.Context, orgID int64) (*migmodels.OrgMigrationState, error) { +// GetOrgMigrationState returns the state of the previous migration. +func (ms *migrationStore) GetOrgMigrationState(ctx context.Context, orgID int64) (*OrgMigrationState, error) { kv := kvstore.WithNamespace(ms.kv, orgID, KVNamespace) content, exists, err := kv.Get(ctx, stateKey) if err != nil { @@ -206,22 +219,23 @@ func (ms *migrationStore) GetOrgMigrationState(ctx context.Context, orgID int64) } if !exists { - return &migmodels.OrgMigrationState{OrgID: orgID}, nil + return &OrgMigrationState{OrgID: orgID}, nil } - var summary migmodels.OrgMigrationState - err = json.Unmarshal([]byte(content), &summary) + var state OrgMigrationState + err = json.Unmarshal([]byte(content), &state) if err != nil { return nil, err } - return &summary, nil + return &state, nil } // SetOrgMigrationState sets the summary of a previous migration. -func (ms *migrationStore) SetOrgMigrationState(ctx context.Context, orgID int64, summary *migmodels.OrgMigrationState) error { +func (ms *migrationStore) SetOrgMigrationState(ctx context.Context, orgID int64, state *OrgMigrationState) error { kv := kvstore.WithNamespace(ms.kv, orgID, KVNamespace) - raw, err := json.Marshal(summary) + + raw, err := json.Marshal(state) if err != nil { return err } @@ -229,27 +243,55 @@ func (ms *migrationStore) SetOrgMigrationState(ctx context.Context, orgID int64, return kv.Set(ctx, stateKey, string(raw)) } -func (ms *migrationStore) InsertAlertRules(ctx context.Context, rules ...models.AlertRule) error { - if ms.store.GetDialect().DriverName() == migrator.Postgres { - // Postgresql which will automatically rollback the whole transaction on constraint violation. - // So, for postgresql, insertions will execute in a subtransaction. - err := ms.store.InTransaction(ctx, func(subCtx context.Context) error { - _, err := ms.alertingStore.InsertAlertRules(subCtx, rules) - if err != nil { - return err - } - return nil - }) +// GetAlertRuleTitles returns a map of namespaceUID -> title for all alert rules in the given org and namespace uids. +func (ms *migrationStore) GetAlertRuleTitles(ctx context.Context, orgID int64, namespaceUIDs ...string) (map[string][]string, error) { + res := make(map[string][]string) + err := ms.store.WithTransactionalDbSession(ctx, func(sess *db.Session) error { + type title struct { + NamespaceUID string `xorm:"namespace_uid"` + Title string + } + titles := make([]title, 0) + s := sess.Table("alert_rule").Cols("namespace_uid", "title").Where("org_id = ?", orgID) + if len(namespaceUIDs) > 0 { + s = s.In("namespace_uid", namespaceUIDs) + } + err := s.Find(&titles) if err != nil { return err } - } else { - _, err := ms.alertingStore.InsertAlertRules(ctx, rules) + + for _, t := range titles { + if _, ok := res[t.NamespaceUID]; !ok { + res[t.NamespaceUID] = make([]string, 0) + } + res[t.NamespaceUID] = append(res[t.NamespaceUID], t.Title) + } + return nil + }) + return res, err +} + +// BATCHSIZE is a reasonable SQL batch size to prevent hitting placeholder limits (such as Error 1390 in MySQL) or packet size limits. +const BATCHSIZE = 1000 + +// batchBy batches a given slice in a way as to minimize allocations, see https://github.com/golang/go/wiki/SliceTricks#batching-with-minimal-allocation. +func batchBy[T any](items []T, batchSize int) (batches [][]T) { + for batchSize < len(items) { + items, batches = items[batchSize:], append(batches, items[0:batchSize:batchSize]) + } + return append(batches, items) +} + +// InsertAlertRules inserts alert rules. +func (ms *migrationStore) InsertAlertRules(ctx context.Context, rules ...models.AlertRule) error { + batches := batchBy(rules, BATCHSIZE) + for _, batch := range batches { + _, err := ms.alertingStore.InsertAlertRules(ctx, batch) if err != nil { return err } } - return nil } @@ -295,7 +337,11 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error { return fmt.Errorf("get orgs: %w", err) } for _, o := range orgs { - if err := ms.DeleteMigratedFolders(ctx, o.ID); err != nil { + state, err := ms.GetOrgMigrationState(ctx, o.ID) + if err != nil { + return err + } + if err := ms.DeleteFolders(ctx, o.ID, state.CreatedFolders...); err != nil { ms.log.Warn("Failed to delete migrated folders", "orgID", o.ID, "err", err) continue } @@ -336,16 +382,6 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error { }) } -// DeleteMigratedFolders deletes all folders created by the previous migration run for the given org. This includes all folder permissions. -// If the folder is not empty of all descendants the operation will fail and return an error. -func (ms *migrationStore) DeleteMigratedFolders(ctx context.Context, orgID int64) error { - summary, err := ms.GetOrgMigrationState(ctx, orgID) - if err != nil { - return err - } - return ms.DeleteFolders(ctx, orgID, summary.CreatedFolders...) -} - var ErrFolderNotDeleted = fmt.Errorf("folder not deleted") // DeleteFolders deletes the folders from the given orgs with the given UIDs. This includes all folder permissions. @@ -431,11 +467,48 @@ func (ms *migrationStore) GetNotificationChannels(ctx context.Context, orgID int }) } +type GetNotificationChannelQuery struct { + OrgID int64 + ID int64 + UID string +} + +var ErrNotFound = errors.New("not found") + +// GetNotificationChannel returns a single channel for this org by id or uid. +func (ms *migrationStore) GetNotificationChannel(ctx context.Context, q GetNotificationChannelQuery) (*legacymodels.AlertNotification, error) { + if q.OrgID == 0 { + return nil, fmt.Errorf("org id must be set") + } + if q.ID == 0 && q.UID == "" { + return nil, fmt.Errorf("id or uid must be set") + } + var res legacymodels.AlertNotification + err := ms.store.WithDbSession(ctx, func(sess *db.Session) error { + var exists bool + var err error + s := sess.Table("alert_notification").Where("org_id = ?", q.OrgID) + if q.ID > 0 { + exists, err = s.Where("id = ?", q.ID).Get(&res) + } else if q.UID != "" { + exists, err = s.Where("uid = ?", q.UID).Get(&res) + } + if err != nil { + return err + } + if !exists { + return ErrNotFound + } + return nil + }) + return &res, err +} + // GetOrgDashboardAlerts loads all legacy dashboard alerts for the given org mapped by dashboard id. func (ms *migrationStore) GetOrgDashboardAlerts(ctx context.Context, orgID int64) (map[int64][]*legacymodels.Alert, int, error) { var dashAlerts []*legacymodels.Alert err := ms.store.WithDbSession(ctx, func(sess *db.Session) error { - return sess.SQL("select * from alert WHERE org_id = ? AND dashboard_id IN (SELECT id from dashboard)", orgID).Find(&dashAlerts) + return sess.SQL("select * from alert WHERE org_id = ?", orgID).Find(&dashAlerts) }) if err != nil { return nil, 0, err diff --git a/pkg/services/ngalert/migration/store/state.go b/pkg/services/ngalert/migration/store/state.go new file mode 100644 index 00000000000..e83a1e420b6 --- /dev/null +++ b/pkg/services/ngalert/migration/store/state.go @@ -0,0 +1,7 @@ +package store + +// OrgMigrationState contains basic information about the state of an org migration. +type OrgMigrationState struct { + OrgID int64 `json:"orgId"` + CreatedFolders []string `json:"createdFolders"` +} diff --git a/pkg/services/ngalert/migration/ualert.go b/pkg/services/ngalert/migration/ualert.go index f44783828b3..1b8b02bd2e9 100644 --- a/pkg/services/ngalert/migration/ualert.go +++ b/pkg/services/ngalert/migration/ualert.go @@ -2,109 +2,106 @@ package migration import ( "context" + "errors" "fmt" + "github.com/grafana/grafana/pkg/infra/log" legacymodels "github.com/grafana/grafana/pkg/services/alerting/models" + "github.com/grafana/grafana/pkg/services/dashboards" migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models" - "github.com/grafana/grafana/pkg/services/ngalert/models" ) -func (om *OrgMigration) migrateAlerts(ctx context.Context, alerts []*legacymodels.Alert, info migmodels.DashboardUpgradeInfo) ([]models.AlertRule, error) { - log := om.log.New( - "dashboardUid", info.DashboardUID, - "dashboardName", info.DashboardName, - "newFolderUid", info.NewFolderUID, - "newFolderNane", info.NewFolderName, - ) +// ErrOrphanedAlert is used for legacy alerts that are missing their dashboard. +var ErrOrphanedAlert = errors.New("orphaned") - rules := make([]models.AlertRule, 0, len(alerts)) +func (om *OrgMigration) migrateAlerts(ctx context.Context, l log.Logger, alerts []*legacymodels.Alert, dashboard *dashboards.Dashboard) []*migmodels.AlertPair { + pairs := make([]*migmodels.AlertPair, 0, len(alerts)) for _, da := range alerts { - al := log.New("ruleID", da.ID, "ruleName", da.Name) - alertRule, err := om.migrateAlert(ctx, al, da, info) + al := l.New("ruleId", da.ID, "ruleName", da.Name) + + alertRule, err := om.migrateAlert(ctx, al, da, dashboard) if err != nil { - return nil, fmt.Errorf("migrate alert '%s': %w", da.Name, err) + al.Warn("Failed to migrate alert", "error", err) + pairs = append(pairs, migmodels.NewAlertPair(da, err)) + continue } - rules = append(rules, *alertRule) + + pair := migmodels.NewAlertPair(da, nil) + pair.Rule = alertRule + pairs = append(pairs, pair) } - return rules, nil + return pairs } -func (om *OrgMigration) migrateDashboard(ctx context.Context, dashID int64, alerts []*legacymodels.Alert) ([]models.AlertRule, error) { - info, err := om.migratedFolder(ctx, om.log, dashID) +func (om *OrgMigration) migrateDashboard(ctx context.Context, dashID int64, alerts []*legacymodels.Alert) *migmodels.DashboardUpgrade { + dashboard, err := om.migrationStore.GetDashboard(ctx, om.orgID, dashID) if err != nil { - return nil, fmt.Errorf("get or create migrated folder: %w", err) - } - rules, err := om.migrateAlerts(ctx, alerts, *info) - if err != nil { - return nil, fmt.Errorf("migrate and save alerts: %w", err) + if errors.Is(err, dashboards.ErrDashboardNotFound) { + err = fmt.Errorf("%w: missing dashboard", ErrOrphanedAlert) + } + du := migmodels.NewDashboardUpgrade(dashID) + du.AddAlertErrors(err, alerts...) + return du } + l := om.log.New( + "dashboardTitle", dashboard.Title, + "dashboardUid", dashboard.UID, + ) + l.Info("Migrating alerts for dashboard", "alertCount", len(alerts)) - return rules, nil + du := migmodels.NewDashboardUpgrade(dashID) + du.UID = dashboard.UID + du.Title = dashboard.Title + // nolint:staticcheck + du.FolderID = dashboard.FolderID + + pairs := om.migrateAlerts(ctx, l, alerts, dashboard) + for _, pair := range pairs { + du.MigratedAlerts[pair.LegacyRule.PanelID] = pair + } + return du } -func (om *OrgMigration) migrateOrgAlerts(ctx context.Context) error { +func (om *OrgMigration) migrateOrgAlerts(ctx context.Context) ([]*migmodels.DashboardUpgrade, error) { mappedAlerts, cnt, err := om.migrationStore.GetOrgDashboardAlerts(ctx, om.orgID) if err != nil { - return fmt.Errorf("load alerts: %w", err) + return nil, fmt.Errorf("load alerts: %w", err) } om.log.Info("Alerts found to migrate", "alerts", cnt) + dashboardUpgrades := make([]*migmodels.DashboardUpgrade, 0, len(mappedAlerts)) for dashID, alerts := range mappedAlerts { - rules, err := om.migrateDashboard(ctx, dashID, alerts) - if err != nil { - return fmt.Errorf("migrate and save dashboard '%d': %w", dashID, err) - } - - if len(rules) > 0 { - om.log.Debug("Inserting migrated alert rules", "count", len(rules)) - err := om.migrationStore.InsertAlertRules(ctx, rules...) - if err != nil { - return fmt.Errorf("insert alert rules: %w", err) - } - } + du := om.migrateDashboard(ctx, dashID, alerts) + dashboardUpgrades = append(dashboardUpgrades, du) } - return nil + return dashboardUpgrades, nil } -func (om *OrgMigration) migrateOrgChannels(ctx context.Context) (*migmodels.Alertmanager, error) { +func (om *OrgMigration) migrateOrgChannels(ctx context.Context) ([]*migmodels.ContactPair, error) { channels, err := om.migrationStore.GetNotificationChannels(ctx, om.orgID) if err != nil { return nil, fmt.Errorf("load notification channels: %w", err) } - // Cache for later use by alerts - om.channelCache.LoadChannels(channels) - - amConfig, err := om.migrateChannels(channels) + pairs, err := om.migrateChannels(channels) if err != nil { return nil, err } - return amConfig, nil + return pairs, nil } -func (om *OrgMigration) migrateOrg(ctx context.Context) error { +func (om *OrgMigration) migrateOrg(ctx context.Context) ([]*migmodels.DashboardUpgrade, []*migmodels.ContactPair, error) { om.log.Info("Migrating alerts for organisation") - - amConfig, err := om.migrateOrgChannels(ctx) + pairs, err := om.migrateOrgChannels(ctx) if err != nil { - return fmt.Errorf("migrate channels: %w", err) + return nil, nil, fmt.Errorf("migrate channels: %w", err) } - err = om.migrateOrgAlerts(ctx) + dashboardUpgrades, err := om.migrateOrgAlerts(ctx) if err != nil { - return fmt.Errorf("migrate alerts: %w", err) + return nil, nil, fmt.Errorf("migrate alerts: %w", err) } - if err := om.writeSilencesFile(); err != nil { - return fmt.Errorf("write silence file for org %d: %w", om.orgID, err) - } - - if amConfig != nil { - if err := om.migrationStore.SaveAlertmanagerConfiguration(ctx, om.orgID, amConfig.Config); err != nil { - return err - } - } - - return nil + return dashboardUpgrades, pairs, nil } diff --git a/pkg/services/ngalert/migration/ualert_test.go b/pkg/services/ngalert/migration/ualert_test.go index 0cea95ad99e..038f3eb6c73 100644 --- a/pkg/services/ngalert/migration/ualert_test.go +++ b/pkg/services/ngalert/migration/ualert_test.go @@ -66,7 +66,7 @@ func Test_validateAlertmanagerConfig(t *testing.T) { config := configFromReceivers(t, tt.receivers) require.NoError(t, encryptSecureSettings(config, mg)) // make sure we encrypt the settings - err := mg.validateAlertmanagerConfig(config) + err := service.newSync(1).validateAlertmanagerConfig(config) if tt.err != nil { require.Error(t, err) require.EqualError(t, err, tt.err.Error())