mirror of
https://github.com/grafana/grafana.git
synced 2025-02-10 15:45:43 -06:00
Alerting: Dry-run legacy upgrade on startup (#82835)
Adds a feature flag (alertingUpgradeDryrunOnStart) that will dry-run the legacy alert upgrade on startup. It is enabled by default. When on legacy alerting, this feature flag will log the results of the legacy alerting upgrade on startup and draw attention to anything in the current legacy alerting configuration that will cause issues when the upgrade is eventually performed. It acts as a log warning for users who must take action before upgrading to Grafana v11, in which legacy alerting will be removed.
This commit is contained in:
parent
bc8952b9f1
commit
dfaf6d1e2e
@ -58,6 +58,7 @@ Some features are enabled by default. You can disable these feature by setting t
|
||||
| `lokiQueryHints` | Enables query hints for Loki | Yes |
|
||||
| `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes |
|
||||
| `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | |
|
||||
| `alertingUpgradeDryrunOnStart` | When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes. | Yes |
|
||||
|
||||
## Preview feature toggles
|
||||
|
||||
|
@ -181,4 +181,5 @@ export interface FeatureToggles {
|
||||
newPDFRendering?: boolean;
|
||||
kubernetesAggregator?: boolean;
|
||||
groupByVariable?: boolean;
|
||||
alertingUpgradeDryrunOnStart?: boolean;
|
||||
}
|
||||
|
@ -1214,6 +1214,15 @@ var (
|
||||
HideFromDocs: true,
|
||||
HideFromAdminPage: true,
|
||||
},
|
||||
{
|
||||
Name: "alertingUpgradeDryrunOnStart",
|
||||
Description: "When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.",
|
||||
FrontendOnly: false,
|
||||
Stage: FeatureStageGeneralAvailability,
|
||||
Owner: grafanaAlertingSquad,
|
||||
RequiresRestart: true,
|
||||
Expression: "true", // enabled by default
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -162,3 +162,4 @@ groupToNestedTableTransformation,preview,@grafana/dataviz-squad,false,false,true
|
||||
newPDFRendering,experimental,@grafana/sharing-squad,false,false,false
|
||||
kubernetesAggregator,experimental,@grafana/grafana-app-platform-squad,false,true,false
|
||||
groupByVariable,experimental,@grafana/dashboards-squad,false,false,false
|
||||
alertingUpgradeDryrunOnStart,GA,@grafana/alerting-squad,false,true,false
|
||||
|
|
@ -658,4 +658,8 @@ const (
|
||||
// FlagGroupByVariable
|
||||
// Enable groupBy variable support in scenes dashboards
|
||||
FlagGroupByVariable = "groupByVariable"
|
||||
|
||||
// FlagAlertingUpgradeDryrunOnStart
|
||||
// When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.
|
||||
FlagAlertingUpgradeDryrunOnStart = "alertingUpgradeDryrunOnStart"
|
||||
)
|
||||
|
@ -2124,6 +2124,22 @@
|
||||
"codeowner": "@grafana/plugins-platform-backend",
|
||||
"requiresRestart": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingUpgradeDryrunOnStart",
|
||||
"resourceVersion": "1708098586429",
|
||||
"creationTimestamp": "2024-02-15T21:01:16Z",
|
||||
"annotations": {
|
||||
"grafana.app/updatedTimestamp": "2024-02-16 15:49:46.429030423 +0000 UTC"
|
||||
}
|
||||
},
|
||||
"spec": {
|
||||
"description": "When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.",
|
||||
"stage": "GA",
|
||||
"codeowner": "@grafana/alerting-squad",
|
||||
"requiresRestart": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@ -11,6 +11,7 @@ import (
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
|
||||
@ -27,16 +28,17 @@ const (
|
||||
var ErrDiscontinued = errors.New("discontinued")
|
||||
|
||||
// migrateChannels creates Alertmanager configs with migrated receivers and routes.
|
||||
func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification) ([]*migmodels.ContactPair, error) {
|
||||
func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification, log log.Logger) ([]*migmodels.ContactPair, error) {
|
||||
// Create all newly migrated receivers from legacy notification channels.
|
||||
pairs := make([]*migmodels.ContactPair, 0, len(channels))
|
||||
for _, c := range channels {
|
||||
l := log.New("type", c.Type, "name", c.Name, "uid", c.UID)
|
||||
pair := &migmodels.ContactPair{
|
||||
Channel: c,
|
||||
}
|
||||
receiver, err := om.createReceiver(c)
|
||||
if err != nil {
|
||||
om.log.Warn("Failed to create receiver", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err)
|
||||
l.Warn("Failed to create receiver", "error", err)
|
||||
pair.Error = err
|
||||
pairs = append(pairs, pair)
|
||||
continue
|
||||
@ -45,7 +47,7 @@ func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotificati
|
||||
|
||||
route, err := createRoute(c, receiver.Name)
|
||||
if err != nil {
|
||||
om.log.Warn("Failed to create route", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err)
|
||||
l.Warn("Failed to create route", "error", err)
|
||||
pair.Error = err
|
||||
pairs = append(pairs, pair)
|
||||
continue
|
||||
|
@ -417,7 +417,7 @@ func TestSetupAlertmanagerConfig(t *testing.T) {
|
||||
|
||||
service := NewTestMigrationService(t, sqlStore, nil)
|
||||
m := service.newOrgMigration(1)
|
||||
pairs, err := m.migrateChannels(tt.channels)
|
||||
pairs, err := m.migrateChannels(tt.channels, m.log)
|
||||
if tt.expErr != nil {
|
||||
require.Error(t, err)
|
||||
require.EqualError(t, err, tt.expErr.Error())
|
||||
|
@ -408,7 +408,7 @@ func (sync *sync) createFolder(ctx context.Context, orgID int64, title string, n
|
||||
// but the only folders we should be creating here are ones with permission
|
||||
// hash suffix or general alerting. Neither of which is likely to spuriously
|
||||
// conflict with an existing folder.
|
||||
sync.log.Warn("Folder already exists, using existing folder", "title", title)
|
||||
sync.log.FromContext(ctx).Warn("Folder already exists, using existing folder", "title", title)
|
||||
f, err := sync.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &title, SignedInUser: getMigrationUser(orgID)})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -257,7 +257,7 @@ func (sync *sync) handleAlertmanager(ctx context.Context, state *migrationStore.
|
||||
return nil, fmt.Errorf("validate AlertmanagerConfig: %w", err)
|
||||
}
|
||||
|
||||
sync.log.Info("Writing alertmanager config", "receivers", len(config.AlertmanagerConfig.Receivers), "routes", len(config.AlertmanagerConfig.Route.Routes))
|
||||
sync.log.FromContext(ctx).Info("Writing alertmanager config", "receivers", len(config.AlertmanagerConfig.Receivers), "routes", len(config.AlertmanagerConfig.Route.Routes))
|
||||
if err := sync.migrationStore.SaveAlertmanagerConfiguration(ctx, sync.orgID, config); err != nil {
|
||||
return nil, fmt.Errorf("write AlertmanagerConfig: %w", err)
|
||||
}
|
||||
@ -350,7 +350,7 @@ func (sync *sync) handleDeleteRules(ctx context.Context, state *migrationStore.O
|
||||
if !errors.Is(err, migrationStore.ErrFolderNotDeleted) {
|
||||
return fmt.Errorf("delete folder '%s': %w", du.AlertFolderUID, err)
|
||||
}
|
||||
sync.log.Info("Failed to delete folder during cleanup", "error", err)
|
||||
sync.log.FromContext(ctx).Info("Failed to delete folder during cleanup", "error", err)
|
||||
} else {
|
||||
delete(createdbyMigration, du.AlertFolderUID)
|
||||
}
|
||||
@ -402,7 +402,7 @@ func (sync *sync) handleAddRules(ctx context.Context, state *migrationStore.OrgM
|
||||
}
|
||||
|
||||
if len(pairsWithRules) > 0 {
|
||||
l := sync.log.New("dashboardTitle", duToAdd.Title, "dashboardUid", duToAdd.UID)
|
||||
l := sync.log.FromContext(ctx).New("dashboardTitle", duToAdd.Title, "dashboardUid", duToAdd.UID)
|
||||
migratedFolder, err := sync.migratedFolder(ctx, l, duToAdd.UID, duToAdd.FolderID)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -426,7 +426,7 @@ func (sync *sync) handleAddRules(ctx context.Context, state *migrationStore.OrgM
|
||||
}
|
||||
|
||||
if len(pairs) > 0 {
|
||||
sync.log.Debug("Inserting migrated alert rules", "count", len(pairs))
|
||||
sync.log.FromContext(ctx).Debug("Inserting migrated alert rules", "count", len(pairs))
|
||||
|
||||
// We ensure consistency in title deduplication as well as insertions by sorting pairs first.
|
||||
sort.SliceStable(pairs, func(i, j int) bool {
|
||||
@ -465,7 +465,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
|
||||
// Populate deduplicators from database.
|
||||
titles, err := sync.migrationStore.GetAlertRuleTitles(ctx, sync.orgID, namespaces...)
|
||||
if err != nil {
|
||||
sync.log.Warn("Failed to get alert rule titles for title deduplication", "error", err)
|
||||
sync.log.FromContext(ctx).Warn("Failed to get alert rule titles for title deduplication", "error", err)
|
||||
}
|
||||
|
||||
titleDedups := make(map[string]*migmodels.Deduplicator, len(namespaces))
|
||||
@ -474,7 +474,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
|
||||
}
|
||||
|
||||
for _, pair := range pairs {
|
||||
l := sync.log.New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
|
||||
l := sync.log.FromContext(ctx).New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
|
||||
|
||||
// Here we ensure that the alert rule title is unique within the folder.
|
||||
titleDeduplicator := titleDedups[pair.Rule.NamespaceUID]
|
||||
@ -495,7 +495,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
|
||||
func (sync *sync) attachContactPointLabels(ctx context.Context, state *migrationStore.OrgMigrationState, pairs []*migmodels.AlertPair, amConfig *migmodels.Alertmanager) ([]models.AlertRule, error) {
|
||||
rules := make([]models.AlertRule, 0, len(pairs))
|
||||
for _, pair := range pairs {
|
||||
l := sync.log.New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
|
||||
l := sync.log.FromContext(ctx).New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
|
||||
alertChannels, err := sync.extractChannels(ctx, pair.LegacyRule)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("extract channel IDs: %w", err)
|
||||
@ -524,7 +524,7 @@ func (sync *sync) attachContactPointLabels(ctx context.Context, state *migration
|
||||
|
||||
// extractChannels extracts notification channels from the given legacy dashboard alert parsed settings.
|
||||
func (sync *sync) extractChannels(ctx context.Context, alert *legacymodels.Alert) ([]*legacymodels.AlertNotification, error) {
|
||||
l := sync.log.New("ruleId", alert.ID, "ruleName", alert.Name)
|
||||
l := sync.log.FromContext(ctx).New("ruleId", alert.ID, "ruleName", alert.Name)
|
||||
rawSettings, err := json.Marshal(alert.Settings)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get settings: %w", err)
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/infra/serverlock"
|
||||
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
|
||||
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
|
||||
@ -42,6 +43,7 @@ type UpgradeService interface {
|
||||
type migrationService struct {
|
||||
lock *serverlock.ServerLockService
|
||||
cfg *setting.Cfg
|
||||
features featuremgmt.FeatureToggles
|
||||
log log.Logger
|
||||
store db.DB
|
||||
migrationStore migrationStore.Store
|
||||
@ -53,6 +55,7 @@ type migrationService struct {
|
||||
func ProvideService(
|
||||
lock *serverlock.ServerLockService,
|
||||
cfg *setting.Cfg,
|
||||
features featuremgmt.FeatureToggles,
|
||||
store db.DB,
|
||||
migrationStore migrationStore.Store,
|
||||
encryptionService secrets.Service,
|
||||
@ -61,6 +64,7 @@ func ProvideService(
|
||||
lock: lock,
|
||||
log: log.New("ngalert.migration"),
|
||||
cfg: cfg,
|
||||
features: features,
|
||||
store: store,
|
||||
migrationStore: migrationStore,
|
||||
encryptionService: encryptionService,
|
||||
@ -122,6 +126,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
|
||||
return ms.tryAndSet(ctx, orgID, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
|
||||
summary := definitions.OrgMigrationSummary{}
|
||||
om := ms.newOrgMigration(orgID)
|
||||
l := om.log.FromContext(ctx)
|
||||
oldState, err := om.migrationStore.GetOrgMigrationState(ctx, orgID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get org migration state: %w", err)
|
||||
@ -135,7 +140,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
|
||||
var delta StateDelta
|
||||
if err != nil && errors.Is(err, migrationStore.ErrNotFound) {
|
||||
// Notification channel no longer exists, delete this record from the state as well as delete any contacts points and routes.
|
||||
om.log.Debug("Notification channel no longer exists", "channelId", channelID)
|
||||
l.Debug("Notification channel no longer exists", "channelId", channelID)
|
||||
summary.Removed = true
|
||||
pair, ok := oldState.MigratedChannels[channelID]
|
||||
if !ok {
|
||||
@ -145,7 +150,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
|
||||
ChannelsToDelete: []*migrationStore.ContactPair{pair},
|
||||
}
|
||||
} else {
|
||||
pairs, err := om.migrateChannels([]*legacymodels.AlertNotification{channel})
|
||||
pairs, err := om.migrateChannels([]*legacymodels.AlertNotification{channel}, l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -215,7 +220,7 @@ func (ms *migrationService) MigrateAlert(ctx context.Context, orgID int64, dashb
|
||||
|
||||
if err != nil && errors.Is(err, migrationStore.ErrNotFound) {
|
||||
// Legacy alert no longer exists, delete this record from the state.
|
||||
om.log.Debug("Alert no longer exists", "dashboardId", dashboardID, "panelId", panelID)
|
||||
om.log.FromContext(ctx).Debug("Alert no longer exists", "dashboardId", dashboardID, "panelId", panelID)
|
||||
summary.Removed = true
|
||||
} else {
|
||||
newDu := om.migrateDashboard(ctx, dashboardID, []*legacymodels.Alert{alert})
|
||||
@ -290,7 +295,7 @@ func (ms *migrationService) MigrateAllDashboardAlerts(ctx context.Context, orgID
|
||||
func (ms *migrationService) MigrateOrg(ctx context.Context, orgID int64, skipExisting bool) (definitions.OrgMigrationSummary, error) {
|
||||
return ms.tryAndSet(ctx, orgID, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
|
||||
summary := definitions.OrgMigrationSummary{}
|
||||
ms.log.Info("Starting legacy migration for org", "orgId", orgID, "skipExisting", skipExisting)
|
||||
ms.log.FromContext(ctx).Info("Starting legacy upgrade for org", "orgId", orgID, "skipExisting", skipExisting)
|
||||
om := ms.newOrgMigration(orgID)
|
||||
dashboardUpgrades, pairs, err := om.migrateOrg(ctx)
|
||||
if err != nil {
|
||||
@ -339,22 +344,26 @@ func (ms *migrationService) GetOrgMigrationState(ctx context.Context, orgID int6
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ErrSuccessRollback is returned when a dry-run succeeded and the changes were rolled back.
|
||||
var ErrSuccessRollback = errors.New("dry-run succeeded, rolling back")
|
||||
|
||||
// Run starts the migration to transition between legacy alerting and unified alerting based on the current and desired
|
||||
// alerting type as determined by the kvstore and configuration, respectively.
|
||||
func (ms *migrationService) Run(ctx context.Context) error {
|
||||
var errMigration error
|
||||
errLock := ms.lock.LockExecuteAndRelease(ctx, actionName, time.Minute*10, func(ctx context.Context) {
|
||||
ms.log.Info("Starting")
|
||||
errMigration = ms.store.InTransaction(ctx, func(ctx context.Context) error {
|
||||
currentType, err := ms.migrationStore.GetCurrentAlertingType(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting migration status: %w", err)
|
||||
}
|
||||
return ms.applyTransition(ctx, newTransition(currentType, ms.cfg))
|
||||
})
|
||||
l := ms.log.FromContext(ctx)
|
||||
l.Info("Starting")
|
||||
currentType, err := ms.migrationStore.GetCurrentAlertingType(ctx)
|
||||
if err != nil {
|
||||
errMigration = fmt.Errorf("getting migration status: %w", err)
|
||||
return
|
||||
}
|
||||
|
||||
errMigration = ms.applyTransition(ctx, ms.newTransition(currentType))
|
||||
})
|
||||
if errLock != nil {
|
||||
ms.log.Warn("Server lock for alerting migration already exists")
|
||||
ms.log.FromContext(ctx).Warn("Server lock for alerting migration already exists")
|
||||
return nil
|
||||
}
|
||||
if errMigration != nil {
|
||||
@ -364,16 +373,20 @@ func (ms *migrationService) Run(ctx context.Context) error {
|
||||
}
|
||||
|
||||
// newTransition creates a transition based on the current alerting type and the current configuration.
|
||||
func newTransition(currentType migrationStore.AlertingType, cfg *setting.Cfg) transition {
|
||||
func (ms *migrationService) newTransition(currentType migrationStore.AlertingType) transition {
|
||||
desiredType := migrationStore.Legacy
|
||||
if cfg.UnifiedAlerting.IsEnabled() {
|
||||
if ms.cfg.UnifiedAlerting.IsEnabled() {
|
||||
desiredType = migrationStore.UnifiedAlerting
|
||||
}
|
||||
return transition{
|
||||
CurrentType: currentType,
|
||||
DesiredType: desiredType,
|
||||
CleanOnDowngrade: cfg.ForceMigration,
|
||||
CleanOnUpgrade: cfg.UnifiedAlerting.Upgrade.CleanUpgrade,
|
||||
CleanOnDowngrade: ms.cfg.ForceMigration,
|
||||
CleanOnUpgrade: ms.cfg.UnifiedAlerting.Upgrade.CleanUpgrade,
|
||||
// In 10.4.0+, even if legacy alerting is enabled and the user is not intending to update, we want to "test the waters".
|
||||
// This is intended to surface any potential issues that would exist if the upgrade would be run right now but without
|
||||
// risk of failing startup.
|
||||
DryrunUpgrade: ms.features.IsEnabledGlobally(featuremgmt.FlagAlertingUpgradeDryrunOnStart) && currentType == migrationStore.Legacy && desiredType == migrationStore.Legacy,
|
||||
}
|
||||
}
|
||||
|
||||
@ -383,16 +396,17 @@ type transition struct {
|
||||
DesiredType migrationStore.AlertingType
|
||||
CleanOnDowngrade bool
|
||||
CleanOnUpgrade bool
|
||||
DryrunUpgrade bool
|
||||
}
|
||||
|
||||
// isNoChange returns true if the migration is a no-op.
|
||||
func (t transition) isNoChange() bool {
|
||||
return t.CurrentType == t.DesiredType
|
||||
return t.CurrentType == t.DesiredType && !t.DryrunUpgrade
|
||||
}
|
||||
|
||||
// isUpgrading returns true if the migration is an upgrade from legacy alerting to unified alerting.
|
||||
func (t transition) isUpgrading() bool {
|
||||
return t.CurrentType == migrationStore.Legacy && t.DesiredType == migrationStore.UnifiedAlerting
|
||||
return (t.CurrentType == migrationStore.Legacy && t.DesiredType == migrationStore.UnifiedAlerting) || t.DryrunUpgrade
|
||||
}
|
||||
|
||||
// isDowngrading returns true if the migration is a downgrade from unified alerting to legacy alerting.
|
||||
@ -412,37 +426,60 @@ func (t transition) shouldClean() bool {
|
||||
// If the transition is an upgrade and CleanOnUpgrade is false, all orgs will be migrated.
|
||||
// If the transition is an upgrade and CleanOnUpgrade is true, all unified alerting data will be deleted and then all orgs will be migrated.
|
||||
func (ms *migrationService) applyTransition(ctx context.Context, t transition) error {
|
||||
l := ms.log.New(
|
||||
"CurrentType", t.CurrentType,
|
||||
"DesiredType", t.DesiredType,
|
||||
"CleanOnDowngrade", t.CleanOnDowngrade,
|
||||
"CleanOnUpgrade", t.CleanOnUpgrade,
|
||||
)
|
||||
if t.isNoChange() {
|
||||
l.Info("Migration already complete")
|
||||
if t.DryrunUpgrade {
|
||||
ctx = log.WithContextualAttributes(ctx, []any{"dryrun", "true"})
|
||||
}
|
||||
|
||||
err := ms.store.InTransaction(ctx, func(ctx context.Context) error {
|
||||
l := ms.log.FromContext(ctx)
|
||||
if t.isNoChange() {
|
||||
l.Debug("No change in alerting type")
|
||||
return nil
|
||||
}
|
||||
|
||||
if t.DryrunUpgrade {
|
||||
l.Info(fmt.Sprintf("Dry-running upgrade. To deactivate on-start dry-run, disable the feature flag '%s'", featuremgmt.FlagAlertingUpgradeDryrunOnStart), "cleanOnUpgrade", t.CleanOnUpgrade)
|
||||
} else {
|
||||
l.Info("Applying transition", "currentType", t.CurrentType, "desiredType", t.DesiredType, "cleanOnDowngrade", t.CleanOnDowngrade, "cleanOnUpgrade", t.CleanOnUpgrade)
|
||||
}
|
||||
|
||||
if t.shouldClean() {
|
||||
l.Info("Cleaning up unified alerting data")
|
||||
if err := ms.migrationStore.RevertAllOrgs(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up unified alerting data: %w", err)
|
||||
}
|
||||
l.Info("Unified alerting data deleted")
|
||||
}
|
||||
|
||||
if t.isUpgrading() {
|
||||
if err := ms.migrateAllOrgs(ctx); err != nil {
|
||||
return fmt.Errorf("executing migration: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := ms.migrationStore.SetCurrentAlertingType(ctx, t.DesiredType); err != nil {
|
||||
return fmt.Errorf("setting migration status: %w", err)
|
||||
}
|
||||
|
||||
if t.DryrunUpgrade {
|
||||
// Ensure we rollback the changes made during the dry-run.
|
||||
return ErrSuccessRollback
|
||||
}
|
||||
|
||||
l.Info("Completed alerting migration")
|
||||
return nil
|
||||
})
|
||||
if t.DryrunUpgrade {
|
||||
if errors.Is(err, ErrSuccessRollback) {
|
||||
ms.log.FromContext(ctx).Info("Dry-run upgrade succeeded. No changes were made. Current legacy alerting setup is ready to upgrade.")
|
||||
} else {
|
||||
ms.log.FromContext(ctx).Warn("Dry-run upgrade failed. No changes were made. Current legacy alerting setup will fail to upgrade, issues must be fixed before upgrading Grafana to v11 as legacy alerting will be removed. See https://grafana.com/docs/grafana/v10.4/alerting/set-up/migrating-alerts/ for more details.", "err", err)
|
||||
}
|
||||
// Dry should never error.
|
||||
return nil
|
||||
}
|
||||
|
||||
if t.shouldClean() {
|
||||
l.Info("Cleaning up unified alerting data")
|
||||
if err := ms.migrationStore.RevertAllOrgs(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up unified alerting data: %w", err)
|
||||
}
|
||||
l.Info("Unified alerting data deleted")
|
||||
}
|
||||
|
||||
if t.isUpgrading() {
|
||||
if err := ms.migrateAllOrgs(ctx); err != nil {
|
||||
return fmt.Errorf("executing migration: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := ms.migrationStore.SetCurrentAlertingType(ctx, t.DesiredType); err != nil {
|
||||
return fmt.Errorf("setting migration status: %w", err)
|
||||
}
|
||||
|
||||
l.Info("Completed legacy migration")
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
// migrateAllOrgs executes the migration for all orgs.
|
||||
@ -454,12 +491,13 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
|
||||
|
||||
for _, o := range orgs {
|
||||
om := ms.newOrgMigration(o.ID)
|
||||
l := om.log.FromContext(ctx)
|
||||
migrated, err := ms.migrationStore.IsMigrated(ctx, o.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting migration status for org %d: %w", o.ID, err)
|
||||
}
|
||||
if migrated {
|
||||
om.log.Info("Org already migrated, skipping")
|
||||
l.Info("Org already migrated, skipping")
|
||||
continue
|
||||
}
|
||||
|
||||
@ -492,7 +530,7 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
|
||||
return err
|
||||
}
|
||||
|
||||
err = ms.silences.createSilences(ctx, o.ID, om.log)
|
||||
err = ms.silences.createSilences(ctx, o.ID, l)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create silences for org %d: %w", o.ID, err)
|
||||
}
|
||||
@ -509,7 +547,7 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
|
||||
// configurations, and silence files for a single organization.
|
||||
// In addition, it will delete all folders and permissions originally created by this migration.
|
||||
func (ms *migrationService) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
ms.log.Info("Reverting legacy migration for org", "orgId", orgID)
|
||||
ms.log.FromContext(ctx).Info("Reverting legacy upgrade for org", "orgId", orgID)
|
||||
_, err := ms.try(ctx, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
|
||||
return nil, ms.migrationStore.RevertOrg(ctx, orgID)
|
||||
})
|
||||
@ -519,7 +557,7 @@ func (ms *migrationService) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
// RevertAllOrgs reverts the migration for all orgs, deleting all unified alerting resources such as alert rules, alertmanager configurations, and silence files.
|
||||
// In addition, it will delete all folders and permissions originally created by this migration.
|
||||
func (ms *migrationService) RevertAllOrgs(ctx context.Context) error {
|
||||
ms.log.Info("Reverting legacy migration for all orgs")
|
||||
ms.log.FromContext(ctx).Info("Reverting legacy upgrade for all orgs")
|
||||
_, err := ms.try(ctx, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
|
||||
return nil, ms.migrationStore.RevertAllOrgs(ctx)
|
||||
})
|
||||
@ -605,7 +643,7 @@ func (ms *migrationService) fromDashboardUpgrades(ctx context.Context, orgID int
|
||||
} else {
|
||||
// We could potentially set an error here, but it's not really an error. It just means that the
|
||||
// user deleted the migrated rule after the migration. This could just as easily be intentional.
|
||||
ms.log.Debug("Could not find rule for migrated alert", "alertId", a.ID, "ruleUid", p.NewRuleUID)
|
||||
ms.log.FromContext(ctx).Debug("Could not find rule for migrated alert", "alertId", a.ID, "ruleUid", p.NewRuleUID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -443,6 +443,7 @@ var revertPermissions = []accesscontrol.Permission{
|
||||
func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
return ms.store.InTransaction(ctx, func(ctx context.Context) error {
|
||||
return ms.store.WithDbSession(ctx, func(sess *db.Session) error {
|
||||
l := ms.log.FromContext(ctx)
|
||||
if _, err := sess.Exec("DELETE FROM alert_rule WHERE org_id = ?", orgID); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -456,7 +457,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
return err
|
||||
}
|
||||
if err := ms.DeleteFolders(ctx, orgID, state.CreatedFolders...); err != nil {
|
||||
ms.log.Warn("Failed to delete migrated folders", "orgId", orgID, "err", err)
|
||||
l.Warn("Failed to delete migrated folders", "orgId", orgID, "err", err)
|
||||
}
|
||||
|
||||
if _, err := sess.Exec("DELETE FROM alert_configuration WHERE org_id = ?", orgID); err != nil {
|
||||
@ -485,7 +486,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
}
|
||||
for _, f := range files {
|
||||
if err := os.Remove(f); err != nil {
|
||||
ms.log.Error("Failed to remove silence file", "file", f, "err", err)
|
||||
l.Error("Failed to remove silence file", "file", f, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -500,6 +501,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
|
||||
func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
|
||||
return ms.store.InTransaction(ctx, func(ctx context.Context) error {
|
||||
return ms.store.WithDbSession(ctx, func(sess *db.Session) error {
|
||||
l := ms.log.FromContext(ctx)
|
||||
if _, err := sess.Exec("DELETE FROM alert_rule"); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -518,7 +520,7 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
|
||||
return err
|
||||
}
|
||||
if err := ms.DeleteFolders(ctx, o.ID, state.CreatedFolders...); err != nil {
|
||||
ms.log.Warn("Failed to delete migrated folders", "orgId", o.ID, "err", err)
|
||||
l.Warn("Failed to delete migrated folders", "orgId", o.ID, "err", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
@ -549,7 +551,7 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
|
||||
}
|
||||
for _, f := range files {
|
||||
if err := os.Remove(f); err != nil {
|
||||
ms.log.Error("Failed to remove silence file", "file", f, "err", err)
|
||||
l.Error("Failed to remove silence file", "file", f, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/serverlock"
|
||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
|
||||
fake_secrets "github.com/grafana/grafana/pkg/services/secrets/fakes"
|
||||
@ -24,6 +25,7 @@ func NewTestMigrationService(t *testing.T, sqlStore *sqlstore.SQLStore, cfg *set
|
||||
svc, err := ProvideService(
|
||||
serverlock.ProvideService(sqlStore, tracing.InitializeTracerForTest()),
|
||||
cfg,
|
||||
featuremgmt.WithFeatures(),
|
||||
sqlStore,
|
||||
migrationStore.NewTestMigrationStore(t, sqlStore, cfg),
|
||||
fake_secrets.NewFakeSecretsService(),
|
||||
|
@ -45,11 +45,11 @@ func (om *OrgMigration) migrateDashboard(ctx context.Context, dashID int64, aler
|
||||
du.AddAlertErrors(err, alerts...)
|
||||
return du
|
||||
}
|
||||
l := om.log.New(
|
||||
l := om.log.FromContext(ctx).New(
|
||||
"dashboardTitle", dashboard.Title,
|
||||
"dashboardUid", dashboard.UID,
|
||||
)
|
||||
l.Info("Migrating alerts for dashboard", "alertCount", len(alerts))
|
||||
l.Debug("Migrating alerts for dashboard", "alertCount", len(alerts))
|
||||
|
||||
du := migmodels.NewDashboardUpgrade(dashID)
|
||||
du.UID = dashboard.UID
|
||||
@ -70,7 +70,7 @@ func (om *OrgMigration) migrateOrgAlerts(ctx context.Context) ([]*migmodels.Dash
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load alerts: %w", err)
|
||||
}
|
||||
om.log.Info("Alerts found to migrate", "alerts", cnt)
|
||||
om.log.FromContext(ctx).Info("Alerts found to migrate", "alerts", cnt)
|
||||
|
||||
dashboardUpgrades := make([]*migmodels.DashboardUpgrade, 0, len(mappedAlerts))
|
||||
for dashID, alerts := range mappedAlerts {
|
||||
@ -86,7 +86,7 @@ func (om *OrgMigration) migrateOrgChannels(ctx context.Context) ([]*migmodels.Co
|
||||
return nil, fmt.Errorf("load notification channels: %w", err)
|
||||
}
|
||||
|
||||
pairs, err := om.migrateChannels(channels)
|
||||
pairs, err := om.migrateChannels(channels, om.log.FromContext(ctx))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -94,7 +94,7 @@ func (om *OrgMigration) migrateOrgChannels(ctx context.Context) ([]*migmodels.Co
|
||||
}
|
||||
|
||||
func (om *OrgMigration) migrateOrg(ctx context.Context) ([]*migmodels.DashboardUpgrade, []*migmodels.ContactPair, error) {
|
||||
om.log.Info("Migrating alerts for organisation")
|
||||
om.log.FromContext(ctx).Info("Migrating alerts for organisation")
|
||||
pairs, err := om.migrateOrgChannels(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("migrate channels: %w", err)
|
||||
|
Loading…
Reference in New Issue
Block a user