Alerting: Dry-run legacy upgrade on startup (#82835)

Adds a feature flag (alertingUpgradeDryrunOnStart) that dry-runs the legacy
alerting upgrade on startup. It is enabled by default and can be turned off like
any other feature toggle in the [feature_toggles] section of the Grafana
configuration file.

When Grafana is running legacy alerting, this flag logs the results of the
upgrade dry-run on startup and draws attention to anything in the current legacy
alerting configuration that will cause issues when the upgrade is eventually
performed. It acts as an early warning for installations that must take action
before upgrading to Grafana v11, where legacy alerting will be removed.
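
At its core, the flag gates a new DryrunUpgrade field on the migration transition: the dry-run only happens when the toggle is enabled and Grafana both is and will remain on legacy alerting. A minimal, runnable sketch of that decision; the type and function names below (alertingType, shouldDryrun, etc.) are stand-ins for illustration, not Grafana's actual identifiers:

```go
package main

import "fmt"

// Stand-in for migrationStore.AlertingType.
type alertingType string

const (
	legacyAlerting  alertingType = "Legacy"
	unifiedAlerting alertingType = "UnifiedAlerting"
)

// shouldDryrun mirrors the gating added to newTransition in this PR: only
// dry-run when the flag is on and no real transition is about to happen.
func shouldDryrun(flagEnabled bool, current, desired alertingType) bool {
	return flagEnabled && current == legacyAlerting && desired == legacyAlerting
}

func main() {
	fmt.Println(shouldDryrun(true, legacyAlerting, legacyAlerting))  // true: dry-run on startup
	fmt.Println(shouldDryrun(true, legacyAlerting, unifiedAlerting)) // false: a real upgrade runs instead
	fmt.Println(shouldDryrun(false, legacyAlerting, legacyAlerting)) // false: flag disabled
}
```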
Matthew Jacobson 2024-02-16 11:29:54 -05:00 committed by GitHub
parent bc8952b9f1
commit dfaf6d1e2e
14 changed files with 150 additions and 74 deletions


@ -58,6 +58,7 @@ Some features are enabled by default. You can disable these feature by setting t
| `lokiQueryHints` | Enables query hints for Loki | Yes |
| `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes |
| `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | |
| `alertingUpgradeDryrunOnStart` | When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes. | Yes |
## Preview feature toggles


@ -181,4 +181,5 @@ export interface FeatureToggles {
newPDFRendering?: boolean;
kubernetesAggregator?: boolean;
groupByVariable?: boolean;
alertingUpgradeDryrunOnStart?: boolean;
}


@ -1214,6 +1214,15 @@ var (
HideFromDocs: true,
HideFromAdminPage: true,
},
{
Name: "alertingUpgradeDryrunOnStart",
Description: "When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.",
FrontendOnly: false,
Stage: FeatureStageGeneralAvailability,
Owner: grafanaAlertingSquad,
RequiresRestart: true,
Expression: "true", // enabled by default
},
}
)


@ -162,3 +162,4 @@ groupToNestedTableTransformation,preview,@grafana/dataviz-squad,false,false,true
newPDFRendering,experimental,@grafana/sharing-squad,false,false,false
kubernetesAggregator,experimental,@grafana/grafana-app-platform-squad,false,true,false
groupByVariable,experimental,@grafana/dashboards-squad,false,false,false
alertingUpgradeDryrunOnStart,GA,@grafana/alerting-squad,false,true,false



@ -658,4 +658,8 @@ const (
// FlagGroupByVariable
// Enable groupBy variable support in scenes dashboards
FlagGroupByVariable = "groupByVariable"
// FlagAlertingUpgradeDryrunOnStart
// When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.
FlagAlertingUpgradeDryrunOnStart = "alertingUpgradeDryrunOnStart"
)


@ -2124,6 +2124,22 @@
"codeowner": "@grafana/plugins-platform-backend",
"requiresRestart": true
}
},
{
"metadata": {
"name": "alertingUpgradeDryrunOnStart",
"resourceVersion": "1708098586429",
"creationTimestamp": "2024-02-15T21:01:16Z",
"annotations": {
"grafana.app/updatedTimestamp": "2024-02-16 15:49:46.429030423 +0000 UTC"
}
},
"spec": {
"description": "When activated in legacy alerting mode, this initiates a dry-run of the Unified Alerting upgrade during each startup. It logs any issues detected without implementing any actual changes.",
"stage": "GA",
"codeowner": "@grafana/alerting-squad",
"requiresRestart": true
}
}
]
}


@ -11,6 +11,7 @@ import (
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
@ -27,16 +28,17 @@ const (
var ErrDiscontinued = errors.New("discontinued")
// migrateChannels creates Alertmanager configs with migrated receivers and routes.
func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification) ([]*migmodels.ContactPair, error) {
func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotification, log log.Logger) ([]*migmodels.ContactPair, error) {
// Create all newly migrated receivers from legacy notification channels.
pairs := make([]*migmodels.ContactPair, 0, len(channels))
for _, c := range channels {
l := log.New("type", c.Type, "name", c.Name, "uid", c.UID)
pair := &migmodels.ContactPair{
Channel: c,
}
receiver, err := om.createReceiver(c)
if err != nil {
om.log.Warn("Failed to create receiver", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err)
l.Warn("Failed to create receiver", "error", err)
pair.Error = err
pairs = append(pairs, pair)
continue
@ -45,7 +47,7 @@ func (om *OrgMigration) migrateChannels(channels []*legacymodels.AlertNotificati
route, err := createRoute(c, receiver.Name)
if err != nil {
om.log.Warn("Failed to create route", "type", c.Type, "name", c.Name, "uid", c.UID, "error", err)
l.Warn("Failed to create route", "error", err)
pair.Error = err
pairs = append(pairs, pair)
continue


@ -417,7 +417,7 @@ func TestSetupAlertmanagerConfig(t *testing.T) {
service := NewTestMigrationService(t, sqlStore, nil)
m := service.newOrgMigration(1)
pairs, err := m.migrateChannels(tt.channels)
pairs, err := m.migrateChannels(tt.channels, m.log)
if tt.expErr != nil {
require.Error(t, err)
require.EqualError(t, err, tt.expErr.Error())


@ -408,7 +408,7 @@ func (sync *sync) createFolder(ctx context.Context, orgID int64, title string, n
// but the only folders we should be creating here are ones with permission
// hash suffix or general alerting. Neither of which is likely to spuriously
// conflict with an existing folder.
sync.log.Warn("Folder already exists, using existing folder", "title", title)
sync.log.FromContext(ctx).Warn("Folder already exists, using existing folder", "title", title)
f, err := sync.migrationStore.GetFolder(ctx, &folder.GetFolderQuery{OrgID: orgID, Title: &title, SignedInUser: getMigrationUser(orgID)})
if err != nil {
return nil, err


@ -257,7 +257,7 @@ func (sync *sync) handleAlertmanager(ctx context.Context, state *migrationStore.
return nil, fmt.Errorf("validate AlertmanagerConfig: %w", err)
}
sync.log.Info("Writing alertmanager config", "receivers", len(config.AlertmanagerConfig.Receivers), "routes", len(config.AlertmanagerConfig.Route.Routes))
sync.log.FromContext(ctx).Info("Writing alertmanager config", "receivers", len(config.AlertmanagerConfig.Receivers), "routes", len(config.AlertmanagerConfig.Route.Routes))
if err := sync.migrationStore.SaveAlertmanagerConfiguration(ctx, sync.orgID, config); err != nil {
return nil, fmt.Errorf("write AlertmanagerConfig: %w", err)
}
@ -350,7 +350,7 @@ func (sync *sync) handleDeleteRules(ctx context.Context, state *migrationStore.O
if !errors.Is(err, migrationStore.ErrFolderNotDeleted) {
return fmt.Errorf("delete folder '%s': %w", du.AlertFolderUID, err)
}
sync.log.Info("Failed to delete folder during cleanup", "error", err)
sync.log.FromContext(ctx).Info("Failed to delete folder during cleanup", "error", err)
} else {
delete(createdbyMigration, du.AlertFolderUID)
}
@ -402,7 +402,7 @@ func (sync *sync) handleAddRules(ctx context.Context, state *migrationStore.OrgM
}
if len(pairsWithRules) > 0 {
l := sync.log.New("dashboardTitle", duToAdd.Title, "dashboardUid", duToAdd.UID)
l := sync.log.FromContext(ctx).New("dashboardTitle", duToAdd.Title, "dashboardUid", duToAdd.UID)
migratedFolder, err := sync.migratedFolder(ctx, l, duToAdd.UID, duToAdd.FolderID)
if err != nil {
return err
@ -426,7 +426,7 @@ func (sync *sync) handleAddRules(ctx context.Context, state *migrationStore.OrgM
}
if len(pairs) > 0 {
sync.log.Debug("Inserting migrated alert rules", "count", len(pairs))
sync.log.FromContext(ctx).Debug("Inserting migrated alert rules", "count", len(pairs))
// We ensure consistency in title deduplication as well as insertions by sorting pairs first.
sort.SliceStable(pairs, func(i, j int) bool {
@ -465,7 +465,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
// Populate deduplicators from database.
titles, err := sync.migrationStore.GetAlertRuleTitles(ctx, sync.orgID, namespaces...)
if err != nil {
sync.log.Warn("Failed to get alert rule titles for title deduplication", "error", err)
sync.log.FromContext(ctx).Warn("Failed to get alert rule titles for title deduplication", "error", err)
}
titleDedups := make(map[string]*migmodels.Deduplicator, len(namespaces))
@ -474,7 +474,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
}
for _, pair := range pairs {
l := sync.log.New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
l := sync.log.FromContext(ctx).New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
// Here we ensure that the alert rule title is unique within the folder.
titleDeduplicator := titleDedups[pair.Rule.NamespaceUID]
@ -495,7 +495,7 @@ func (sync *sync) deduplicateTitles(ctx context.Context, pairs []*migmodels.Aler
func (sync *sync) attachContactPointLabels(ctx context.Context, state *migrationStore.OrgMigrationState, pairs []*migmodels.AlertPair, amConfig *migmodels.Alertmanager) ([]models.AlertRule, error) {
rules := make([]models.AlertRule, 0, len(pairs))
for _, pair := range pairs {
l := sync.log.New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
l := sync.log.FromContext(ctx).New("legacyRuleId", pair.LegacyRule.ID, "ruleUid", pair.Rule.UID)
alertChannels, err := sync.extractChannels(ctx, pair.LegacyRule)
if err != nil {
return nil, fmt.Errorf("extract channel IDs: %w", err)
@ -524,7 +524,7 @@ func (sync *sync) attachContactPointLabels(ctx context.Context, state *migration
// extractChannels extracts notification channels from the given legacy dashboard alert parsed settings.
func (sync *sync) extractChannels(ctx context.Context, alert *legacymodels.Alert) ([]*legacymodels.AlertNotification, error) {
l := sync.log.New("ruleId", alert.ID, "ruleName", alert.Name)
l := sync.log.FromContext(ctx).New("ruleId", alert.ID, "ruleName", alert.Name)
rawSettings, err := json.Marshal(alert.Settings)
if err != nil {
return nil, fmt.Errorf("get settings: %w", err)


@ -14,6 +14,7 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/serverlock"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
@ -42,6 +43,7 @@ type UpgradeService interface {
type migrationService struct {
lock *serverlock.ServerLockService
cfg *setting.Cfg
features featuremgmt.FeatureToggles
log log.Logger
store db.DB
migrationStore migrationStore.Store
@ -53,6 +55,7 @@ type migrationService struct {
func ProvideService(
lock *serverlock.ServerLockService,
cfg *setting.Cfg,
features featuremgmt.FeatureToggles,
store db.DB,
migrationStore migrationStore.Store,
encryptionService secrets.Service,
@ -61,6 +64,7 @@ func ProvideService(
lock: lock,
log: log.New("ngalert.migration"),
cfg: cfg,
features: features,
store: store,
migrationStore: migrationStore,
encryptionService: encryptionService,
@ -122,6 +126,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
return ms.tryAndSet(ctx, orgID, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
summary := definitions.OrgMigrationSummary{}
om := ms.newOrgMigration(orgID)
l := om.log.FromContext(ctx)
oldState, err := om.migrationStore.GetOrgMigrationState(ctx, orgID)
if err != nil {
return nil, fmt.Errorf("get org migration state: %w", err)
@ -135,7 +140,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
var delta StateDelta
if err != nil && errors.Is(err, migrationStore.ErrNotFound) {
// Notification channel no longer exists, delete this record from the state as well as delete any contacts points and routes.
om.log.Debug("Notification channel no longer exists", "channelId", channelID)
l.Debug("Notification channel no longer exists", "channelId", channelID)
summary.Removed = true
pair, ok := oldState.MigratedChannels[channelID]
if !ok {
@ -145,7 +150,7 @@ func (ms *migrationService) MigrateChannel(ctx context.Context, orgID int64, cha
ChannelsToDelete: []*migrationStore.ContactPair{pair},
}
} else {
pairs, err := om.migrateChannels([]*legacymodels.AlertNotification{channel})
pairs, err := om.migrateChannels([]*legacymodels.AlertNotification{channel}, l)
if err != nil {
return nil, err
}
@ -215,7 +220,7 @@ func (ms *migrationService) MigrateAlert(ctx context.Context, orgID int64, dashb
if err != nil && errors.Is(err, migrationStore.ErrNotFound) {
// Legacy alert no longer exists, delete this record from the state.
om.log.Debug("Alert no longer exists", "dashboardId", dashboardID, "panelId", panelID)
om.log.FromContext(ctx).Debug("Alert no longer exists", "dashboardId", dashboardID, "panelId", panelID)
summary.Removed = true
} else {
newDu := om.migrateDashboard(ctx, dashboardID, []*legacymodels.Alert{alert})
@ -290,7 +295,7 @@ func (ms *migrationService) MigrateAllDashboardAlerts(ctx context.Context, orgID
func (ms *migrationService) MigrateOrg(ctx context.Context, orgID int64, skipExisting bool) (definitions.OrgMigrationSummary, error) {
return ms.tryAndSet(ctx, orgID, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
summary := definitions.OrgMigrationSummary{}
ms.log.Info("Starting legacy migration for org", "orgId", orgID, "skipExisting", skipExisting)
ms.log.FromContext(ctx).Info("Starting legacy upgrade for org", "orgId", orgID, "skipExisting", skipExisting)
om := ms.newOrgMigration(orgID)
dashboardUpgrades, pairs, err := om.migrateOrg(ctx)
if err != nil {
@ -339,22 +344,26 @@ func (ms *migrationService) GetOrgMigrationState(ctx context.Context, orgID int6
}, nil
}
// ErrSuccessRollback is returned when a dry-run succeeded and the changes were rolled back.
var ErrSuccessRollback = errors.New("dry-run succeeded, rolling back")
// Run starts the migration to transition between legacy alerting and unified alerting based on the current and desired
// alerting type as determined by the kvstore and configuration, respectively.
func (ms *migrationService) Run(ctx context.Context) error {
var errMigration error
errLock := ms.lock.LockExecuteAndRelease(ctx, actionName, time.Minute*10, func(ctx context.Context) {
ms.log.Info("Starting")
errMigration = ms.store.InTransaction(ctx, func(ctx context.Context) error {
currentType, err := ms.migrationStore.GetCurrentAlertingType(ctx)
if err != nil {
return fmt.Errorf("getting migration status: %w", err)
}
return ms.applyTransition(ctx, newTransition(currentType, ms.cfg))
})
l := ms.log.FromContext(ctx)
l.Info("Starting")
currentType, err := ms.migrationStore.GetCurrentAlertingType(ctx)
if err != nil {
errMigration = fmt.Errorf("getting migration status: %w", err)
return
}
errMigration = ms.applyTransition(ctx, ms.newTransition(currentType))
})
if errLock != nil {
ms.log.Warn("Server lock for alerting migration already exists")
ms.log.FromContext(ctx).Warn("Server lock for alerting migration already exists")
return nil
}
if errMigration != nil {
@ -364,16 +373,20 @@ func (ms *migrationService) Run(ctx context.Context) error {
}
// newTransition creates a transition based on the current alerting type and the current configuration.
func newTransition(currentType migrationStore.AlertingType, cfg *setting.Cfg) transition {
func (ms *migrationService) newTransition(currentType migrationStore.AlertingType) transition {
desiredType := migrationStore.Legacy
if cfg.UnifiedAlerting.IsEnabled() {
if ms.cfg.UnifiedAlerting.IsEnabled() {
desiredType = migrationStore.UnifiedAlerting
}
return transition{
CurrentType: currentType,
DesiredType: desiredType,
CleanOnDowngrade: cfg.ForceMigration,
CleanOnUpgrade: cfg.UnifiedAlerting.Upgrade.CleanUpgrade,
CleanOnDowngrade: ms.cfg.ForceMigration,
CleanOnUpgrade: ms.cfg.UnifiedAlerting.Upgrade.CleanUpgrade,
// In 10.4.0+, even if legacy alerting is enabled and the user is not intending to update, we want to "test the waters".
// This is intended to surface any potential issues that would exist if the upgrade would be run right now but without
// risk of failing startup.
DryrunUpgrade: ms.features.IsEnabledGlobally(featuremgmt.FlagAlertingUpgradeDryrunOnStart) && currentType == migrationStore.Legacy && desiredType == migrationStore.Legacy,
}
}
@ -383,16 +396,17 @@ type transition struct {
DesiredType migrationStore.AlertingType
CleanOnDowngrade bool
CleanOnUpgrade bool
DryrunUpgrade bool
}
// isNoChange returns true if the migration is a no-op.
func (t transition) isNoChange() bool {
return t.CurrentType == t.DesiredType
return t.CurrentType == t.DesiredType && !t.DryrunUpgrade
}
// isUpgrading returns true if the migration is an upgrade from legacy alerting to unified alerting.
func (t transition) isUpgrading() bool {
return t.CurrentType == migrationStore.Legacy && t.DesiredType == migrationStore.UnifiedAlerting
return (t.CurrentType == migrationStore.Legacy && t.DesiredType == migrationStore.UnifiedAlerting) || t.DryrunUpgrade
}
// isDowngrading returns true if the migration is a downgrade from unified alerting to legacy alerting.
@ -412,37 +426,60 @@ func (t transition) shouldClean() bool {
// If the transition is an upgrade and CleanOnUpgrade is false, all orgs will be migrated.
// If the transition is an upgrade and CleanOnUpgrade is true, all unified alerting data will be deleted and then all orgs will be migrated.
func (ms *migrationService) applyTransition(ctx context.Context, t transition) error {
l := ms.log.New(
"CurrentType", t.CurrentType,
"DesiredType", t.DesiredType,
"CleanOnDowngrade", t.CleanOnDowngrade,
"CleanOnUpgrade", t.CleanOnUpgrade,
)
if t.isNoChange() {
l.Info("Migration already complete")
if t.DryrunUpgrade {
ctx = log.WithContextualAttributes(ctx, []any{"dryrun", "true"})
}
err := ms.store.InTransaction(ctx, func(ctx context.Context) error {
l := ms.log.FromContext(ctx)
if t.isNoChange() {
l.Debug("No change in alerting type")
return nil
}
if t.DryrunUpgrade {
l.Info(fmt.Sprintf("Dry-running upgrade. To deactivate on-start dry-run, disable the feature flag '%s'", featuremgmt.FlagAlertingUpgradeDryrunOnStart), "cleanOnUpgrade", t.CleanOnUpgrade)
} else {
l.Info("Applying transition", "currentType", t.CurrentType, "desiredType", t.DesiredType, "cleanOnDowngrade", t.CleanOnDowngrade, "cleanOnUpgrade", t.CleanOnUpgrade)
}
if t.shouldClean() {
l.Info("Cleaning up unified alerting data")
if err := ms.migrationStore.RevertAllOrgs(ctx); err != nil {
return fmt.Errorf("cleaning up unified alerting data: %w", err)
}
l.Info("Unified alerting data deleted")
}
if t.isUpgrading() {
if err := ms.migrateAllOrgs(ctx); err != nil {
return fmt.Errorf("executing migration: %w", err)
}
}
if err := ms.migrationStore.SetCurrentAlertingType(ctx, t.DesiredType); err != nil {
return fmt.Errorf("setting migration status: %w", err)
}
if t.DryrunUpgrade {
// Ensure we rollback the changes made during the dry-run.
return ErrSuccessRollback
}
l.Info("Completed alerting migration")
return nil
})
if t.DryrunUpgrade {
if errors.Is(err, ErrSuccessRollback) {
ms.log.FromContext(ctx).Info("Dry-run upgrade succeeded. No changes were made. Current legacy alerting setup is ready to upgrade.")
} else {
ms.log.FromContext(ctx).Warn("Dry-run upgrade failed. No changes were made. Current legacy alerting setup will fail to upgrade, issues must be fixed before upgrading Grafana to v11 as legacy alerting will be removed. See https://grafana.com/docs/grafana/v10.4/alerting/set-up/migrating-alerts/ for more details.", "err", err)
}
// Dry-run should never error.
return nil
}
if t.shouldClean() {
l.Info("Cleaning up unified alerting data")
if err := ms.migrationStore.RevertAllOrgs(ctx); err != nil {
return fmt.Errorf("cleaning up unified alerting data: %w", err)
}
l.Info("Unified alerting data deleted")
}
if t.isUpgrading() {
if err := ms.migrateAllOrgs(ctx); err != nil {
return fmt.Errorf("executing migration: %w", err)
}
}
if err := ms.migrationStore.SetCurrentAlertingType(ctx, t.DesiredType); err != nil {
return fmt.Errorf("setting migration status: %w", err)
}
l.Info("Completed legacy migration")
return nil
return err
}
// migrateAllOrgs executes the migration for all orgs.
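
The dry-run works by running the normal upgrade inside the database transaction and then forcing a rollback: the callback returns the ErrSuccessRollback sentinel, the transaction layer rolls everything back, and the caller treats that specific error as success. A minimal, self-contained sketch of the pattern; inTransaction and errSuccessRollback below are stand-ins, not Grafana's actual db package:

```go
package main

import (
	"errors"
	"fmt"
)

var errSuccessRollback = errors.New("dry-run succeeded, rolling back")

// inTransaction stands in for store.InTransaction: it runs fn and rolls back
// (here: discards the work) whenever fn returns any error, sentinel included.
func inTransaction(fn func() error) error {
	if err := fn(); err != nil {
		// rollback would happen here
		return err
	}
	// commit would happen here
	return nil
}

func main() {
	err := inTransaction(func() error {
		// ... perform the full upgrade as usual ...
		return errSuccessRollback // force a rollback even though everything worked
	})
	if errors.Is(err, errSuccessRollback) {
		fmt.Println("dry-run succeeded, no changes were made")
	} else if err != nil {
		fmt.Println("dry-run failed:", err)
	}
}
```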
@ -454,12 +491,13 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
for _, o := range orgs {
om := ms.newOrgMigration(o.ID)
l := om.log.FromContext(ctx)
migrated, err := ms.migrationStore.IsMigrated(ctx, o.ID)
if err != nil {
return fmt.Errorf("getting migration status for org %d: %w", o.ID, err)
}
if migrated {
om.log.Info("Org already migrated, skipping")
l.Info("Org already migrated, skipping")
continue
}
@ -492,7 +530,7 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
return err
}
err = ms.silences.createSilences(ctx, o.ID, om.log)
err = ms.silences.createSilences(ctx, o.ID, l)
if err != nil {
return fmt.Errorf("create silences for org %d: %w", o.ID, err)
}
@ -509,7 +547,7 @@ func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
// configurations, and silence files for a single organization.
// In addition, it will delete all folders and permissions originally created by this migration.
func (ms *migrationService) RevertOrg(ctx context.Context, orgID int64) error {
ms.log.Info("Reverting legacy migration for org", "orgId", orgID)
ms.log.FromContext(ctx).Info("Reverting legacy upgrade for org", "orgId", orgID)
_, err := ms.try(ctx, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
return nil, ms.migrationStore.RevertOrg(ctx, orgID)
})
@ -519,7 +557,7 @@ func (ms *migrationService) RevertOrg(ctx context.Context, orgID int64) error {
// RevertAllOrgs reverts the migration for all orgs, deleting all unified alerting resources such as alert rules, alertmanager configurations, and silence files.
// In addition, it will delete all folders and permissions originally created by this migration.
func (ms *migrationService) RevertAllOrgs(ctx context.Context) error {
ms.log.Info("Reverting legacy migration for all orgs")
ms.log.FromContext(ctx).Info("Reverting legacy upgrade for all orgs")
_, err := ms.try(ctx, func(ctx context.Context) (*definitions.OrgMigrationSummary, error) {
return nil, ms.migrationStore.RevertAllOrgs(ctx)
})
@ -605,7 +643,7 @@ func (ms *migrationService) fromDashboardUpgrades(ctx context.Context, orgID int
} else {
// We could potentially set an error here, but it's not really an error. It just means that the
// user deleted the migrated rule after the migration. This could just as easily be intentional.
ms.log.Debug("Could not find rule for migrated alert", "alertId", a.ID, "ruleUid", p.NewRuleUID)
ms.log.FromContext(ctx).Debug("Could not find rule for migrated alert", "alertId", a.ID, "ruleUid", p.NewRuleUID)
}
}
}


@ -443,6 +443,7 @@ var revertPermissions = []accesscontrol.Permission{
func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
return ms.store.InTransaction(ctx, func(ctx context.Context) error {
return ms.store.WithDbSession(ctx, func(sess *db.Session) error {
l := ms.log.FromContext(ctx)
if _, err := sess.Exec("DELETE FROM alert_rule WHERE org_id = ?", orgID); err != nil {
return err
}
@ -456,7 +457,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
return err
}
if err := ms.DeleteFolders(ctx, orgID, state.CreatedFolders...); err != nil {
ms.log.Warn("Failed to delete migrated folders", "orgId", orgID, "err", err)
l.Warn("Failed to delete migrated folders", "orgId", orgID, "err", err)
}
if _, err := sess.Exec("DELETE FROM alert_configuration WHERE org_id = ?", orgID); err != nil {
@ -485,7 +486,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
}
for _, f := range files {
if err := os.Remove(f); err != nil {
ms.log.Error("Failed to remove silence file", "file", f, "err", err)
l.Error("Failed to remove silence file", "file", f, "err", err)
}
}
@ -500,6 +501,7 @@ func (ms *migrationStore) RevertOrg(ctx context.Context, orgID int64) error {
func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
return ms.store.InTransaction(ctx, func(ctx context.Context) error {
return ms.store.WithDbSession(ctx, func(sess *db.Session) error {
l := ms.log.FromContext(ctx)
if _, err := sess.Exec("DELETE FROM alert_rule"); err != nil {
return err
}
@ -518,7 +520,7 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
return err
}
if err := ms.DeleteFolders(ctx, o.ID, state.CreatedFolders...); err != nil {
ms.log.Warn("Failed to delete migrated folders", "orgId", o.ID, "err", err)
l.Warn("Failed to delete migrated folders", "orgId", o.ID, "err", err)
continue
}
}
@ -549,7 +551,7 @@ func (ms *migrationStore) RevertAllOrgs(ctx context.Context) error {
}
for _, f := range files {
if err := os.Remove(f); err != nil {
ms.log.Error("Failed to remove silence file", "file", f, "err", err)
l.Error("Failed to remove silence file", "file", f, "err", err)
}
}


@ -8,6 +8,7 @@ import (
"github.com/grafana/grafana/pkg/infra/serverlock"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
fake_secrets "github.com/grafana/grafana/pkg/services/secrets/fakes"
@ -24,6 +25,7 @@ func NewTestMigrationService(t *testing.T, sqlStore *sqlstore.SQLStore, cfg *set
svc, err := ProvideService(
serverlock.ProvideService(sqlStore, tracing.InitializeTracerForTest()),
cfg,
featuremgmt.WithFeatures(),
sqlStore,
migrationStore.NewTestMigrationStore(t, sqlStore, cfg),
fake_secrets.NewFakeSecretsService(),
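
The test constructor now passes featuremgmt.WithFeatures() with no flags, so the dry-run stays disabled in existing tests. A sketch of how a test could opt in, assuming WithFeatures accepts flag names to enable (as it is commonly used elsewhere in the codebase); this test is hypothetical and not part of the PR:

```go
package migration

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/grafana/grafana/pkg/services/featuremgmt"
)

// Hypothetical test: build a toggle set with the new flag enabled and confirm
// the check used by newTransition sees it.
func TestDryrunToggleEnabled(t *testing.T) {
	features := featuremgmt.WithFeatures(featuremgmt.FlagAlertingUpgradeDryrunOnStart)
	require.True(t, features.IsEnabledGlobally(featuremgmt.FlagAlertingUpgradeDryrunOnStart))
}
```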


@ -45,11 +45,11 @@ func (om *OrgMigration) migrateDashboard(ctx context.Context, dashID int64, aler
du.AddAlertErrors(err, alerts...)
return du
}
l := om.log.New(
l := om.log.FromContext(ctx).New(
"dashboardTitle", dashboard.Title,
"dashboardUid", dashboard.UID,
)
l.Info("Migrating alerts for dashboard", "alertCount", len(alerts))
l.Debug("Migrating alerts for dashboard", "alertCount", len(alerts))
du := migmodels.NewDashboardUpgrade(dashID)
du.UID = dashboard.UID
@ -70,7 +70,7 @@ func (om *OrgMigration) migrateOrgAlerts(ctx context.Context) ([]*migmodels.Dash
if err != nil {
return nil, fmt.Errorf("load alerts: %w", err)
}
om.log.Info("Alerts found to migrate", "alerts", cnt)
om.log.FromContext(ctx).Info("Alerts found to migrate", "alerts", cnt)
dashboardUpgrades := make([]*migmodels.DashboardUpgrade, 0, len(mappedAlerts))
for dashID, alerts := range mappedAlerts {
@ -86,7 +86,7 @@ func (om *OrgMigration) migrateOrgChannels(ctx context.Context) ([]*migmodels.Co
return nil, fmt.Errorf("load notification channels: %w", err)
}
pairs, err := om.migrateChannels(channels)
pairs, err := om.migrateChannels(channels, om.log.FromContext(ctx))
if err != nil {
return nil, err
}
@ -94,7 +94,7 @@ func (om *OrgMigration) migrateOrgChannels(ctx context.Context) ([]*migmodels.Co
}
func (om *OrgMigration) migrateOrg(ctx context.Context) ([]*migmodels.DashboardUpgrade, []*migmodels.ContactPair, error) {
om.log.Info("Migrating alerts for organisation")
om.log.FromContext(ctx).Info("Migrating alerts for organisation")
pairs, err := om.migrateOrgChannels(ctx)
if err != nil {
return nil, nil, fmt.Errorf("migrate channels: %w", err)