2023-10-12 07:43:10 -05:00
|
|
|
package migration
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-01-05 04:37:13 -06:00
|
|
|
"errors"
|
2023-10-12 07:43:10 -05:00
|
|
|
"fmt"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/grafana/grafana/pkg/infra/db"
|
|
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
|
|
|
"github.com/grafana/grafana/pkg/infra/serverlock"
|
2024-01-05 04:37:13 -06:00
|
|
|
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
|
2023-10-12 07:43:10 -05:00
|
|
|
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
|
|
|
|
"github.com/grafana/grafana/pkg/services/secrets"
|
|
|
|
"github.com/grafana/grafana/pkg/setting"
|
|
|
|
)
|
|
|
|
|
|
|
|
// actionName is the lock name handed to serverlock.ServerLockService when the
// alerting migration runs.
const actionName = "alerting migration"
|
|
|
|
|
2023-10-19 09:03:00 -05:00
|
|
|
// UpgradeService is the entry point for running the alerting migration.
type UpgradeService interface {
	// Run executes the migration and returns an error if it fails.
	Run(ctx context.Context) error
}
|
|
|
|
|
|
|
|
type migrationService struct {
|
2023-10-12 07:43:10 -05:00
|
|
|
lock *serverlock.ServerLockService
|
|
|
|
cfg *setting.Cfg
|
|
|
|
log log.Logger
|
|
|
|
store db.DB
|
|
|
|
migrationStore migrationStore.Store
|
|
|
|
|
|
|
|
encryptionService secrets.Service
|
|
|
|
}
|
|
|
|
|
|
|
|
func ProvideService(
|
|
|
|
lock *serverlock.ServerLockService,
|
|
|
|
cfg *setting.Cfg,
|
|
|
|
store db.DB,
|
|
|
|
migrationStore migrationStore.Store,
|
|
|
|
encryptionService secrets.Service,
|
2023-10-19 09:03:00 -05:00
|
|
|
) (UpgradeService, error) {
|
|
|
|
return &migrationService{
|
2023-10-12 07:43:10 -05:00
|
|
|
lock: lock,
|
|
|
|
log: log.New("ngalert.migration"),
|
|
|
|
cfg: cfg,
|
|
|
|
store: store,
|
|
|
|
migrationStore: migrationStore,
|
|
|
|
encryptionService: encryptionService,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2023-11-30 09:25:59 -06:00
|
|
|
// Run starts the migration to transition between legacy alerting and unified alerting based on the current and desired
|
|
|
|
// alerting type as determined by the kvstore and configuration, respectively.
|
2023-10-19 09:03:00 -05:00
|
|
|
func (ms *migrationService) Run(ctx context.Context) error {
|
2023-10-12 07:43:10 -05:00
|
|
|
var errMigration error
|
2023-10-19 09:03:00 -05:00
|
|
|
errLock := ms.lock.LockExecuteAndRelease(ctx, actionName, time.Minute*10, func(ctx context.Context) {
|
2023-10-12 07:43:10 -05:00
|
|
|
ms.log.Info("Starting")
|
|
|
|
errMigration = ms.store.InTransaction(ctx, func(ctx context.Context) error {
|
2023-11-30 09:25:59 -06:00
|
|
|
currentType, err := ms.migrationStore.GetCurrentAlertingType(ctx)
|
2023-10-12 07:43:10 -05:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("getting migration status: %w", err)
|
|
|
|
}
|
2023-11-30 09:25:59 -06:00
|
|
|
return ms.applyTransition(ctx, newTransition(currentType, ms.cfg))
|
2023-10-12 07:43:10 -05:00
|
|
|
})
|
|
|
|
})
|
|
|
|
if errLock != nil {
|
|
|
|
ms.log.Warn("Server lock for alerting migration already exists")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if errMigration != nil {
|
|
|
|
return fmt.Errorf("migration failed: %w", errMigration)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-11-30 09:25:59 -06:00
|
|
|
// newTransition creates a transition based on the current alerting type and the current configuration.
|
|
|
|
func newTransition(currentType migrationStore.AlertingType, cfg *setting.Cfg) transition {
|
|
|
|
desiredType := migrationStore.Legacy
|
|
|
|
if cfg.UnifiedAlerting.IsEnabled() {
|
|
|
|
desiredType = migrationStore.UnifiedAlerting
|
|
|
|
}
|
|
|
|
return transition{
|
|
|
|
CurrentType: currentType,
|
|
|
|
DesiredType: desiredType,
|
|
|
|
CleanOnDowngrade: cfg.ForceMigration,
|
2023-11-30 10:01:11 -06:00
|
|
|
CleanOnUpgrade: cfg.UnifiedAlerting.Upgrade.CleanUpgrade,
|
2023-11-30 09:25:59 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// transition represents a migration from one alerting type to another.
|
|
|
|
type transition struct {
|
|
|
|
CurrentType migrationStore.AlertingType
|
|
|
|
DesiredType migrationStore.AlertingType
|
|
|
|
CleanOnDowngrade bool
|
2023-11-30 10:01:11 -06:00
|
|
|
CleanOnUpgrade bool
|
2023-11-30 09:25:59 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// isNoChange returns true if the migration is a no-op.
|
|
|
|
func (t transition) isNoChange() bool {
|
|
|
|
return t.CurrentType == t.DesiredType
|
|
|
|
}
|
|
|
|
|
|
|
|
// isUpgrading returns true if the migration is an upgrade from legacy alerting to unified alerting.
|
|
|
|
func (t transition) isUpgrading() bool {
|
|
|
|
return t.CurrentType == migrationStore.Legacy && t.DesiredType == migrationStore.UnifiedAlerting
|
|
|
|
}
|
|
|
|
|
|
|
|
// isDowngrading returns true if the migration is a downgrade from unified alerting to legacy alerting.
|
|
|
|
func (t transition) isDowngrading() bool {
|
|
|
|
return t.CurrentType == migrationStore.UnifiedAlerting && t.DesiredType == migrationStore.Legacy
|
|
|
|
}
|
|
|
|
|
|
|
|
// shouldClean returns true if the migration should delete all unified alerting data.
|
|
|
|
func (t transition) shouldClean() bool {
|
2023-11-30 10:01:11 -06:00
|
|
|
return t.isDowngrading() && t.CleanOnDowngrade || t.isUpgrading() && t.CleanOnUpgrade
|
2023-11-30 09:25:59 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// applyTransition applies the transition to the database.
|
|
|
|
// If the transition is a no-op, nothing will be done.
|
2023-11-30 10:01:11 -06:00
|
|
|
// If the transition is a downgrade and CleanOnDowngrade is false, nothing will be done.
|
2023-11-30 09:25:59 -06:00
|
|
|
// If the transition is a downgrade and CleanOnDowngrade is true, all unified alerting data will be deleted.
|
2023-11-30 10:01:11 -06:00
|
|
|
// If the transition is an upgrade and CleanOnUpgrade is false, all orgs will be migrated.
|
|
|
|
// If the transition is an upgrade and CleanOnUpgrade is true, all unified alerting data will be deleted and then all orgs will be migrated.
|
2023-11-30 09:25:59 -06:00
|
|
|
func (ms *migrationService) applyTransition(ctx context.Context, t transition) error {
|
|
|
|
l := ms.log.New(
|
|
|
|
"CurrentType", t.CurrentType,
|
|
|
|
"DesiredType", t.DesiredType,
|
|
|
|
"CleanOnDowngrade", t.CleanOnDowngrade,
|
2023-11-30 10:01:11 -06:00
|
|
|
"CleanOnUpgrade", t.CleanOnUpgrade,
|
2023-11-30 09:25:59 -06:00
|
|
|
)
|
|
|
|
if t.isNoChange() {
|
|
|
|
l.Info("Migration already complete")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if t.shouldClean() {
|
|
|
|
l.Info("Cleaning up unified alerting data")
|
|
|
|
if err := ms.migrationStore.RevertAllOrgs(ctx); err != nil {
|
|
|
|
return fmt.Errorf("cleaning up unified alerting data: %w", err)
|
|
|
|
}
|
|
|
|
l.Info("Unified alerting data deleted")
|
|
|
|
}
|
|
|
|
|
|
|
|
if t.isUpgrading() {
|
|
|
|
if err := ms.migrateAllOrgs(ctx); err != nil {
|
|
|
|
return fmt.Errorf("executing migration: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := ms.migrationStore.SetCurrentAlertingType(ctx, t.DesiredType); err != nil {
|
|
|
|
return fmt.Errorf("setting migration status: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
l.Info("Completed legacy migration")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-10-12 07:43:10 -05:00
|
|
|
// migrateAllOrgs executes the migration for all orgs.
|
2023-10-19 09:03:00 -05:00
|
|
|
func (ms *migrationService) migrateAllOrgs(ctx context.Context) error {
|
2023-10-12 07:43:10 -05:00
|
|
|
orgs, err := ms.migrationStore.GetAllOrgs(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("get orgs: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, o := range orgs {
|
|
|
|
om := ms.newOrgMigration(o.ID)
|
2023-11-30 09:25:59 -06:00
|
|
|
migrated, err := ms.migrationStore.IsMigrated(ctx, o.ID)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("getting migration status for org %d: %w", o.ID, err)
|
|
|
|
}
|
|
|
|
if migrated {
|
|
|
|
om.log.Info("Org already migrated, skipping")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2024-01-05 04:37:13 -06:00
|
|
|
dashboardUpgrades, contactPairs, err := om.migrateOrg(ctx)
|
|
|
|
if err != nil {
|
2023-10-12 07:43:10 -05:00
|
|
|
return fmt.Errorf("migrate org %d: %w", o.ID, err)
|
|
|
|
}
|
|
|
|
|
2024-01-05 04:37:13 -06:00
|
|
|
// Check for errors, if any exist log and fail the migration.
|
|
|
|
errs := migmodels.ExtractErrors(dashboardUpgrades, contactPairs)
|
|
|
|
var migrationErr error
|
|
|
|
for _, e := range errs {
|
|
|
|
// Skip certain errors as historically they are not fatal to the migration. We can revisit these if necessary.
|
|
|
|
if errors.Is(e, ErrDiscontinued) {
|
|
|
|
// Discontinued notification type.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if errors.Is(e, ErrOrphanedAlert) {
|
|
|
|
// Orphaned alerts.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
migrationErr = errors.Join(migrationErr, e)
|
|
|
|
}
|
|
|
|
if migrationErr != nil {
|
|
|
|
return fmt.Errorf("migrate org %d: %w", o.ID, migrationErr)
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ms.newSync(o.ID).syncAndSaveState(ctx, dashboardUpgrades, contactPairs)
|
2023-10-12 07:43:10 -05:00
|
|
|
if err != nil {
|
2024-01-05 04:37:13 -06:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(om.silences) > 0 {
|
|
|
|
om.log.Debug("Writing silences file", "silences", len(om.silences))
|
|
|
|
if err := writeSilencesFile(ms.cfg.DataPath, o.ID, om.silences); err != nil {
|
|
|
|
return fmt.Errorf("write silence file for org %d: %w", o.ID, err)
|
|
|
|
}
|
2023-10-12 07:43:10 -05:00
|
|
|
}
|
2023-11-30 09:25:59 -06:00
|
|
|
|
|
|
|
err = ms.migrationStore.SetMigrated(ctx, o.ID, true)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("setting migration status: %w", err)
|
|
|
|
}
|
2023-10-12 07:43:10 -05:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|