2021-04-29 13:24:37 -04:00
package ualert
import (
2023-10-11 14:21:21 -05:00
"context"
"encoding/json"
"errors"
2021-04-29 13:24:37 -04:00
"fmt"
"os"
2021-09-28 10:27:23 -04:00
"path/filepath"
"strconv"
2023-10-11 14:21:21 -05:00
"strings"
"time"
2021-04-29 13:24:37 -04:00
2023-10-11 14:21:21 -05:00
alertingNotify "github.com/grafana/alerting/notify"
pb "github.com/prometheus/alertmanager/silence/silencepb"
2022-04-20 22:02:23 -04:00
"xorm.io/xorm"
2021-11-04 18:47:21 +02:00
2021-10-07 17:30:06 -04:00
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
2021-10-22 10:11:06 +01:00
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
2023-10-11 14:21:21 -05:00
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
2021-04-29 13:24:37 -04:00
)
2023-10-11 14:21:21 -05:00
// Names and identifiers shared by the dashboard alert migrations in this package.
const (
	// GENERAL_FOLDER is the folder name used for the general alerting folder
	// (presumably the title given to it when it is created; confirm in folderHelper).
	GENERAL_FOLDER = "General Alerting"

	// DASHBOARD_FOLDER is the format of the name of a folder created for a single dashboard.
	// It is filled with the dashboard title and the dashboard UID, in that order.
	DASHBOARD_FOLDER = "%s Alerts - %s"

	// MaxFolderName is the maximum length of the folder name generated using DASHBOARD_FOLDER format
	MaxFolderName = 255

	// FOLDER_CREATED_BY is used to track folders created by this migration
	// during alert migration cleanup.
	FOLDER_CREATED_BY = -8

	// KV_NAMESPACE is the kv_store namespace whose rows are deleted when unified alerting data is removed.
	KV_NAMESPACE = "alertmanager"
)
2021-05-11 08:08:39 -04:00
// migTitle is the migration ID of the migration that moves dashboard alerts to unified alerting.
// It is also the key looked up in the migration log to tell whether that migration has run.
var migTitle = "move dashboard alerts to unified alerting"
2023-10-11 14:21:21 -05:00
// rmMigTitle is the migration ID of the migration that removes unified alerting data.
var rmMigTitle = "remove unified alerting data"

// clearMigrationEntryTitle is the format of the ID of a migration that clears another
// migration's log entry; it is filled with the ID of the entry being cleared.
const clearMigrationEntryTitle = "clear migration entry %q"
2022-06-22 10:52:46 -04:00
// codeMigration is the placeholder returned by the SQL method of migrations that are
// implemented in Go code rather than as a SQL statement.
const codeMigration = "code migration"
2021-08-12 16:04:09 +03:00
2023-10-11 14:21:21 -05:00
// expressionDatasourceUID mirrors the value that pkg/expr/service.go defines as "DatasourceType".
const expressionDatasourceUID = "__expr__"
2021-04-29 13:24:37 -04:00
// MigrationError reports a failure to migrate a single legacy alert.
type MigrationError struct {
	// AlertId is the ID of the legacy alert that could not be migrated.
	AlertId int64
	// Err is the underlying cause of the failure.
	Err error
}

// Error implements the error interface. It includes the alert ID and, when present,
// the message of the underlying error.
func (e MigrationError) Error() string {
	if e.Err == nil {
		// Avoid a nil dereference when no cause was attached.
		return fmt.Sprintf("failed to migrate alert %d", e.AlertId)
	}
	return fmt.Sprintf("failed to migrate alert %d: %s", e.AlertId, e.Err.Error())
}

// Unwrap returns the underlying error so that errors.Is and errors.As can inspect it.
//
// It uses a value receiver on purpose: MigrationError is returned by value throughout
// this package, and a pointer-receiver Unwrap is not in the method set of the value,
// which would hide the cause from the errors package. A value receiver is still
// callable through *MigrationError.
func (e MigrationError) Unwrap() error { return e.Err }
2023-10-11 14:21:21 -05:00
// AddDashAlertMigration registers the migrations needed to switch between legacy
// dashboard alerting and unified alerting, based on the current configuration and on
// whether the upgrade migration (migTitle) is present in the migration log.
//
//   - Unified alerting enabled and upgrade not yet run: clear the log entry of the
//     "remove unified alerting data" migration and register the upgrade migration.
//   - Unified alerting disabled and upgrade already run: unless legacy alerting is
//     disabled too, clear the log entry of the upgrade migration and register the
//     migration that removes unified alerting data.
//   - Otherwise nothing is registered.
//
// The process exits with status 1 if the migration log cannot be read, and the function
// panics if a rollback is required but mg.Cfg.ForceMigration is not set.
func AddDashAlertMigration(mg *migrator.Migrator) {
	logs, err := mg.GetMigrationLog()
	if err != nil {
		mg.Logger.Error("Alert migration failure: could not get migration log", "error", err)
		os.Exit(1)
	}

	_, migrationRun := logs[migTitle]
	uaEnabled := mg.Cfg.UnifiedAlerting.IsEnabled()

	switch {
	// If unified alerting is enabled and the upgrade migration has not been run
	case uaEnabled && !migrationRun:
		// Remove the migration entry that removes all unified alerting data. This is so when the feature
		// flag is removed in future the "remove unified alerting data" migration will be run again.
		mg.AddMigration(fmt.Sprintf(clearMigrationEntryTitle, rmMigTitle), &clearMigrationEntry{
			migrationID: rmMigTitle,
		})

		mg.AddMigration(migTitle, &migration{
			// We deduplicate for case-insensitive matching in MySQL-compatible backend flavours because they use case-insensitive collation.
			seenUIDs: uidSet{set: make(map[string]struct{}), caseInsensitive: mg.Dialect.SupportEngine()},
			silences: make(map[int64][]*pb.MeshSilence),
		})

	// If unified alerting is disabled and upgrade migration has been run
	case !uaEnabled && migrationRun:
		// If legacy alerting is also disabled, there is nothing to do
		if setting.AlertingEnabled != nil && !*setting.AlertingEnabled {
			return
		}

		// Safeguard to prevent data loss when migrating from UA to LA
		if !mg.Cfg.ForceMigration {
			panic("Grafana has already been migrated to Unified Alerting.\nAny alert rules created while using Unified Alerting will be deleted by rolling back.\n\nSet force_migration=true in your grafana.ini and restart Grafana to roll back and delete Unified Alerting configuration data.")
		}

		// Remove the migration entry that creates unified alerting data. This is so when the feature
		// flag is enabled in the future the migration "move dashboard alerts to unified alerting" will be run again.
		mg.AddMigration(fmt.Sprintf(clearMigrationEntryTitle, migTitle), &clearMigrationEntry{
			migrationID: migTitle,
		})

		mg.AddMigration(rmMigTitle, &rmMigration{})
	}
}
// RerunDashAlertMigration force the dashboard alert migration to run
// to make sure that the Alertmanager configurations will be created for each organisation
func RerunDashAlertMigration ( mg * migrator . Migrator ) {
logs , err := mg . GetMigrationLog ( )
if err != nil {
mg . Logger . Error ( "Alert migration failure: could not get migration log" , "error" , err )
os . Exit ( 1 )
}
2021-08-12 16:04:09 +03:00
cloneMigTitle := fmt . Sprintf ( "clone %s" , migTitle )
2023-10-11 14:21:21 -05:00
_ , migrationRun := logs [ cloneMigTitle ]
ngEnabled := mg . Cfg . UnifiedAlerting . IsEnabled ( )
switch {
case ngEnabled && ! migrationRun :
// The only use of this migration is when a user enabled ng-alerting before 8.2.
mg . AddMigration ( cloneMigTitle , & upgradeNgAlerting { } )
// if user disables the feature flag and enables it back.
// This migration does not need to be run because the original migration AddDashAlertMigration does what's needed
}
2021-08-12 16:04:09 +03:00
}
2021-10-04 16:33:55 +01:00
func AddDashboardUIDPanelIDMigration ( mg * migrator . Migrator ) {
2023-10-11 14:21:21 -05:00
logs , err := mg . GetMigrationLog ( )
if err != nil {
mg . Logger . Error ( "Alert migration failure: could not get migration log" , "error" , err )
os . Exit ( 1 )
}
2021-10-04 16:33:55 +01:00
migrationID := "update dashboard_uid and panel_id from existing annotations"
2023-10-11 14:21:21 -05:00
_ , migrationRun := logs [ migrationID ]
ngEnabled := mg . Cfg . UnifiedAlerting . IsEnabled ( )
undoMigrationID := "undo " + migrationID
if ngEnabled && ! migrationRun {
// If ngalert is enabled and the migration has not been run then run it.
mg . AddMigration ( migrationID , & updateDashboardUIDPanelIDMigration { } )
} else if ! ngEnabled && migrationRun {
// If ngalert is disabled and the migration has been run then remove it
// from the migration log so it will run if ngalert is re-enabled.
mg . AddMigration ( undoMigrationID , & clearMigrationEntry {
migrationID : migrationID ,
} )
}
2021-10-04 16:33:55 +01:00
}
// updateDashboardUIDPanelIDMigration is a code migration that sets the dashboard_uid and
// panel_id columns of alert rules from their __dashboardUid__ and __panelId__ annotations.
type updateDashboardUIDPanelIDMigration struct {
	migrator.MigrationBase
}

// SQL returns a human-readable label for this migration; the dialect is not used.
func (migration *updateDashboardUIDPanelIDMigration) SQL(_ migrator.Dialect) string {
	return "set dashboard_uid and panel_id migration"
}
func ( m * updateDashboardUIDPanelIDMigration ) Exec ( sess * xorm . Session , mg * migrator . Migrator ) error {
var results [ ] struct {
ID int64 ` xorm:"id" `
Annotations map [ string ] string ` xorm:"annotations" `
}
if err := sess . SQL ( ` SELECT id, annotations FROM alert_rule ` ) . Find ( & results ) ; err != nil {
return fmt . Errorf ( "failed to get annotations for all alert rules: %w" , err )
}
for _ , next := range results {
var (
dashboardUID * string
panelID * int64
)
2021-10-07 17:30:06 -04:00
if s , ok := next . Annotations [ ngmodels . DashboardUIDAnnotation ] ; ok {
2021-10-04 16:33:55 +01:00
dashboardUID = & s
}
2021-10-07 17:30:06 -04:00
if s , ok := next . Annotations [ ngmodels . PanelIDAnnotation ] ; ok {
2021-10-04 16:33:55 +01:00
i , err := strconv . ParseInt ( s , 10 , 64 )
if err != nil {
2021-10-07 17:30:06 -04:00
return fmt . Errorf ( "the %s annotation does not contain a valid Panel ID: %w" , ngmodels . PanelIDAnnotation , err )
2021-10-04 16:33:55 +01:00
}
panelID = & i
}
2021-10-06 11:34:11 +01:00
// We do not want to set panel_id to a non-nil value when dashboard_uid is nil
// as panel_id is not unique and so cannot be queried without its dashboard_uid.
// This can happen where users have deleted the dashboard_uid annotation but kept
// the panel_id annotation.
if dashboardUID != nil {
if _ , err := sess . Exec ( ` UPDATE alert_rule SET dashboard_uid = ?, panel_id = ? WHERE id = ? ` ,
dashboardUID ,
panelID ,
next . ID ) ; err != nil {
return fmt . Errorf ( "failed to set dashboard_uid and panel_id for alert rule: %w" , err )
}
2021-10-04 16:33:55 +01:00
}
}
return nil
}
2023-10-11 14:21:21 -05:00
// clearMigrationEntry removes an entry from the migration_log table.
// This migration is not recorded in the migration_log so that it can re-run several times.
type clearMigrationEntry struct {
	migrator.MigrationBase

	// migrationID is the migration_id of the log entry to delete.
	migrationID string
}

// SQL returns a descriptive label for this code migration; the dialect is not used.
func (c *clearMigrationEntry) SQL(dialect migrator.Dialect) string {
	return "clear migration entry code migration"
}

// Exec deletes the migration_log rows whose migration_id equals the configured ID.
func (c *clearMigrationEntry) Exec(sess *xorm.Session, mg *migrator.Migrator) error {
	if _, err := sess.SQL(` DELETE from migration_log where migration_id = ? `, c.migrationID).Query(); err != nil {
		return fmt.Errorf("failed to clear migration entry %v: %w", c.migrationID, err)
	}
	return nil
}

// SkipMigrationLog reports that this migration must not be recorded in migration_log.
func (c *clearMigrationEntry) SkipMigrationLog() bool {
	return true
}
// migration is the code migration that moves legacy dashboard alerts to unified alerting.
type migration struct {
	migrator.MigrationBase

	// sess and mg are attached for convenience; Exec sets both before doing any work.
	sess *xorm.Session
	mg   *migrator.Migrator

	// seenUIDs tracks UIDs that have already been generated.
	seenUIDs uidSet
	// silences holds silences collected during the migration
	// (presumably keyed by organisation ID; confirm against writeSilencesFile).
	silences map[int64][]*pb.MeshSilence
}

// SQL returns the generic code-migration label; the dialect is not used.
func (mig *migration) SQL(dialect migrator.Dialect) string {
	return codeMigration
}
// Exec migrates every legacy dashboard alert to a unified alerting rule.
//
// For each alert it translates the conditions, resolves the dashboard, picks the folder
// the new rule is stored in, and builds the rule. The folder is chosen as follows:
//   - the dashboard has its own ACL: a dedicated folder is created (once per folder name)
//     and given the dashboard's permissions;
//   - the dashboard lives in a folder: that folder is used, falling back to the general
//     folder when it cannot be found;
//   - otherwise: the organisation's general folder.
//
// After all rules are built it writes the silence files (failures are only logged), sets up
// the Alertmanager configurations, inserts the rules and finally stores the configurations.
// The first error encountered aborts the migration and is returned.
//
//nolint:gocyclo
func (m *migration) Exec(sess *xorm.Session, mg *migrator.Migrator) error {
	m.sess = sess
	m.mg = mg

	dashAlerts, err := m.slurpDashAlerts()
	if err != nil {
		return err
	}
	mg.Logger.Info("Alerts found to migrate", "alerts", len(dashAlerts))

	// [orgID, dataSourceId] -> UID
	dsIDMap, err := m.slurpDSIDs()
	if err != nil {
		return err
	}

	// [orgID, dashboardId] -> dashUID
	dashIDMap, err := m.slurpDashUIDs()
	if err != nil {
		return err
	}

	// cache for folders created for dashboards that have custom permissions
	folderCache := make(map[string]*dashboard)
	// cache for the general folders
	generalFolderCache := make(map[int64]*dashboard)

	helper := folderHelper{
		sess: sess,
		mg:   mg,
	}

	// gf returns the general folder of the dashboard's organisation, creating it on first use.
	gf := func(dash dashboard, da dashAlert) (*dashboard, error) {
		f, ok := generalFolderCache[dash.OrgId]
		if !ok {
			var err error
			// get or create general folder
			f, err = helper.getOrCreateGeneralFolder(dash.OrgId)
			if err != nil {
				return nil, MigrationError{
					Err:     fmt.Errorf("failed to get or create general folder under organisation %d: %w", dash.OrgId, err),
					AlertId: da.Id,
				}
			}
			generalFolderCache[dash.OrgId] = f
		}
		// No need to assign default permissions to general folder
		// because they are included to the query result if it's a folder with no permissions
		// https://github.com/grafana/grafana/blob/076e2ce06a6ecf15804423fcc8dca1b620a321e5/pkg/services/sqlstore/dashboard_acl.go#L109
		return f, nil
	}

	// Per org map of newly created rules to which notification channels it should send to.
	rulesPerOrg := make(map[int64]map[*alertRule][]uidOrID)

	for _, da := range dashAlerts {
		l := mg.Logger.New("ruleID", da.Id, "ruleName", da.Name, "dashboardUID", da.DashboardUID, "orgID", da.OrgId)
		l.Debug("Migrating alert rule to Unified Alerting")

		newCond, err := transConditions(*da.ParsedSettings, da.OrgId, dsIDMap)
		if err != nil {
			return err
		}

		da.DashboardUID = dashIDMap[[2]int64{da.OrgId, da.DashboardId}]

		// get dashboard
		dash := dashboard{}
		exists, err := m.sess.Where("org_id=? AND uid=?", da.OrgId, da.DashboardUID).Get(&dash)
		if err != nil {
			return MigrationError{
				Err:     fmt.Errorf("failed to get dashboard %s under organisation %d: %w", da.DashboardUID, da.OrgId, err),
				AlertId: da.Id,
			}
		}
		if !exists {
			// err is nil on this path, so there is nothing to wrap.
			return MigrationError{
				Err:     fmt.Errorf("dashboard with UID %v under organisation %d not found", da.DashboardUID, da.OrgId),
				AlertId: da.Id,
			}
		}

		var folder *dashboard
		switch {
		case dash.HasACL:
			folderName := getAlertFolderNameFromDashboard(&dash)
			f, ok := folderCache[folderName]
			if !ok {
				l.Info("Create a new folder for alerts that belongs to dashboard because it has custom permissions", "folder", folderName)
				// create folder and assign the permissions of the dashboard (included default and inherited)
				f, err = helper.createFolder(dash.OrgId, folderName)
				if err != nil {
					return MigrationError{
						Err:     fmt.Errorf("failed to create folder: %w", err),
						AlertId: da.Id,
					}
				}
				permissions, err := helper.getACL(dash.OrgId, dash.Id)
				if err != nil {
					return MigrationError{
						Err:     fmt.Errorf("failed to get dashboard %d under organisation %d permissions: %w", dash.Id, dash.OrgId, err),
						AlertId: da.Id,
					}
				}
				err = helper.setACL(f.OrgId, f.Id, permissions)
				if err != nil {
					return MigrationError{
						Err:     fmt.Errorf("failed to set folder %d under organisation %d permissions: %w", f.Id, f.OrgId, err),
						AlertId: da.Id,
					}
				}
				folderCache[folderName] = f
			}
			folder = f
		case dash.FolderId > 0:
			// get folder if exists
			f, err := helper.getFolder(dash, da)
			if err != nil {
				// If folder does not exist then the dashboard is an orphan and we migrate the alert to the general folder.
				l.Warn("Failed to find folder for dashboard. Migrate rule to the default folder", "rule_name", da.Name, "dashboard_uid", da.DashboardUID, "missing_folder_id", dash.FolderId)
				folder, err = gf(dash, da)
				if err != nil {
					return err
				}
			} else {
				folder = &f
			}
		default:
			folder, err = gf(dash, da)
			if err != nil {
				return err
			}
		}

		if folder.Uid == "" {
			return MigrationError{
				Err:     fmt.Errorf("empty folder identifier"),
				AlertId: da.Id,
			}
		}

		rule, err := m.makeAlertRule(l, *newCond, da, folder.Uid)
		if err != nil {
			return fmt.Errorf("failed to migrate alert rule '%s' [ID:%d, DashboardUID:%s, orgID:%d]: %w", da.Name, da.Id, da.DashboardUID, da.OrgId, err)
		}

		if _, ok := rulesPerOrg[rule.OrgID]; !ok {
			rulesPerOrg[rule.OrgID] = make(map[*alertRule][]uidOrID)
		}
		// NOTE(review): the map is keyed by rule pointer, so this only detects the same
		// *alertRule being returned twice, not two rules sharing a UID - confirm intent.
		if _, ok := rulesPerOrg[rule.OrgID][rule]; !ok {
			rulesPerOrg[rule.OrgID][rule] = extractChannelIDs(da)
		} else {
			return MigrationError{
				Err:     fmt.Errorf("duplicate generated rule UID"),
				AlertId: da.Id,
			}
		}
	}

	// Writing silences is best-effort: a failure is logged and the migration continues.
	for orgID := range rulesPerOrg {
		if err := m.writeSilencesFile(orgID); err != nil {
			m.mg.Logger.Error("Alert migration error: failed to write silence file", "err", err)
		}
	}

	amConfigPerOrg, err := m.setupAlertmanagerConfigs(rulesPerOrg)
	if err != nil {
		return err
	}

	err = m.insertRules(mg, rulesPerOrg)
	if err != nil {
		return err
	}

	for orgID, amConfig := range amConfigPerOrg {
		if err := m.writeAlertmanagerConfig(orgID, amConfig); err != nil {
			return err
		}
	}

	return nil
}
// insertRules stores every migrated rule and a matching alert_rule_version entry.
//
// On Postgres the first insert attempt runs inside its own transaction via
// mg.InTransaction; on other drivers it uses the migration session directly. If that
// attempt fails for any reason, the rule UID is appended to the rule title and rule
// group and the insert is retried once on the migration session. An error from the
// retry or from the version insert is returned.
func (m *migration) insertRules(mg *migrator.Migrator, rulesPerOrg map[int64]map[*alertRule][]uidOrID) error {
	for _, orgRules := range rulesPerOrg {
		for rule := range orgRules {
			var firstAttempt error
			switch {
			case strings.HasPrefix(mg.Dialect.DriverName(), migrator.Postgres):
				firstAttempt = mg.InTransaction(func(tx *xorm.Session) error {
					_, txErr := tx.Insert(rule)
					return txErr
				})
			default:
				_, firstAttempt = m.sess.Insert(rule)
			}

			if firstAttempt != nil {
				// TODO better error handling, if constraint
				suffix := fmt.Sprintf(" %v", rule.UID)
				rule.Title += suffix
				rule.RuleGroup += suffix
				if _, err := m.sess.Insert(rule); err != nil {
					return err
				}
			}

			// create entry in alert_rule_version
			if _, err := m.sess.Insert(rule.makeVersion()); err != nil {
				return err
			}
		}
	}
	return nil
}
// writeAlertmanagerConfig serializes amConfig to JSON and stores it as the Alertmanager
// configuration of the given organisation, replacing any configuration already stored
// for that organisation. It returns the marshalling or insert error, if any.
func (m *migration) writeAlertmanagerConfig(orgID int64, amConfig *PostableUserConfig) error {
	serialized, err := json.Marshal(amConfig)
	if err != nil {
		return err
	}

	// remove an existing configuration, which could have been left during switching back to legacy alerting
	_, _ = m.sess.Delete(AlertConfiguration{OrgID: orgID})

	record := AlertConfiguration{
		AlertmanagerConfiguration: string(serialized),
		// Since we are migration for a snapshot of the code, it is always going to migrate to
		// the v1 config.
		ConfigurationVersion: "v1",
		OrgID:                orgID,
	}

	// We don't need to apply the configuration, given the multi org alertmanager will do an initial sync before the server is ready.
	_, err = m.sess.Insert(record)
	return err
}
// validateAlertmanagerConfig validates the alertmanager configuration produced by the migration against the receivers.
//
// Every Grafana-managed receiver is converted to an integration config and passed to
// alertingNotify.BuildReceiverConfiguration; the first marshalling or build error is returned.
func (m *migration) validateAlertmanagerConfig(config *PostableUserConfig) error {
	for _, r := range config.AlertmanagerConfig.Receivers {
		for _, gr := range r.GrafanaManagedReceivers {
			data, err := gr.Settings.MarshalJSON()
			if err != nil {
				return err
			}

			cfg := &alertingNotify.GrafanaIntegrationConfig{
				UID:                   gr.UID,
				Name:                  gr.Name,
				Type:                  gr.Type,
				DisableResolveMessage: gr.DisableResolveMessage,
				Settings:              data,
				SecureSettings:        gr.SecureSettings,
			}

			// decryptFunc represents the legacy way of decrypting data. Before the migration, we don't need any new way,
			// given that the previous alerting will never support it.
			decryptFunc := func(_ context.Context, sjd map[string][]byte, key string, fallback string) string {
				value, ok := sjd[key]
				if !ok {
					return fallback
				}
				decryptedData, err := util.Decrypt(value, setting.SecretKey)
				if err != nil {
					// The logger takes a message followed by key/value pairs, like every other
					// call in this file; printf verbs in the message are not interpolated.
					m.mg.Logger.Warn("Unable to decrypt key for receiver, returning fallback.", "key", key, "type", gr.Type, "uid", gr.UID, "error", err)
					return fallback
				}
				return string(decryptedData)
			}

			_, err = alertingNotify.BuildReceiverConfiguration(context.Background(), &alertingNotify.APIReceiver{
				GrafanaIntegrations: alertingNotify.GrafanaIntegrations{Integrations: []*alertingNotify.GrafanaIntegrationConfig{cfg}},
			}, decryptFunc)
			if err != nil {
				return err
			}
		}
	}

	return nil
}
2021-05-20 00:40:12 +05:30
// AlertConfiguration is the row shape this migration uses to read, insert, update and
// delete Alertmanager configurations.
type AlertConfiguration struct {
	// ID is the auto-incremented primary key.
	ID int64 ` xorm:"pk autoincr 'id'" `
	// OrgID is the organisation the configuration belongs to.
	OrgID int64 ` xorm:"org_id" `

	// AlertmanagerConfiguration is the serialized Alertmanager configuration.
	AlertmanagerConfiguration string
	// ConfigurationVersion is the version label of the configuration format; this file always writes "v1".
	ConfigurationVersion string
	// CreatedAt carries the xorm "created" tag.
	CreatedAt int64 ` xorm:"created" `
}
2021-05-11 08:08:39 -04:00
2023-10-11 14:21:21 -05:00
// rmMigration removes Grafana 8 alert data
type rmMigration struct {
	migrator.MigrationBase
}

// SQL returns the generic code-migration label; the dialect is not used.
func (rm *rmMigration) SQL(dialect migrator.Dialect) string {
	return codeMigration
}

// Exec deletes unified alerting data: alert rules and their versions, the folders created
// by the alert migration (and their ACL rows), the alerting configuration tables, alert
// instances and, if the kv_store table exists, the rows in the alerting namespace.
// Finally it removes the silence files of all organisations; a file that cannot be
// removed is only logged. Any database error aborts the migration and is returned.
func (rm *rmMigration) Exec(sess *xorm.Session, mg *migrator.Migrator) error {
	// Each entry is a statement followed by its bind arguments; they run in this order.
	statements := [][]any{
		{"delete from alert_rule"},
		{"delete from alert_rule_version"},
		{"delete from dashboard_acl where dashboard_id IN (select id from dashboard where created_by = ?)", FOLDER_CREATED_BY},
		{"delete from dashboard where created_by = ?", FOLDER_CREATED_BY},
		{"delete from alert_configuration"},
		{"delete from ngalert_configuration"},
		{"delete from alert_instance"},
	}
	for _, stmt := range statements {
		if _, err := sess.Exec(stmt...); err != nil {
			return err
		}
	}

	hasKVStore, err := sess.IsTableExist("kv_store")
	if err != nil {
		return err
	}
	if hasKVStore {
		if _, err = sess.Exec("delete from kv_store where namespace = ?", KV_NAMESPACE); err != nil {
			return err
		}
	}

	silenceFiles, err := getSilenceFileNamesForAllOrgs(mg)
	if err != nil {
		return err
	}
	for _, name := range silenceFiles {
		if rmErr := os.Remove(name); rmErr != nil {
			mg.Logger.Error("Alert migration error: failed to remove silence file", "file", name, "err", rmErr)
		}
	}

	return nil
}
// rmMigrationWithoutLogging is similar migration to rmMigration
// but is not recorded in the migration_log table so that it can rerun in the future.
//
// It embeds rmMigration rather than aliasing it. With a type alias, a method declared on
// the alias is a method of rmMigration itself, so rmMigration would skip the migration
// log as well. Embedding keeps rmMigration's methods available here while limiting
// SkipMigrationLog to this type.
type rmMigrationWithoutLogging struct {
	rmMigration
}

// SkipMigrationLog reports that this migration must not be recorded in migration_log.
func (m *rmMigrationWithoutLogging) SkipMigrationLog() bool {
	return true
}
2021-09-28 10:27:23 -04:00
// upgradeNgAlerting is a code migration that fixes up Alertmanager configurations and
// files for installations where ng-alerting was enabled before 8.2.
type upgradeNgAlerting struct {
	migrator.MigrationBase
}

// Compile-time check that upgradeNgAlerting is a code migration.
var _ migrator.CodeMigration = (*upgradeNgAlerting)(nil)

// Exec updates the stored alert configurations and then, if that succeeded, moves or
// deletes the Alertmanager files according to the organisation ID it returned.
func (u *upgradeNgAlerting) Exec(sess *xorm.Session, migrator *migrator.Migrator) error {
	firstOrgID, err := u.updateAlertConfigurations(sess, migrator)
	if err == nil {
		u.updateAlertmanagerFiles(firstOrgID, migrator)
	}
	return err
}
// updateAlertConfigurations reassigns alert_configuration rows that have org_id 0 to the
// organisation with the lowest ID.
//
// It returns that organisation's ID only when exactly one organisation exists. In every
// other case it returns 0: no rows with org_id 0, no organisations at all, or several
// organisations (in which case the default configuration is additionally inserted for
// each of them).
func (u *upgradeNgAlerting) updateAlertConfigurations(sess *xorm.Session, migrator *migrator.Migrator) (int64, error) {
	// if there are records with org_id == 0 then the feature flag was enabled before 8.2 that introduced org separation.
	// if feature is enabled in 8.2 the migration "AddDashAlertMigration", which is effectively different from what was run in 8.1.x and earlier versions,
	// will handle organizations correctly, and, therefore, nothing needs to be fixed
	orphaned, err := sess.Table(&AlertConfiguration{}).Where("org_id = 0").Count()
	if err != nil {
		return 0, fmt.Errorf("failed to query table alert_configuration: %w", err)
	}
	if orphaned == 0 {
		return 0, nil // NOTHING TO DO
	}

	// get all org IDs sorted in ascending order
	orgIDs := []int64{}
	if err = sess.Table("org").OrderBy("id").Cols("id").Find(&orgIDs); err != nil {
		return 0, fmt.Errorf("failed to query table org: %w", err)
	}
	if len(orgIDs) == 0 { // should not really happen
		migrator.Logger.Info("No organizations are found. Nothing to migrate")
		return 0, nil
	}

	// assigning all configurations to the first org because 0 does not usually point to any
	firstOrg := orgIDs[0]
	migrator.Logger.Info("Assigning all existing records from alert_configuration to the first organization", "org", firstOrg)
	if _, err = sess.Cols("org_id").Where("org_id = 0").Update(&AlertConfiguration{OrgID: firstOrg}); err != nil {
		return 0, fmt.Errorf("failed to update org_id for all rows in the table alert_configuration: %w", err)
	}

	// if there is a single organization it is safe to assume that all configurations belong to it.
	if len(orgIDs) == 1 {
		return firstOrg, nil
	}

	// if there are many organizations we cannot safely assume what organization an alert_configuration belongs to.
	// Therefore, we apply the default configuration to all organizations. The previous version could be restored if needed.
	migrator.Logger.Warn("Detected many organizations. The current alertmanager configuration will be replaced by the default one")
	defaults := make([]*AlertConfiguration, len(orgIDs))
	for i, id := range orgIDs {
		defaults[i] = &AlertConfiguration{
			AlertmanagerConfiguration: migrator.Cfg.UnifiedAlerting.DefaultConfiguration,
			// Since we are migration for a snapshot of the code, it is always going to migrate to
			// the v1 config.
			ConfigurationVersion: "v1",
			OrgID:                id,
		}
	}
	if _, err = sess.InsertMulti(defaults); err != nil {
		return 0, fmt.Errorf("failed to add default alertmanager configurations to every organization: %w", err)
	}
	return 0, nil
}
// updateAlertmanagerFiles scans the existing alerting directory '<data_dir>/alerting' for known files.
// If argument 'orgId' is not 0 updateAlertmanagerFiles moves all known files to the directory <data_dir>/alerting/<orgId>.
// Otherwise, it deletes those files.
// pre-8.2 version put all configuration files into the root of alerting directory. Since 8.2 configuration files are put in organization specific directory
func ( u * upgradeNgAlerting ) updateAlertmanagerFiles ( orgId int64 , migrator * migrator . Migrator ) {
2023-08-30 08:46:47 -07:00
knownFiles := map [ string ] any { "__default__.tmpl" : nil , "silences" : nil , "notifications" : nil }
2021-09-28 10:27:23 -04:00
alertingDir := filepath . Join ( migrator . Cfg . DataPath , "alerting" )
// do not fail if something goes wrong because these files are not used anymore. the worst that can happen is that we leave some leftovers behind
deleteFile := func ( fileName string ) {
path := filepath . Join ( alertingDir , fileName )
migrator . Logger . Info ( "Deleting alerting configuration file" , "file" , fileName )
err := os . Remove ( path )
if err != nil {
migrator . Logger . Warn ( "Failed to delete file" , "file" , path , "error" , err )
}
}
moveFile := func ( fileName string ) {
alertingOrgDir := filepath . Join ( alertingDir , strconv . FormatInt ( orgId , 10 ) )
if err := os . MkdirAll ( alertingOrgDir , 0750 ) ; err != nil {
migrator . Logger . Error ( "Failed to create alerting directory for organization. Skip moving the file and delete it instead" , "target_dir" , alertingOrgDir , "org_id" , orgId , "error" , err , "file" , fileName )
deleteFile ( fileName )
return
}
err := os . Rename ( filepath . Join ( alertingDir , fileName ) , filepath . Join ( alertingOrgDir , fileName ) )
if err != nil {
migrator . Logger . Error ( "Failed to move alertmanager configuration file to organization." , "source_dir" , alertingDir , "target_dir" , alertingOrgDir , "org_id" , orgId , "error" , err , "file" , fileName )
deleteFile ( fileName )
}
}
entries , err := os . ReadDir ( alertingDir )
if err != nil {
if ! os . IsNotExist ( err ) {
keys := make ( [ ] string , 0 , len ( knownFiles ) )
for key := range knownFiles {
keys = append ( keys , key )
}
migrator . Logger . Warn ( "Failed to clean up alerting directory. There may be files that are not used anymore." , "path" , alertingDir , "files_to_delete" , keys , "error" , err )
}
}
for _ , entry := range entries {
_ , known := knownFiles [ entry . Name ( ) ]
if known {
if orgId == 0 {
deleteFile ( entry . Name ( ) )
} else {
moveFile ( entry . Name ( ) )
}
}
}
}
func ( u * upgradeNgAlerting ) SQL ( migrator . Dialect ) string {
2022-06-22 10:52:46 -04:00
return codeMigration
2021-09-28 10:27:23 -04:00
}
2023-10-11 14:21:21 -05:00
// getAlertFolderNameFromDashboard generates a folder name for alerts that belong to a dashboard.
// The name is formatted according to the DASHBOARD_FOLDER format and includes the dashboard UID
// to avoid collisions.
//
// If the resulting string would exceed MaxFolderName bytes, the dashboard title is shortened.
// The cut is made at the start of a UTF-8 character so a multi-byte character is never split,
// and the title is dropped entirely when the UID alone leaves no room for it.
func getAlertFolderNameFromDashboard(dash *dashboard) string {
	// Bytes available for the title once the fixed text and the UID are accounted for.
	maxLen := MaxFolderName - len(fmt.Sprintf(DASHBOARD_FOLDER, "", dash.Uid))
	title := dash.Title
	if len(title) > maxLen {
		cut := 0
		if maxLen > 0 {
			// Ranging over a string yields the byte offset at which each character starts;
			// keep the largest such offset that does not exceed maxLen.
			for i := range title {
				if i > maxLen {
					break
				}
				cut = i
			}
		}
		title = title[:cut]
	}
	return fmt.Sprintf(DASHBOARD_FOLDER, title, dash.Uid) // include UID to the name to avoid collision
}
// CreateDefaultFoldersForAlertingMigration registers the migration that creates a folder
// dedicated for alerting if no folders exist. Nothing is registered when unified alerting
// is disabled.
func CreateDefaultFoldersForAlertingMigration(mg *migrator.Migrator) {
	if mg.Cfg.UnifiedAlerting.IsEnabled() {
		mg.AddMigration("create default alerting folders", &createDefaultFoldersForAlertingMigration{})
	}
}
// createDefaultFoldersForAlertingMigration is a code migration that creates the default
// alerting folder in organisations that have no folder yet.
type createDefaultFoldersForAlertingMigration struct {
	migrator.MigrationBase
}

// Exec creates the general alerting folder for every organisation that has no folder and
// for which alerting is not disabled. It returns an error if the organisations or the
// organisations-with-folders list cannot be read, or if a folder cannot be created.
func (c createDefaultFoldersForAlertingMigration) Exec(sess *xorm.Session, migrator *migrator.Migrator) error {
	folders := folderHelper{
		sess: sess,
		mg:   migrator,
	}

	var orgs []struct {
		Id   int64
		Name string
	}
	if err := sess.Table("org").Cols("id", "name").Find(&orgs); err != nil {
		return fmt.Errorf("failed to read the list of organizations: %w", err)
	}

	orgsWithFolders, err := folders.getOrgsIDThatHaveFolders()
	if err != nil {
		return fmt.Errorf("failed to list organizations that have at least one folder: %w", err)
	}

	for _, org := range orgs {
		// if there's at least one folder in the org or if alerting is disabled for that org, skip adding the default folder
		_, hasFolder := orgsWithFolders[org.Id]
		if hasFolder {
			migrator.Logger.Debug("Skip adding default alerting folder because organization already has at least one folder", "org_id", org.Id)
			continue
		}
		_, disabled := migrator.Cfg.UnifiedAlerting.DisabledOrgs[org.Id]
		if disabled {
			migrator.Logger.Debug("Skip adding default alerting folder because alerting is disabled for the organization ", "org_id", org.Id)
			continue
		}

		created, err := folders.createGeneralFolder(org.Id)
		if err != nil {
			return fmt.Errorf("failed to create the default alerting folder for organization %s (ID: %d): %w", org.Name, org.Id, err)
		}
		migrator.Logger.Info("Created the default folder for alerting", "org_id", org.Id, "folder_name", created.Title, "folder_uid", created.Uid)
	}
	return nil
}

// SQL returns the generic code-migration label; the dialect is not used.
func (c createDefaultFoldersForAlertingMigration) SQL(_ migrator.Dialect) string {
	return codeMigration
}
// UpdateRuleGroupIndexMigration registers the migration that fills the rule group index of
// alert rules according to their position within their group. Nothing is registered when
// unified alerting is disabled.
func UpdateRuleGroupIndexMigration(mg *migrator.Migrator) {
	if mg.Cfg.UnifiedAlerting.IsEnabled() {
		mg.AddMigration("update group index for alert rules", &updateRulesOrderInGroup{})
	}
}
// updateRulesOrderInGroup is a code migration that assigns each alert rule its 1-based
// position within its rule group.
type updateRulesOrderInGroup struct {
	migrator.MigrationBase
}

// SQL returns the generic code-migration label; the dialect is not used.
func (c updateRulesOrderInGroup) SQL(_ migrator.Dialect) string {
	return codeMigration
}

// Exec loads all alert rules ordered by ID, groups them by organisation, namespace and
// rule group, and sets RuleGroupIndex to the rule's 1-based position in its group. Every
// rule whose index changed gets its version incremented and its updated time set, and a
// matching entry is inserted into the rule version table. It returns an error if reading,
// updating or inserting fails.
func (c updateRulesOrderInGroup) Exec(sess *xorm.Session, migrator *migrator.Migrator) error {
	var allRules []*alertRule
	if err := sess.Table(alertRule{}).Asc("id").Find(&allRules); err != nil {
		return fmt.Errorf("failed to read the list of alert rules: %w", err)
	}
	if len(allRules) == 0 {
		migrator.Logger.Debug("No rules to migrate.")
		return nil
	}

	// Rules keep their ID order inside each group.
	byGroup := make(map[ngmodels.AlertRuleGroupKey][]*alertRule)
	for _, r := range allRules {
		key := ngmodels.AlertRuleGroupKey{
			OrgID:        r.OrgID,
			NamespaceUID: r.NamespaceUID,
			RuleGroup:    r.RuleGroup,
		}
		byGroup[key] = append(byGroup[key], r)
	}

	pending := make([]*alertRule, 0, len(allRules))
	for _, members := range byGroup {
		for pos, r := range members {
			wantIdx := pos + 1
			if r.RuleGroupIndex != wantIdx {
				r.RuleGroupIndex = wantIdx
				pending = append(pending, r)
			}
		}
	}
	if len(pending) == 0 {
		migrator.Logger.Debug("No rules to upgrade group index")
		return nil
	}

	now := time.Now()
	versions := make([]any, 0, len(pending))
	for _, r := range pending {
		r.Updated = now
		// The version entry is built before the rule's own version is bumped, then
		// pointed at the new version with the old one as its parent.
		v := r.makeVersion()
		v.Version = r.Version + 1
		v.ParentVersion = r.Version
		r.Version++

		if _, err := sess.ID(r.ID).Cols("version", "updated", "rule_group_idx").Update(r); err != nil {
			migrator.Logger.Error("Failed to update alert rule", "uid", r.UID, "err", err)
			return fmt.Errorf("unable to update alert rules with group index: %w", err)
		}
		migrator.Logger.Debug("Updated group index for alert rule", "rule_uid", r.UID)
		versions = append(versions, v)
	}

	if _, err := sess.Insert(versions...); err != nil {
		migrator.Logger.Error("Failed to insert changes to alert_rule_version", "err", err)
		return fmt.Errorf("unable to update alert rules with group index: %w", err)
	}
	return nil
}
// uidSet is a wrapper around map[string]struct{} and util.GenerateShortUID() which aims to help generate uids in quick
// succession while taking into consideration case sensitivity requirements. if caseInsensitive is true, all generated
// uids must also be unique when compared in a case-insensitive manner.
type uidSet struct {
	// set holds the uids recorded so far; it must be initialised before add is called.
	set map[string]struct{}
	// caseInsensitive makes uids that differ only by letter case count as the same uid.
	caseInsensitive bool
}

// key returns the form under which uid is stored and looked up: lower-cased when the set
// is case-insensitive, unchanged otherwise.
func (s *uidSet) key(uid string) string {
	if s.caseInsensitive {
		return strings.ToLower(uid)
	}
	return uid
}

// contains checks whether the given uid has already been generated in this uidSet.
func (s *uidSet) contains(uid string) bool {
	_, found := s.set[s.key(uid)]
	return found
}

// add adds the given uid to the uidSet.
func (s *uidSet) add(uid string) {
	s.set[s.key(uid)] = struct{}{}
}
// generateUid will generate a new unique uid that is not already contained in the uidSet.
// It makes up to five attempts; the first uid that is not yet in the set is recorded and returned.
// If all attempts are exhausted an error will be returned.
func (s *uidSet) generateUid() (string, error) {
	const maxAttempts = 5
	for attempt := 0; attempt < maxAttempts; attempt++ {
		candidate := util.GenerateShortUID()
		if s.contains(candidate) {
			continue
		}
		s.add(candidate)
		return candidate, nil
	}
	return "", errors.New("failed to generate UID")
}