2023-10-12 13:43:10 +01:00
package migration
2021-04-29 13:24:37 -04:00
import (
2023-10-12 13:43:10 +01:00
"context"
2021-04-29 13:24:37 -04:00
"encoding/json"
"fmt"
"time"
2023-11-29 10:05:00 -05:00
"github.com/prometheus/common/model"
2023-12-19 13:25:13 -05:00
"github.com/grafana/grafana-plugin-sdk-go/data"
2024-01-05 05:37:13 -05:00
2023-05-24 13:09:17 -04:00
"github.com/grafana/grafana/pkg/infra/log"
2023-01-23 08:19:25 -05:00
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
2024-01-05 05:37:13 -05:00
"github.com/grafana/grafana/pkg/services/dashboards"
2023-11-17 01:07:35 +01:00
"github.com/grafana/grafana/pkg/services/datasources"
2021-10-07 17:30:06 -04:00
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
2023-10-12 13:43:10 +01:00
"github.com/grafana/grafana/pkg/services/ngalert/store"
2021-12-01 06:45:27 -05:00
"github.com/grafana/grafana/pkg/tsdb/graphite"
2023-10-12 13:43:10 +01:00
"github.com/grafana/grafana/pkg/util"
2021-04-29 13:24:37 -04:00
)
2024-01-05 05:37:13 -05:00
func addLabelsAndAnnotations ( l log . Logger , alert * legacymodels . Alert , dashboardUID string ) ( data . Labels , data . Labels ) {
2023-12-19 13:25:13 -05:00
tags := alert . GetTagsFromSettings ( )
2024-01-05 05:37:13 -05:00
lbls := make ( data . Labels , len ( tags ) + 1 )
2022-10-18 00:47:39 -04:00
2023-12-19 13:25:13 -05:00
for _ , t := range tags {
lbls [ t . Key ] = t . Value
}
2022-06-22 17:39:17 +01:00
2023-12-19 13:25:13 -05:00
// Add a label for routing
lbls [ ngmodels . MigratedUseLegacyChannelsLabel ] = "true"
2021-05-03 11:42:31 -04:00
2023-12-19 13:25:13 -05:00
annotations := make ( data . Labels , 4 )
2023-10-12 13:43:10 +01:00
annotations [ ngmodels . DashboardUIDAnnotation ] = dashboardUID
2023-12-19 13:25:13 -05:00
annotations [ ngmodels . PanelIDAnnotation ] = fmt . Sprintf ( "%v" , alert . PanelID )
annotations [ ngmodels . MigratedAlertIdAnnotation ] = fmt . Sprintf ( "%v" , alert . ID )
message := MigrateTmpl ( l . New ( "field" , "message" ) , alert . Message )
annotations [ ngmodels . MigratedMessageAnnotation ] = message
2021-05-03 11:42:31 -04:00
2021-05-31 19:47:17 +05:30
return lbls , annotations
2021-05-03 11:42:31 -04:00
}
2023-12-19 13:25:13 -05:00
// migrateAlert migrates a single dashboard alert from legacy alerting to unified alerting.
2024-01-05 05:37:13 -05:00
func ( om * OrgMigration ) migrateAlert ( ctx context . Context , l log . Logger , alert * legacymodels . Alert , dashboard * dashboards . Dashboard ) ( * ngmodels . AlertRule , error ) {
2023-10-12 13:43:10 +01:00
l . Debug ( "Migrating alert rule to Unified Alerting" )
2023-12-19 13:25:13 -05:00
rawSettings , err := json . Marshal ( alert . Settings )
if err != nil {
return nil , fmt . Errorf ( "get settings: %w" , err )
}
var parsedSettings dashAlertSettings
err = json . Unmarshal ( rawSettings , & parsedSettings )
if err != nil {
return nil , fmt . Errorf ( "parse settings: %w" , err )
}
cond , err := transConditions ( ctx , l , parsedSettings , alert . OrgID , om . migrationStore )
2023-10-12 13:43:10 +01:00
if err != nil {
return nil , fmt . Errorf ( "transform conditions: %w" , err )
}
2024-01-05 05:37:13 -05:00
lbls , annotations := addLabelsAndAnnotations ( l , alert , dashboard . UID )
2021-12-01 06:45:27 -05:00
2023-06-28 14:02:57 -04:00
data , err := migrateAlertRuleQueries ( l , cond . Data )
2021-12-01 06:45:27 -05:00
if err != nil {
2023-12-19 13:25:13 -05:00
return nil , fmt . Errorf ( "queries: %w" , err )
2021-12-01 06:45:27 -05:00
}
2021-04-29 13:24:37 -04:00
2023-02-02 21:49:05 +00:00
isPaused := false
2023-12-19 13:25:13 -05:00
if alert . State == "paused" {
2023-02-02 21:49:05 +00:00
isPaused = true
}
2024-01-05 05:37:13 -05:00
dashUID := dashboard . UID
2023-10-12 13:43:10 +01:00
ar := & ngmodels . AlertRule {
2023-12-19 13:25:13 -05:00
OrgID : alert . OrgID ,
2024-01-05 05:37:13 -05:00
Title : alert . Name , // Title will be deduplicated on persist.
2023-10-12 13:43:10 +01:00
UID : util . GenerateShortUID ( ) ,
2021-04-29 13:24:37 -04:00
Condition : cond . Condition ,
2021-12-01 06:45:27 -05:00
Data : data ,
2023-12-19 13:25:13 -05:00
IntervalSeconds : ruleAdjustInterval ( alert . Frequency ) ,
2021-04-30 15:08:01 -04:00
Version : 1 ,
2024-01-05 05:37:13 -05:00
NamespaceUID : "" , // The folder for this alert is determined later.
2023-10-12 23:12:40 +01:00
DashboardUID : & dashUID ,
2023-12-19 13:25:13 -05:00
PanelID : & alert . PanelID ,
2024-01-05 05:37:13 -05:00
RuleGroup : groupName ( ruleAdjustInterval ( alert . Frequency ) , dashboard . Title ) ,
2023-12-19 13:25:13 -05:00
For : alert . For ,
2021-04-29 13:24:37 -04:00
Updated : time . Now ( ) . UTC ( ) ,
Annotations : annotations ,
2021-05-31 19:47:17 +05:30
Labels : lbls ,
2023-10-12 13:43:10 +01:00
RuleGroupIndex : 1 , // Every rule is in its own group.
2023-02-02 21:49:05 +00:00
IsPaused : isPaused ,
2023-12-19 13:25:13 -05:00
NoDataState : transNoData ( l , parsedSettings . NoDataState ) ,
ExecErrState : transExecErr ( l , parsedSettings . ExecutionErrorState ) ,
2021-04-29 13:24:37 -04:00
}
2024-01-24 15:56:19 -05:00
om . silences . handleSilenceLabels ( ar , parsedSettings )
2021-11-04 16:42:34 -04:00
2024-01-05 05:37:13 -05:00
// We do some validation and pre-save operations early in order to track these errors as part of the migration state.
if err := ar . ValidateAlertRule ( om . cfg . UnifiedAlerting ) ; err != nil {
return nil , err
}
if err := ar . PreSave ( time . Now ) ; err != nil {
return nil , err
}
2021-04-29 13:24:37 -04:00
return ar , nil
}
2021-12-01 06:45:27 -05:00
// migrateAlertRuleQueries attempts to fix alert rule queries so they can work in unified alerting. Queries of some data sources are not compatible with unified alerting.
2023-10-12 13:43:10 +01:00
func migrateAlertRuleQueries ( l log . Logger , data [ ] ngmodels . AlertQuery ) ( [ ] ngmodels . AlertQuery , error ) {
result := make ( [ ] ngmodels . AlertQuery , 0 , len ( data ) )
2021-12-01 06:45:27 -05:00
for _ , d := range data {
// queries that are expression are not relevant, skip them.
2023-03-23 21:55:54 +01:00
if d . DatasourceUID == expressionDatasourceUID {
2021-12-01 06:45:27 -05:00
result = append ( result , d )
continue
}
var fixedData map [ string ] json . RawMessage
err := json . Unmarshal ( d . Model , & fixedData )
if err != nil {
return nil , err
}
2022-05-04 09:31:05 -04:00
// remove hidden tag from the query (if exists)
delete ( fixedData , "hide" )
2021-12-01 06:45:27 -05:00
fixedData = fixGraphiteReferencedSubQueries ( fixedData )
2023-06-28 14:02:57 -04:00
fixedData = fixPrometheusBothTypeQuery ( l , fixedData )
2021-12-01 06:45:27 -05:00
updatedModel , err := json . Marshal ( fixedData )
if err != nil {
return nil , err
}
d . Model = updatedModel
result = append ( result , d )
}
return result , nil
}
// fixGraphiteReferencedSubQueries attempts to fix graphite referenced sub queries, given unified alerting does not support this.
// targetFull of Graphite data source contains the expanded version of field 'target', so when it is
// present it replaces 'target' and is itself removed. The (possibly modified) input map is returned.
func fixGraphiteReferencedSubQueries(queryData map[string]json.RawMessage) map[string]json.RawMessage {
	expanded, found := queryData[graphite.TargetFullModelField]
	if !found {
		// No expanded target present, nothing to rewrite.
		return queryData
	}

	delete(queryData, graphite.TargetFullModelField)
	queryData[graphite.TargetModelField] = expanded
	return queryData
}
2023-06-28 14:02:57 -04:00
// fixPrometheusBothTypeQuery converts Prometheus 'Both' type queries to range queries.
//
// A query counts as 'Both' when its "instant" and "range" fields both decode to true.
// Queries that are not 'Both', are not Prometheus, or whose fields cannot be decoded
// are returned unchanged.
func fixPrometheusBothTypeQuery(l log.Logger, queryData map[string]json.RawMessage) map[string]json.RawMessage {
	// There is the possibility to support this functionality by:
	//	- Splitting the query into two: one for instant and one for range.
	//  - Splitting the condition into two: one for each query, separated by OR.
	// However, relying on a 'Both' query instead of multiple conditions to do this in legacy is likely
	// to be unintentional. In addition, this would require more robust operator precedence in classic conditions.
	// Given these reasons, we opt to convert them to range queries and log a warning.
	var instant, rng bool

	if raw, found := queryData["instant"]; found {
		if parseErr := json.Unmarshal(raw, &instant); parseErr != nil {
			// Nothing to do here, we can't parse the instant field.
			if prom, _ := isPrometheusQuery(queryData); prom {
				l.Info("Failed to parse instant field on Prometheus query", "instant", string(raw), "err", parseErr)
			}
			return queryData
		}
	}

	if raw, found := queryData["range"]; found {
		if parseErr := json.Unmarshal(raw, &rng); parseErr != nil {
			// Nothing to do here, we can't parse the range field.
			if prom, _ := isPrometheusQuery(queryData); prom {
				l.Info("Failed to parse range field on Prometheus query", "range", string(raw), "err", parseErr)
			}
			return queryData
		}
	}

	// Only apply this fix to 'Both' type queries.
	if !(instant && rng) {
		return queryData
	}

	switch prom, err := isPrometheusQuery(queryData); {
	case err != nil:
		l.Info("Unable to convert alert rule that resembles a Prometheus 'Both' type query to 'Range'", "err", err)
		return queryData
	case !prom:
		// Only apply this fix to Prometheus.
		return queryData
	}

	// Convert 'Both' type queries to `Range` queries by disabling the `Instant` portion.
	l.Warn("Prometheus 'Both' type queries are not supported in unified alerting. Converting to range query.")
	queryData["instant"] = []byte("false")
	return queryData
}
// isPrometheusQuery checks if the query is for Prometheus.
func isPrometheusQuery ( queryData map [ string ] json . RawMessage ) ( bool , error ) {
ds , ok := queryData [ "datasource" ]
if ! ok {
return false , fmt . Errorf ( "missing datasource field" )
}
var datasource struct {
Type string ` json:"type" `
}
if err := json . Unmarshal ( ds , & datasource ) ; err != nil {
2023-12-19 13:25:13 -05:00
return false , fmt . Errorf ( "parse datasource '%s': %w" , string ( ds ) , err )
2023-06-28 14:02:57 -04:00
}
if datasource . Type == "" {
return false , fmt . Errorf ( "missing type field '%s'" , string ( ds ) )
}
2023-11-17 01:07:35 +01:00
return datasource . Type == datasources . DS_PROMETHEUS , nil
2023-06-28 14:02:57 -04:00
}
2021-04-29 13:24:37 -04:00
// ruleAdjustInterval rounds freq down to a multiple of the base interval.
// Anything at or below the base interval (including zero and negative values)
// yields the base interval itself.
func ruleAdjustInterval(freq int64) int64 {
	// 10 corresponds to the SchedulerCfg, but TODO not worrying about fetching for now.
	const baseInterval int64 = 10

	if freq > baseInterval {
		// Integer division discards the remainder, i.e. rounds down to a multiple.
		return (freq / baseInterval) * baseInterval
	}
	return baseInterval
}
2023-10-12 13:43:10 +01:00
func transNoData ( l log . Logger , s string ) ngmodels . NoDataState {
2022-03-02 19:07:55 -05:00
switch legacymodels . NoDataOption ( s ) {
case legacymodels . NoDataSetOK :
2023-10-12 13:43:10 +01:00
return ngmodels . OK // values from ngalert/models/rule
2022-03-02 19:07:55 -05:00
case "" , legacymodels . NoDataSetNoData :
2023-10-12 13:43:10 +01:00
return ngmodels . NoData
2022-03-02 19:07:55 -05:00
case legacymodels . NoDataSetAlerting :
2023-10-12 13:43:10 +01:00
return ngmodels . Alerting
2022-03-02 19:07:55 -05:00
case legacymodels . NoDataKeepState :
2024-03-12 10:00:43 -04:00
return ngmodels . KeepLast
2023-05-24 13:09:17 -04:00
default :
l . Warn ( "Unable to translate execution of NoData state. Using default execution" , "old" , s , "new" , ngmodels . NoData )
2023-10-12 13:43:10 +01:00
return ngmodels . NoData
2021-04-29 13:24:37 -04:00
}
}
2023-10-12 13:43:10 +01:00
func transExecErr ( l log . Logger , s string ) ngmodels . ExecutionErrorState {
2022-03-02 19:07:55 -05:00
switch legacymodels . ExecutionErrorOption ( s ) {
case "" , legacymodels . ExecutionErrorSetAlerting :
2023-10-12 13:43:10 +01:00
return ngmodels . AlertingErrState
2022-03-02 19:07:55 -05:00
case legacymodels . ExecutionErrorKeepState :
2024-03-12 10:00:43 -04:00
return ngmodels . KeepLastErrState
2022-03-02 19:07:55 -05:00
case legacymodels . ExecutionErrorSetOk :
2023-10-12 13:43:10 +01:00
return ngmodels . OkErrState
2023-05-24 13:09:17 -04:00
default :
l . Warn ( "Unable to translate execution of Error state. Using default execution" , "old" , s , "new" , ngmodels . ErrorErrState )
2023-10-12 13:43:10 +01:00
return ngmodels . ErrorErrState
2021-04-29 13:24:37 -04:00
}
}
2022-09-13 13:53:09 -05:00
2023-10-12 23:12:40 +01:00
// truncate truncates the given name so that it is at most length bytes long.
//
// The cut never lands in the middle of a multi-byte UTF-8 sequence: if the byte at the
// cut position is a continuation byte, the cut is moved back to the start of that rune,
// so valid UTF-8 input always yields valid UTF-8 output (possibly a few bytes shorter
// than length). A non-positive length yields the empty string instead of panicking.
func truncate(daName string, length int) string {
	if length <= 0 {
		return ""
	}
	if len(daName) <= length {
		return daName
	}

	// UTF-8 continuation bytes have the bit pattern 10xxxxxx. Slicing right before one
	// would split a rune, so back off until the cut sits on a rune start.
	cut := length
	for cut > 0 && daName[cut]&0xC0 == 0x80 {
		cut--
	}
	return daName[:cut]
}
2022-10-18 00:47:39 -04:00
2023-11-29 10:05:00 -05:00
// groupName constructs a group name from the dashboard title and the interval, in the form
// "<dashboard title> - <interval>". It truncates the dashboard title if necessary to ensure
// that the group name is not longer than the maximum allowed length.
func groupName(interval int64, dashboardTitle string) string {
	// Humanize the interval (given in seconds) via the Prometheus duration formatter.
	humanized := model.Duration(time.Duration(interval) * time.Second).String()
	suffix := " - " + humanized

	// Reserve room for the suffix so only the title is shortened.
	maxTitleLen := store.AlertRuleMaxRuleGroupNameLength - len(suffix)
	return truncate(dashboardTitle, maxTitleLen) + suffix
}