2023-10-12 07:43:10 -05:00
package migration
2021-04-29 12:24:37 -05:00
import (
2023-10-12 07:43:10 -05:00
"context"
2021-04-29 12:24:37 -05:00
"encoding/json"
"fmt"
"time"
2023-11-29 09:05:00 -06:00
"github.com/prometheus/common/model"
2023-12-19 12:25:13 -06:00
"github.com/grafana/grafana-plugin-sdk-go/data"
2024-01-05 04:37:13 -06:00
2023-05-24 12:09:17 -05:00
"github.com/grafana/grafana/pkg/infra/log"
2023-01-23 07:19:25 -06:00
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
2024-01-05 04:37:13 -06:00
"github.com/grafana/grafana/pkg/services/dashboards"
2023-11-16 18:07:35 -06:00
"github.com/grafana/grafana/pkg/services/datasources"
2021-10-07 16:30:06 -05:00
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
2023-10-12 07:43:10 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/store"
2021-12-01 05:45:27 -06:00
"github.com/grafana/grafana/pkg/tsdb/graphite"
2023-10-12 07:43:10 -05:00
"github.com/grafana/grafana/pkg/util"
2021-04-29 12:24:37 -05:00
)
2024-01-05 04:37:13 -06:00
func addLabelsAndAnnotations ( l log . Logger , alert * legacymodels . Alert , dashboardUID string ) ( data . Labels , data . Labels ) {
2023-12-19 12:25:13 -06:00
tags := alert . GetTagsFromSettings ( )
2024-01-05 04:37:13 -06:00
lbls := make ( data . Labels , len ( tags ) + 1 )
2022-10-17 23:47:39 -05:00
2023-12-19 12:25:13 -06:00
for _ , t := range tags {
lbls [ t . Key ] = t . Value
}
2022-06-22 11:39:17 -05:00
2023-12-19 12:25:13 -06:00
// Add a label for routing
lbls [ ngmodels . MigratedUseLegacyChannelsLabel ] = "true"
2021-05-03 10:42:31 -05:00
2023-12-19 12:25:13 -06:00
annotations := make ( data . Labels , 4 )
2023-10-12 07:43:10 -05:00
annotations [ ngmodels . DashboardUIDAnnotation ] = dashboardUID
2023-12-19 12:25:13 -06:00
annotations [ ngmodels . PanelIDAnnotation ] = fmt . Sprintf ( "%v" , alert . PanelID )
annotations [ ngmodels . MigratedAlertIdAnnotation ] = fmt . Sprintf ( "%v" , alert . ID )
message := MigrateTmpl ( l . New ( "field" , "message" ) , alert . Message )
annotations [ ngmodels . MigratedMessageAnnotation ] = message
2021-05-03 10:42:31 -05:00
2021-05-31 09:17:17 -05:00
return lbls , annotations
2021-05-03 10:42:31 -05:00
}
2023-12-19 12:25:13 -06:00
// migrateAlert migrates a single dashboard alert from legacy alerting to unified alerting.
2024-01-05 04:37:13 -06:00
func ( om * OrgMigration ) migrateAlert ( ctx context . Context , l log . Logger , alert * legacymodels . Alert , dashboard * dashboards . Dashboard ) ( * ngmodels . AlertRule , error ) {
2023-10-12 07:43:10 -05:00
l . Debug ( "Migrating alert rule to Unified Alerting" )
2023-12-19 12:25:13 -06:00
rawSettings , err := json . Marshal ( alert . Settings )
if err != nil {
return nil , fmt . Errorf ( "get settings: %w" , err )
}
var parsedSettings dashAlertSettings
err = json . Unmarshal ( rawSettings , & parsedSettings )
if err != nil {
return nil , fmt . Errorf ( "parse settings: %w" , err )
}
cond , err := transConditions ( ctx , l , parsedSettings , alert . OrgID , om . migrationStore )
2023-10-12 07:43:10 -05:00
if err != nil {
return nil , fmt . Errorf ( "transform conditions: %w" , err )
}
2024-01-05 04:37:13 -06:00
lbls , annotations := addLabelsAndAnnotations ( l , alert , dashboard . UID )
2021-12-01 05:45:27 -06:00
2023-06-28 13:02:57 -05:00
data , err := migrateAlertRuleQueries ( l , cond . Data )
2021-12-01 05:45:27 -06:00
if err != nil {
2023-12-19 12:25:13 -06:00
return nil , fmt . Errorf ( "queries: %w" , err )
2021-12-01 05:45:27 -06:00
}
2021-04-29 12:24:37 -05:00
2023-02-02 15:49:05 -06:00
isPaused := false
2023-12-19 12:25:13 -06:00
if alert . State == "paused" {
2023-02-02 15:49:05 -06:00
isPaused = true
}
2024-01-05 04:37:13 -06:00
dashUID := dashboard . UID
2023-10-12 07:43:10 -05:00
ar := & ngmodels . AlertRule {
2023-12-19 12:25:13 -06:00
OrgID : alert . OrgID ,
2024-01-05 04:37:13 -06:00
Title : alert . Name , // Title will be deduplicated on persist.
2023-10-12 07:43:10 -05:00
UID : util . GenerateShortUID ( ) ,
2021-04-29 12:24:37 -05:00
Condition : cond . Condition ,
2021-12-01 05:45:27 -06:00
Data : data ,
2023-12-19 12:25:13 -06:00
IntervalSeconds : ruleAdjustInterval ( alert . Frequency ) ,
2021-04-30 14:08:01 -05:00
Version : 1 ,
2024-01-05 04:37:13 -06:00
NamespaceUID : "" , // The folder for this alert is determined later.
2023-10-12 17:12:40 -05:00
DashboardUID : & dashUID ,
2023-12-19 12:25:13 -06:00
PanelID : & alert . PanelID ,
2024-01-05 04:37:13 -06:00
RuleGroup : groupName ( ruleAdjustInterval ( alert . Frequency ) , dashboard . Title ) ,
2023-12-19 12:25:13 -06:00
For : alert . For ,
2021-04-29 12:24:37 -05:00
Updated : time . Now ( ) . UTC ( ) ,
Annotations : annotations ,
2021-05-31 09:17:17 -05:00
Labels : lbls ,
2023-10-12 07:43:10 -05:00
RuleGroupIndex : 1 , // Every rule is in its own group.
2023-02-02 15:49:05 -06:00
IsPaused : isPaused ,
2023-12-19 12:25:13 -06:00
NoDataState : transNoData ( l , parsedSettings . NoDataState ) ,
ExecErrState : transExecErr ( l , parsedSettings . ExecutionErrorState ) ,
2021-04-29 12:24:37 -05:00
}
2021-05-31 07:00:58 -05:00
// Label for routing and silences.
2022-10-17 23:47:39 -05:00
n , v := getLabelForSilenceMatching ( ar . UID )
2021-05-31 07:00:58 -05:00
ar . Labels [ n ] = v
2023-12-19 12:25:13 -06:00
if parsedSettings . ExecutionErrorState == string ( legacymodels . ExecutionErrorKeepState ) {
2023-10-12 07:43:10 -05:00
if err := om . addErrorSilence ( ar ) ; err != nil {
om . log . Error ( "Alert migration error: failed to create silence for Error" , "rule_name" , ar . Title , "err" , err )
}
2021-11-25 04:46:47 -06:00
}
2023-12-19 12:25:13 -06:00
if parsedSettings . NoDataState == string ( legacymodels . NoDataKeepState ) {
2023-10-12 07:43:10 -05:00
if err := om . addNoDataSilence ( ar ) ; err != nil {
om . log . Error ( "Alert migration error: failed to create silence for NoData" , "rule_name" , ar . Title , "err" , err )
}
2021-11-04 15:42:34 -05:00
}
2024-01-05 04:37:13 -06:00
// We do some validation and pre-save operations early in order to track these errors as part of the migration state.
if err := ar . ValidateAlertRule ( om . cfg . UnifiedAlerting ) ; err != nil {
return nil , err
}
if err := ar . PreSave ( time . Now ) ; err != nil {
return nil , err
}
2021-04-29 12:24:37 -05:00
return ar , nil
}
2021-12-01 05:45:27 -06:00
// migrateAlertRuleQueries attempts to fix alert rule queries so they can work in unified alerting. Queries of some data sources are not compatible with unified alerting.
2023-10-12 07:43:10 -05:00
func migrateAlertRuleQueries ( l log . Logger , data [ ] ngmodels . AlertQuery ) ( [ ] ngmodels . AlertQuery , error ) {
result := make ( [ ] ngmodels . AlertQuery , 0 , len ( data ) )
2021-12-01 05:45:27 -06:00
for _ , d := range data {
// queries that are expression are not relevant, skip them.
2023-03-23 15:55:54 -05:00
if d . DatasourceUID == expressionDatasourceUID {
2021-12-01 05:45:27 -06:00
result = append ( result , d )
continue
}
var fixedData map [ string ] json . RawMessage
err := json . Unmarshal ( d . Model , & fixedData )
if err != nil {
return nil , err
}
2022-05-04 08:31:05 -05:00
// remove hidden tag from the query (if exists)
delete ( fixedData , "hide" )
2021-12-01 05:45:27 -06:00
fixedData = fixGraphiteReferencedSubQueries ( fixedData )
2023-06-28 13:02:57 -05:00
fixedData = fixPrometheusBothTypeQuery ( l , fixedData )
2021-12-01 05:45:27 -06:00
updatedModel , err := json . Marshal ( fixedData )
if err != nil {
return nil , err
}
d . Model = updatedModel
result = append ( result , d )
}
return result , nil
}
// fixGraphiteReferencedSubQueries attempts to fix graphite referenced sub queries, given unified
// alerting does not support this. targetFull of Graphite data source contains the expanded
// version of field 'target', so when it is present its value replaces 'target' and the
// targetFull field itself is removed. The map is modified in place and returned.
func fixGraphiteReferencedSubQueries(queryData map[string]json.RawMessage) map[string]json.RawMessage {
	expanded, found := queryData[graphite.TargetFullModelField]
	if !found {
		// Nothing to expand; leave the query as it is.
		return queryData
	}

	delete(queryData, graphite.TargetFullModelField)
	queryData[graphite.TargetModelField] = expanded
	return queryData
}
2023-06-28 13:02:57 -05:00
// fixPrometheusBothTypeQuery converts Prometheus 'Both' type queries to range queries.
//
// A query counts as 'Both' when its "instant" and "range" fields both decode to true. Such a
// query is rewritten in place by setting "instant" to false, but only if its datasource type
// is Prometheus. In every other case the map is returned unchanged.
func fixPrometheusBothTypeQuery(l log.Logger, queryData map[string]json.RawMessage) map[string]json.RawMessage {
	// There is the possibility to support this functionality by:
	//	- Splitting the query into two: one for instant and one for range.
	//	- Splitting the condition into two: one for each query, separated by OR.
	// However, relying on a 'Both' query instead of multiple conditions to do this in legacy is likely
	// to be unintentional. In addition, this would require more robust operator precedence in classic conditions.
	// Given these reasons, we opt to convert them to range queries and log a warning.

	// readFlag decodes an optional boolean field; a missing field reads as false. ok is false
	// when the field is present but cannot be decoded, in which case there is nothing to do
	// for this query. The failure is only logged for queries recognised as Prometheus.
	readFlag := func(field, failureMsg string) (value bool, ok bool) {
		raw, present := queryData[field]
		if !present {
			return false, true
		}
		if err := json.Unmarshal(raw, &value); err != nil {
			if isPrometheus, _ := isPrometheusQuery(queryData); isPrometheus {
				l.Info(failureMsg, field, string(raw), "err", err)
			}
			return false, false
		}
		return value, true
	}

	instant, ok := readFlag("instant", "Failed to parse instant field on Prometheus query")
	if !ok {
		return queryData
	}
	rng, ok := readFlag("range", "Failed to parse range field on Prometheus query")
	if !ok {
		return queryData
	}

	// Only apply this fix to 'Both' type queries.
	if !(instant && rng) {
		return queryData
	}

	switch isPrometheus, err := isPrometheusQuery(queryData); {
	case err != nil:
		l.Info("Unable to convert alert rule that resembles a Prometheus 'Both' type query to 'Range'", "err", err)
	case isPrometheus:
		// Convert 'Both' type queries to `Range` queries by disabling the `Instant` portion.
		l.Warn("Prometheus 'Both' type queries are not supported in unified alerting. Converting to range query.")
		queryData["instant"] = []byte("false")
	}
	// Non-Prometheus queries fall through unchanged: the fix only applies to Prometheus.
	return queryData
}
// isPrometheusQuery checks if the query is for Prometheus.
func isPrometheusQuery ( queryData map [ string ] json . RawMessage ) ( bool , error ) {
ds , ok := queryData [ "datasource" ]
if ! ok {
return false , fmt . Errorf ( "missing datasource field" )
}
var datasource struct {
Type string ` json:"type" `
}
if err := json . Unmarshal ( ds , & datasource ) ; err != nil {
2023-12-19 12:25:13 -06:00
return false , fmt . Errorf ( "parse datasource '%s': %w" , string ( ds ) , err )
2023-06-28 13:02:57 -05:00
}
if datasource . Type == "" {
return false , fmt . Errorf ( "missing type field '%s'" , string ( ds ) )
}
2023-11-16 18:07:35 -06:00
return datasource . Type == datasources . DS_PROMETHEUS , nil
2023-06-28 13:02:57 -05:00
}
2021-04-29 12:24:37 -05:00
// ruleAdjustInterval rounds the given frequency (in seconds) down to a multiple of the base
// interval, never returning less than the base interval itself.
func ruleAdjustInterval(freq int64) int64 {
	// 10 corresponds to the SchedulerCfg, but TODO not worrying about fetching for now.
	const baseFreq int64 = 10

	if freq > baseFreq {
		// Drop the remainder so the result is a whole multiple of the base interval.
		return freq - freq%baseFreq
	}
	return baseFreq
}
2023-10-12 07:43:10 -05:00
func transNoData ( l log . Logger , s string ) ngmodels . NoDataState {
2022-03-02 18:07:55 -06:00
switch legacymodels . NoDataOption ( s ) {
case legacymodels . NoDataSetOK :
2023-10-12 07:43:10 -05:00
return ngmodels . OK // values from ngalert/models/rule
2022-03-02 18:07:55 -06:00
case "" , legacymodels . NoDataSetNoData :
2023-10-12 07:43:10 -05:00
return ngmodels . NoData
2022-03-02 18:07:55 -06:00
case legacymodels . NoDataSetAlerting :
2023-10-12 07:43:10 -05:00
return ngmodels . Alerting
2022-03-02 18:07:55 -06:00
case legacymodels . NoDataKeepState :
2023-10-12 07:43:10 -05:00
return ngmodels . NoData // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert.
2023-05-24 12:09:17 -05:00
default :
l . Warn ( "Unable to translate execution of NoData state. Using default execution" , "old" , s , "new" , ngmodels . NoData )
2023-10-12 07:43:10 -05:00
return ngmodels . NoData
2021-04-29 12:24:37 -05:00
}
}
2023-10-12 07:43:10 -05:00
func transExecErr ( l log . Logger , s string ) ngmodels . ExecutionErrorState {
2022-03-02 18:07:55 -06:00
switch legacymodels . ExecutionErrorOption ( s ) {
case "" , legacymodels . ExecutionErrorSetAlerting :
2023-10-12 07:43:10 -05:00
return ngmodels . AlertingErrState
2022-03-02 18:07:55 -06:00
case legacymodels . ExecutionErrorKeepState :
2021-11-25 04:46:47 -06:00
// Keep last state is translated to error as we now emit a
// DatasourceError alert when the state is error
2023-10-12 07:43:10 -05:00
return ngmodels . ErrorErrState
2022-03-02 18:07:55 -06:00
case legacymodels . ExecutionErrorSetOk :
2023-10-12 07:43:10 -05:00
return ngmodels . OkErrState
2023-05-24 12:09:17 -05:00
default :
l . Warn ( "Unable to translate execution of Error state. Using default execution" , "old" , s , "new" , ngmodels . ErrorErrState )
2023-10-12 07:43:10 -05:00
return ngmodels . ErrorErrState
2021-04-29 12:24:37 -05:00
}
}
2022-09-13 13:53:09 -05:00
2023-10-12 17:12:40 -05:00
// truncate truncates the given name to at most length bytes.
//
// The cut is moved back to the nearest rune boundary so that a multi-byte UTF-8 character is
// never split in half; the result may therefore be slightly shorter than length. Names that
// already fit are returned unchanged, and a non-positive length yields the empty string.
func truncate(daName string, length int) string {
	if length <= 0 {
		return ""
	}
	if len(daName) <= length {
		return daName
	}

	// Ranging over a string yields the byte offset at which each rune starts. Keep the last
	// such offset that does not exceed the limit and cut there.
	cut := 0
	for offset := range daName {
		if offset > length {
			break
		}
		cut = offset
	}
	return daName[:cut]
}
2022-10-17 23:47:39 -05:00
2023-11-29 09:05:00 -06:00
// groupName constructs a group name from the dashboard title and the interval, in the form
// "<dashboard title> - <interval>". The dashboard title is truncated if necessary so that the
// whole group name is not longer than store.AlertRuleMaxRuleGroupNameLength.
func groupName(interval int64, dashboardTitle string) string {
	// Humanize the interval (given in seconds) via model.Duration's String method.
	humanized := model.Duration(time.Duration(interval) * time.Second).String()
	suffix := " - " + humanized

	// The suffix is always kept in full; only the title gives up room.
	titleBudget := store.AlertRuleMaxRuleGroupNameLength - len(suffix)
	return truncate(dashboardTitle, titleBudget) + suffix
}