CloudMigrations: Add config for alert rules state behavior and remove experimental feature toggle on alerts migration (#97254)

* CloudMigrations: add config for controlling alert rules state behavior

* CloudMigrations: remove experimental 'onPremToCloudMigrationsAlerts' feature toggle
This commit is contained in:
Matheus Macabu 2024-12-17 12:56:18 +01:00 committed by GitHub
parent ae7cb6866d
commit c824f5b9bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 69 additions and 95 deletions

View File

@ -2027,6 +2027,10 @@ snapshot_folder = ""
feedback_url = https://docs.google.com/forms/d/e/1FAIpQLSeEE33vhbSpR8A8S1A1ocZ1ByVRRwiRl1GZr2FSrEer_tSa8w/viewform?usp=sf_link
# How frequently should the frontend UI poll for changes while resources are migrating
frontend_poll_interval = 2s
# Controls how the Alert Rules are migrated. Available choices: "paused" and "unchanged". Default: "paused".
# With "paused", all Alert Rules will be created in Paused state. This is helpful to avoid double notifications.
# With "unchanged", all Alert Rules will be created with the same pause state they have in the source instance.
alert_rules_state = "paused"
################################## Frontend development configuration ###################################
# Warning! Any settings placed in this section will be available on `process.env.frontend_dev_{foo}` within frontend code

View File

@ -1949,6 +1949,10 @@ timeout = 30s
;feedback_url = ""
# How frequently should the frontend UI poll for changes while resources are migrating
;frontend_poll_interval = 2s
# Controls how the Alert Rules are migrated. Available choices: "paused" and "unchanged". Default: "paused".
# With "paused", all Alert Rules will be created in Paused state. This is helpful to avoid double notifications.
# With "unchanged", all Alert Rules will be created with the same pause state they have in the source instance.
;alert_rules_state = "paused"
################################## Frontend development configuration ###################################
# Warning! Any settings placed in this section will be available on `process.env.frontend_dev_{foo}` within frontend code

View File

@ -191,7 +191,6 @@ Experimental features might be changed or removed without prior notice.
| `tableSharedCrosshair` | Enables shared crosshair in table panel |
| `kubernetesFeatureToggles` | Use the kubernetes API for feature toggle management in the frontend |
| `newFolderPicker` | Enables the nested folder picker without having nested folders enabled |
| `onPremToCloudMigrationsAlerts` | Enables the migration of alerts and its child resources to your Grafana Cloud stack. Requires `onPremToCloudMigrations` to be enabled in conjunction. |
| `onPremToCloudMigrationsAuthApiMig` | Enables the use of auth api instead of gcom for internal token services. Requires `onPremToCloudMigrations` to be enabled in conjunction. |
| `scopeApi` | In-development feature flag for the scope api using the app platform. |
| `sqlExpressions` | Enables using SQL and DuckDB functions as Expressions. |

View File

@ -156,7 +156,6 @@ export interface FeatureToggles {
newFolderPicker?: boolean;
jitterAlertRulesWithinGroups?: boolean;
onPremToCloudMigrations?: boolean;
onPremToCloudMigrationsAlerts?: boolean;
onPremToCloudMigrationsAuthApiMig?: boolean;
alertingSaveStatePeriodic?: boolean;
scopeApi?: boolean;

View File

@ -826,7 +826,9 @@ func ctxWithSignedInUser() context.Context {
return ctx
}
func setUpServiceTest(t *testing.T, withDashboardMock bool) cloudmigration.Service {
// configOverrides is a hook that receives the test configuration and may
// mutate it in place.
type configOverrides func(cfg *setting.Cfg)
func setUpServiceTest(t *testing.T, withDashboardMock bool, cfgOverrides ...configOverrides) cloudmigration.Service {
sqlStore := db.InitTestDB(t)
secretsService := secretsfakes.NewFakeSecretsService()
rr := routing.NewRouteRegister()
@ -866,7 +868,6 @@ func setUpServiceTest(t *testing.T, withDashboardMock bool) cloudmigration.Servi
featureToggles := featuremgmt.WithFeatures(
featuremgmt.FlagOnPremToCloudMigrations,
featuremgmt.FlagOnPremToCloudMigrationsAlerts,
featuremgmt.FlagDashboardRestore, // needed for skipping creating soft-deleted dashboards in the snapshot.
)
@ -930,6 +931,10 @@ func setUpServiceTest(t *testing.T, withDashboardMock bool) cloudmigration.Servi
require.NoError(t, err)
}
for _, cfgOverride := range cfgOverrides {
cfgOverride(cfg)
}
s, err := ProvideService(
cfg,
httpclient.NewProvider(),

View File

@ -9,11 +9,11 @@ import (
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/services/featuremgmt"
ngalertapi "github.com/grafana/grafana/pkg/services/ngalert/api"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/setting"
)
type muteTimeInterval struct {
@ -25,10 +25,6 @@ type muteTimeInterval struct {
}
func (s *Service) getAlertMuteTimings(ctx context.Context, signedInUser *user.SignedInUser) ([]muteTimeInterval, error) {
if !s.features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrationsAlerts) {
return nil, nil
}
muteTimings, err := s.ngAlert.Api.MuteTimings.GetMuteTimings(ctx, signedInUser.OrgID)
if err != nil {
return nil, fmt.Errorf("fetching ngalert mute timings: %w", err)
@ -56,10 +52,6 @@ type notificationTemplate struct {
}
func (s *Service) getNotificationTemplates(ctx context.Context, signedInUser *user.SignedInUser) ([]notificationTemplate, error) {
if !s.features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrationsAlerts) {
return nil, nil
}
templates, err := s.ngAlert.Api.Templates.GetTemplates(ctx, signedInUser.OrgID)
if err != nil {
return nil, fmt.Errorf("fetching ngalert notification templates: %w", err)
@ -87,10 +79,6 @@ type contactPoint struct {
}
func (s *Service) getContactPoints(ctx context.Context, signedInUser *user.SignedInUser) ([]contactPoint, error) {
if !s.features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrationsAlerts) {
return nil, nil
}
query := provisioning.ContactPointQuery{
OrgID: signedInUser.GetOrgID(),
Decrypt: true, // needed to recreate the settings in the target instance.
@ -122,10 +110,6 @@ type notificationPolicy struct {
}
func (s *Service) getNotificationPolicies(ctx context.Context, signedInUser *user.SignedInUser) (notificationPolicy, error) {
if !s.features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrationsAlerts) {
return notificationPolicy{}, nil
}
policyTree, _, err := s.ngAlert.Api.Policies.GetPolicyTree(ctx, signedInUser.GetOrgID())
if err != nil {
return notificationPolicy{}, fmt.Errorf("fetching ngalert notification policy tree: %w", err)
@ -158,18 +142,21 @@ type alertRule struct {
}
func (s *Service) getAlertRules(ctx context.Context, signedInUser *user.SignedInUser) ([]alertRule, error) {
if !s.features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrationsAlerts) {
return nil, nil
}
alertRules, _, err := s.ngAlert.Api.AlertRules.GetAlertRules(ctx, signedInUser)
if err != nil {
return nil, fmt.Errorf("fetching alert rules: %w", err)
}
settingAlertRulesPaused := s.cfg.CloudMigration.AlertRulesState == setting.GMSAlertRulesPaused
provisionedAlertRules := make([]alertRule, 0, len(alertRules))
for _, rule := range alertRules {
isPaused := rule.IsPaused
if settingAlertRulesPaused {
isPaused = true
}
provisionedAlertRules = append(provisionedAlertRules, alertRule{
ID: rule.ID,
UID: rule.UID,
@ -185,7 +172,7 @@ func (s *Service) getAlertRules(ctx context.Context, signedInUser *user.SignedIn
ExecErrState: rule.ExecErrState.String(),
Annotations: rule.Annotations,
Labels: rule.Labels,
IsPaused: rule.IsPaused,
IsPaused: isPaused,
NotificationSettings: ngalertapi.AlertRuleNotificationSettingsFromNotificationSettings(rule.NotificationSettings),
Record: ngalertapi.ApiRecordFromModelRecord(rule.Record),
})

View File

@ -3,6 +3,7 @@ package cloudmigrationimpl
import (
"context"
"encoding/json"
"fmt"
"testing"
"time"
@ -17,25 +18,17 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/setting"
)
func TestGetAlertMuteTimings(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is not enabled it returns nil", func(t *testing.T) {
t.Run("it returns the mute timings", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations)
muteTimeIntervals, err := s.getAlertMuteTimings(ctx, nil)
require.NoError(t, err)
require.Nil(t, muteTimeIntervals)
})
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is enabled it returns the mute timings", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations, featuremgmt.FlagOnPremToCloudMigrationsAlerts)
user := &user.SignedInUser{OrgID: 1}
createdMuteTiming := createMuteTiming(t, ctx, s, user)
@ -52,18 +45,8 @@ func TestGetNotificationTemplates(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is not enabled it returns nil", func(t *testing.T) {
t.Run("it returns the notification templates", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations)
notificationTemplates, err := s.getNotificationTemplates(ctx, nil)
require.NoError(t, err)
require.Nil(t, notificationTemplates)
})
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is enabled it returns the notification templates", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations, featuremgmt.FlagOnPremToCloudMigrationsAlerts)
user := &user.SignedInUser{OrgID: 1}
@ -81,18 +64,8 @@ func TestGetContactPoints(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is not enabled it returns nil", func(t *testing.T) {
t.Run("it returns the contact points", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations)
contactPoints, err := s.getContactPoints(ctx, nil)
require.NoError(t, err)
require.Nil(t, contactPoints)
})
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is enabled it returns the contact points", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations, featuremgmt.FlagOnPremToCloudMigrationsAlerts)
user := &user.SignedInUser{
OrgID: 1,
@ -119,18 +92,8 @@ func TestGetNotificationPolicies(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is not enabled it returns nil", func(t *testing.T) {
t.Run("it returns the contact points", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations)
notificationPolicies, err := s.getNotificationPolicies(ctx, nil)
require.NoError(t, err)
require.Empty(t, notificationPolicies)
})
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is enabled it returns the contact points", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations, featuremgmt.FlagOnPremToCloudMigrationsAlerts)
user := &user.SignedInUser{OrgID: 1}
@ -153,28 +116,40 @@ func TestGetAlertRules(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is not enabled it returns nil", func(t *testing.T) {
t.Run("it returns the alert rules", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations)
alertRules, err := s.getAlertRules(ctx, nil)
require.NoError(t, err)
require.Nil(t, alertRules)
})
t.Run("when the feature flag `onPremToCloudMigrationsAlerts` is enabled it returns the alert rules", func(t *testing.T) {
s := setUpServiceTest(t, false).(*Service)
s.features = featuremgmt.WithFeatures(featuremgmt.FlagOnPremToCloudMigrations, featuremgmt.FlagOnPremToCloudMigrationsAlerts)
user := &user.SignedInUser{OrgID: 1}
alertRule := createAlertRule(t, ctx, s, user)
alertRule := createAlertRule(t, ctx, s, user, false)
alertRules, err := s.getAlertRules(ctx, user)
require.NoError(t, err)
require.Len(t, alertRules, 1)
require.Equal(t, alertRule.UID, alertRules[0].UID)
})
t.Run("when the alert_rules_state config is `paused`, then the alert rules are all returned in `paused` state", func(t *testing.T) {
alertRulesState := func(c *setting.Cfg) {
c.CloudMigration.AlertRulesState = setting.GMSAlertRulesPaused
}
s := setUpServiceTest(t, false, alertRulesState).(*Service)
user := &user.SignedInUser{OrgID: 1}
alertRulePaused := createAlertRule(t, ctx, s, user, true)
require.True(t, alertRulePaused.IsPaused)
alertRuleUnpaused := createAlertRule(t, ctx, s, user, false)
require.False(t, alertRuleUnpaused.IsPaused)
alertRules, err := s.getAlertRules(ctx, user)
require.NoError(t, err)
require.Len(t, alertRules, 2)
require.True(t, alertRules[0].IsPaused)
require.True(t, alertRules[1].IsPaused)
})
}
func createMuteTiming(t *testing.T, ctx context.Context, service *Service, user *user.SignedInUser) definitions.MuteTimeInterval {
@ -292,12 +267,12 @@ func updateNotificationPolicyTree(t *testing.T, ctx context.Context, service *Se
require.NoError(t, err)
}
func createAlertRule(t *testing.T, ctx context.Context, service *Service, user *user.SignedInUser) models.AlertRule {
func createAlertRule(t *testing.T, ctx context.Context, service *Service, user *user.SignedInUser, isPaused bool) models.AlertRule {
t.Helper()
rule := models.AlertRule{
OrgID: user.GetOrgID(),
Title: "Alert Rule SLO",
Title: fmt.Sprintf("Alert Rule SLO (Paused: %v)", isPaused),
NamespaceUID: "folderUID",
Condition: "A",
Data: []models.AlertQuery{
@ -310,6 +285,7 @@ func createAlertRule(t *testing.T, ctx context.Context, service *Service, user *
},
},
},
IsPaused: isPaused,
RuleGroup: "ruleGroup",
For: time.Minute,
IntervalSeconds: 60,

View File

@ -1044,12 +1044,6 @@ var (
Owner: grafanaOperatorExperienceSquad,
Expression: "false",
},
{
Name: "onPremToCloudMigrationsAlerts",
Description: "Enables the migration of alerts and its child resources to your Grafana Cloud stack. Requires `onPremToCloudMigrations` to be enabled in conjunction.",
Stage: FeatureStageExperimental,
Owner: grafanaOperatorExperienceSquad,
},
{
Name: "onPremToCloudMigrationsAuthApiMig",
Description: "Enables the use of auth api instead of gcom for internal token services. Requires `onPremToCloudMigrations` to be enabled in conjunction.",

View File

@ -137,7 +137,6 @@ alertingQueryOptimization,GA,@grafana/alerting-squad,false,false,false
newFolderPicker,experimental,@grafana/grafana-frontend-platform,false,false,true
jitterAlertRulesWithinGroups,preview,@grafana/alerting-squad,false,true,false
onPremToCloudMigrations,preview,@grafana/grafana-operator-experience-squad,false,false,false
onPremToCloudMigrationsAlerts,experimental,@grafana/grafana-operator-experience-squad,false,false,false
onPremToCloudMigrationsAuthApiMig,experimental,@grafana/grafana-operator-experience-squad,false,false,false
alertingSaveStatePeriodic,privatePreview,@grafana/alerting-squad,false,false,false
scopeApi,experimental,@grafana/grafana-app-platform-squad,false,false,false

1 Name Stage Owner requiresDevMode RequiresRestart FrontendOnly
137 newFolderPicker experimental @grafana/grafana-frontend-platform false false true
138 jitterAlertRulesWithinGroups preview @grafana/alerting-squad false true false
139 onPremToCloudMigrations preview @grafana/grafana-operator-experience-squad false false false
onPremToCloudMigrationsAlerts experimental @grafana/grafana-operator-experience-squad false false false
140 onPremToCloudMigrationsAuthApiMig experimental @grafana/grafana-operator-experience-squad false false false
141 alertingSaveStatePeriodic privatePreview @grafana/alerting-squad false false false
142 scopeApi experimental @grafana/grafana-app-platform-squad false false false

View File

@ -559,10 +559,6 @@ const (
// Enable the Grafana Migration Assistant, which helps you easily migrate on-prem dashboards, folders, and data source configurations to your Grafana Cloud stack.
FlagOnPremToCloudMigrations = "onPremToCloudMigrations"
// FlagOnPremToCloudMigrationsAlerts
// Enables the migration of alerts and its child resources to your Grafana Cloud stack. Requires `onPremToCloudMigrations` to be enabled in conjunction.
FlagOnPremToCloudMigrationsAlerts = "onPremToCloudMigrationsAlerts"
// FlagOnPremToCloudMigrationsAuthApiMig
// Enables the use of auth api instead of gcom for internal token services. Requires `onPremToCloudMigrations` to be enabled in conjunction.
FlagOnPremToCloudMigrationsAuthApiMig = "onPremToCloudMigrationsAuthApiMig"

View File

@ -2474,7 +2474,8 @@
"metadata": {
"name": "onPremToCloudMigrationsAlerts",
"resourceVersion": "1728048163201",
"creationTimestamp": "2024-10-04T13:22:43Z"
"creationTimestamp": "2024-10-04T13:22:43Z",
"deletionTimestamp": "2024-12-02T13:37:41Z"
},
"spec": {
"description": "Enables the migration of alerts and its child resources to your Grafana Cloud stack. Requires `onPremToCloudMigrations` to be enabled in conjunction.",

View File

@ -5,6 +5,14 @@ import (
"time"
)
// Accepted values for the `alert_rules_state` cloud migration setting.
const (
	// GMSAlertRulesPaused forces every migrated Alert Rule into the paused
	// state, regardless of its state in the source instance.
	GMSAlertRulesPaused = "paused"

	// GMSAlertRulesUnchanged keeps each migrated Alert Rule's pause state
	// exactly as it is in the source instance.
	GMSAlertRulesUnchanged = "unchanged"
)
type CloudMigrationSettings struct {
IsTarget bool
GcomAPIToken string
@ -26,6 +34,7 @@ type CloudMigrationSettings struct {
TokenExpiresAfter time.Duration
FeedbackURL string
FrontendPollInterval time.Duration
AlertRulesState string
IsDeveloperMode bool
}
@ -53,6 +62,7 @@ func (cfg *Cfg) readCloudMigrationSettings() {
cfg.CloudMigration.IsDeveloperMode = cloudMigration.Key("developer_mode").MustBool(false)
cfg.CloudMigration.FeedbackURL = cloudMigration.Key("feedback_url").MustString("")
cfg.CloudMigration.FrontendPollInterval = cloudMigration.Key("frontend_poll_interval").MustDuration(2 * time.Second)
cfg.CloudMigration.AlertRulesState = cloudMigration.Key("alert_rules_state").In(GMSAlertRulesPaused, []string{GMSAlertRulesPaused, GMSAlertRulesUnchanged})
if cfg.CloudMigration.SnapshotFolder == "" {
cfg.CloudMigration.SnapshotFolder = filepath.Join(cfg.DataPath, "cloud_migration")