From 0ce1ccd6f969485bbb896fb4b768faf5c5a933e6 Mon Sep 17 00:00:00 2001 From: Matthew Jacobson Date: Wed, 31 Jan 2024 14:05:30 -0500 Subject: [PATCH] Alerting: Fix inconsistent AM raw config when applied via sync vs API (#81655) AM config applied via API would use the PostableUserConfig as the AM raw config and also the hash used to decide when the AM config has changed. However, when applied via the periodic sync the PostableApiAlertingConfig would be used instead. This leads to two issues: - Inconsistent hash comparisons when modifying the AM causing redundant applies. - GetStatus assumed the raw config was PostableUserConfig causing the endpoint to return correctly after a new config is applied via API and then nothing once the periodic sync runs. Note: Technically, the upstream GrafanaAlertamanger GetStatus shouldn't be returning PostableUserConfig or PostableApiAlertingConfig, but instead GettableStatus. However, this issue required changes elsewhere and is out of scope. --- pkg/services/ngalert/notifier/alertmanager.go | 23 ++++++++----------- pkg/services/ngalert/notifier/status.go | 8 +++---- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pkg/services/ngalert/notifier/alertmanager.go b/pkg/services/ngalert/notifier/alertmanager.go index eb06c23ae7f..980a66905cf 100644 --- a/pkg/services/ngalert/notifier/alertmanager.go +++ b/pkg/services/ngalert/notifier/alertmanager.go @@ -180,7 +180,7 @@ func (am *alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error { } err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error { - _, err := am.applyConfig(cfg, []byte(am.Settings.UnifiedAlerting.DefaultConfiguration)) + _, err := am.applyConfig(cfg) return err }) if err != nil { @@ -210,7 +210,7 @@ func (am *alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.P } err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error { - _, err := am.applyConfig(cfg, rawConfig) + _, err := am.applyConfig(cfg) return err }) if err != nil { @@ -232,7 +232,7 @@ func (am *alertmanager) ApplyConfig(ctx context.Context, dbCfg *ngmodels.AlertCo var outerErr error am.Base.WithLock(func() { - if err := am.applyAndMarkConfig(ctx, dbCfg.ConfigurationHash, cfg, nil); err != nil { + if err := am.applyAndMarkConfig(ctx, dbCfg.ConfigurationHash, cfg); err != nil { outerErr = fmt.Errorf("unable to apply configuration: %w", err) return } @@ -286,16 +286,13 @@ func (am *alertmanager) aggregateInhibitMatchers(rules []config.InhibitRule, amu // applyConfig applies a new configuration by re-initializing all components using the configuration provided. // It returns a boolean indicating whether the user config was changed and an error. // It is not safe to call concurrently. -func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig []byte) (bool, error) { +func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig) (bool, error) { // First, let's make sure this config is not already loaded var amConfigChanged bool - if rawConfig == nil { - enc, err := json.Marshal(cfg.AlertmanagerConfig) - if err != nil { - // In theory, this should never happen. - return false, err - } - rawConfig = enc + rawConfig, err := json.Marshal(cfg.AlertmanagerConfig) + if err != nil { + // In theory, this should never happen. + return false, err } if am.Base.ConfigHash() != md5.Sum(rawConfig) { @@ -335,8 +332,8 @@ func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig } // applyAndMarkConfig applies a configuration and marks it as applied if no errors occur. -func (am *alertmanager) applyAndMarkConfig(ctx context.Context, hash string, cfg *apimodels.PostableUserConfig, rawConfig []byte) error { - configChanged, err := am.applyConfig(cfg, rawConfig) +func (am *alertmanager) applyAndMarkConfig(ctx context.Context, hash string, cfg *apimodels.PostableUserConfig) error { + configChanged, err := am.applyConfig(cfg) if err != nil { return err } diff --git a/pkg/services/ngalert/notifier/status.go b/pkg/services/ngalert/notifier/status.go index bae821f6d40..e93dbffd010 100644 --- a/pkg/services/ngalert/notifier/status.go +++ b/pkg/services/ngalert/notifier/status.go @@ -8,15 +8,15 @@ import ( // TODO: We no longer do apimodels at this layer, move it to the API. func (am *alertmanager) GetStatus() apimodels.GettableStatus { - config := &apimodels.PostableUserConfig{} - status := am.Base.GetStatus() + config := &apimodels.PostableApiAlertingConfig{} + status := am.Base.GetStatus() // TODO: This should return a GettableStatus, for now it returns PostableApiAlertingConfig. if status == nil { - return *apimodels.NewGettableStatus(&config.AlertmanagerConfig) + return *apimodels.NewGettableStatus(config) } if err := json.Unmarshal(status, config); err != nil { am.logger.Error("Unable to unmarshall alertmanager config", "Err", err) } - return *apimodels.NewGettableStatus(&config.AlertmanagerConfig) + return *apimodels.NewGettableStatus(config) }