Alerting: Introduce proper feature toggles for common state history backend combinations (#65497)

* define 3 feature toggles for rollout phases

* Pass feature toggles along

* Implement first feature toggle

* Try a different strategy with fall-throughs to specific configurations

* Apply toggle overrides once outside of backend composition

* Emit log messages when we coerce backends

* Run code generator for feature toggle files

* Improve wording in flag descs

* Re-run generator

* Use code-generated constants instead of plain strings

* Use converted enum values rather than strings for pre-parsing
This commit is contained in:
Alexander Weaver 2023-03-30 13:53:21 -05:00 committed by GitHub
parent 8a4135f63f
commit b2abb63286
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 88 additions and 1 deletions

View File

@ -99,6 +99,9 @@ Alpha features might be changed or removed without prior notice.
| `influxdbBackendMigration` | Query InfluxDB InfluxQL without the proxy |
| `clientTokenRotation` | Replaces the current in-request token rotation so that the client initiates the rotation |
| `prometheusDataplane` | Changes responses to from Prometheus to be compliant with the dataplane specification. In particular it sets the numeric Field.Name from 'Value' to the value of the `__name__` label when present. |
| `alertStateHistoryLokiSecondary` | Enable Grafana to write alert state history to an external Loki instance in addition to Grafana annotations. |
| `alertStateHistoryLokiPrimary` | Enable a remote Loki instance as the primary source for state history reads. |
| `alertStateHistoryLokiOnly` | Disable Grafana alerts from emitting annotations when a remote Loki instance is available. |
## Development feature toggles

View File

@ -87,4 +87,7 @@ export interface FeatureToggles {
clientTokenRotation?: boolean;
disableElasticsearchBackendExploreQuery?: boolean;
prometheusDataplane?: boolean;
alertStateHistoryLokiSecondary?: boolean;
alertStateHistoryLokiPrimary?: boolean;
alertStateHistoryLokiOnly?: boolean;
}

View File

@ -461,5 +461,23 @@ var (
State: FeatureStateAlpha,
Owner: grafanaObservabilityMetricsSquad,
},
{
Name: "alertStateHistoryLokiSecondary",
Description: "Enable Grafana to write alert state history to an external Loki instance in addition to Grafana annotations.",
State: FeatureStateAlpha,
Owner: grafanaAlertingSquad,
},
{
Name: "alertStateHistoryLokiPrimary",
Description: "Enable a remote Loki instance as the primary source for state history reads.",
State: FeatureStateAlpha,
Owner: grafanaAlertingSquad,
},
{
Name: "alertStateHistoryLokiOnly",
Description: "Disable Grafana alerts from emitting annotations when a remote Loki instance is available.",
State: FeatureStateAlpha,
Owner: grafanaAlertingSquad,
},
}
)

View File

@ -68,3 +68,6 @@ influxdbBackendMigration,alpha,@grafana/observability-metrics,false,false,false,
clientTokenRotation,alpha,@grafana/grafana-authnz-team,false,false,false,false
disableElasticsearchBackendExploreQuery,beta,@grafana/observability-logs,false,false,false,false
prometheusDataplane,alpha,@grafana/observability-metrics,false,false,false,false
alertStateHistoryLokiSecondary,alpha,@grafana/alerting-squad,false,false,false,false
alertStateHistoryLokiPrimary,alpha,@grafana/alerting-squad,false,false,false,false
alertStateHistoryLokiOnly,alpha,@grafana/alerting-squad,false,false,false,false

1 Name State Owner requiresDevMode RequiresLicense RequiresRestart FrontendOnly
68 clientTokenRotation alpha @grafana/grafana-authnz-team false false false false
69 disableElasticsearchBackendExploreQuery beta @grafana/observability-logs false false false false
70 prometheusDataplane alpha @grafana/observability-metrics false false false false
71 alertStateHistoryLokiSecondary alpha @grafana/alerting-squad false false false false
72 alertStateHistoryLokiPrimary alpha @grafana/alerting-squad false false false false
73 alertStateHistoryLokiOnly alpha @grafana/alerting-squad false false false false

View File

@ -282,4 +282,16 @@ const (
// FlagPrometheusDataplane
// Changes responses to from Prometheus to be compliant with the dataplane specification. In particular it sets the numeric Field.Name from 'Value' to the value of the `__name__` label when present.
FlagPrometheusDataplane = "prometheusDataplane"
// FlagAlertStateHistoryLokiSecondary
// Enable Grafana to write alert state history to an external Loki instance in addition to Grafana annotations.
FlagAlertStateHistoryLokiSecondary = "alertStateHistoryLokiSecondary"
// FlagAlertStateHistoryLokiPrimary
// Enable a remote Loki instance as the primary source for state history reads.
FlagAlertStateHistoryLokiPrimary = "alertStateHistoryLokiPrimary"
// FlagAlertStateHistoryLokiOnly
// Disable Grafana alerts from emitting annotations when a remote Loki instance is available.
FlagAlertStateHistoryLokiOnly = "alertStateHistoryLokiOnly"
)

View File

@ -210,6 +210,9 @@ func (ng *AlertNG) init() error {
Tracer: ng.tracer,
}
// There are a set of feature toggles available that act as short-circuits for common configurations.
// If any are set, override the config accordingly.
applyStateHistoryFeatureToggles(&ng.Cfg.UnifiedAlerting.StateHistory, ng.FeatureToggles, ng.Log)
history, err := configureHistorianBackend(initCtx, ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService, ng.store, ng.Metrics.GetHistorianMetrics(), ng.Log)
if err != nil {
return err
@ -438,3 +441,48 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
return nil, fmt.Errorf("unrecognized state history backend: %s", backend)
}
// applyStateHistoryFeatureToggles edits state history configuration to comply with currently active feature toggles.
func applyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySettings, ft featuremgmt.FeatureToggles, logger log.Logger) {
backend, _ := historian.ParseBackendType(cfg.Backend)
// These feature toggles represent specific, common backend configurations.
// If all toggles are enabled, we listen to the state history config as written.
// If any of them are disabled, we ignore the configured backend and treat the toggles as an override.
// If multiple toggles are disabled, we go with the most "restrictive" one.
if !ft.IsEnabled(featuremgmt.FlagAlertStateHistoryLokiSecondary) {
// If we cannot even treat Loki as a secondary, we must use annotations only.
if backend == historian.BackendTypeMultiple || backend == historian.BackendTypeLoki {
logger.Info("Forcing Annotation backend due to state history feature toggles")
cfg.Backend = historian.BackendTypeAnnotations.String()
cfg.MultiPrimary = ""
cfg.MultiSecondaries = make([]string, 0)
}
return
}
if !ft.IsEnabled(featuremgmt.FlagAlertStateHistoryLokiPrimary) {
// If we're using multiple backends, Loki must be the secondary.
if backend == historian.BackendTypeMultiple {
logger.Info("Coercing Loki to a secondary backend due to state history feature toggles")
cfg.MultiPrimary = historian.BackendTypeAnnotations.String()
cfg.MultiSecondaries = []string{historian.BackendTypeLoki.String()}
}
// If we're using loki, we are only allowed to use it as a secondary. Dual write to it, plus annotations.
if backend == historian.BackendTypeLoki {
logger.Info("Coercing Loki to dual writes with a secondary backend due to state history feature toggles")
cfg.Backend = historian.BackendTypeMultiple.String()
cfg.MultiPrimary = historian.BackendTypeAnnotations.String()
cfg.MultiSecondaries = []string{historian.BackendTypeLoki.String()}
}
return
}
if !ft.IsEnabled(featuremgmt.FlagAlertStateHistoryLokiOnly) {
// If we're not allowed to use Loki only, make it the primary but keep the annotation writes.
if backend == historian.BackendTypeLoki {
logger.Info("Forcing dual writes to Loki and Annotations due to state history feature toggles")
cfg.Backend = historian.BackendTypeMultiple.String()
cfg.MultiPrimary = historian.BackendTypeLoki.String()
cfg.MultiSecondaries = []string{historian.BackendTypeAnnotations.String()}
}
return
}
}

View File

@ -137,7 +137,7 @@ func (cfg *Cfg) readUnifiedAlertingEnabledSetting(section *ini.Section) (*bool,
// than disable it. This issue can be found here
hasEnabled := section.Key("enabled").Value() != ""
if !hasEnabled {
// TODO: Remove in Grafana v9
// TODO: Remove in Grafana v10
if cfg.IsFeatureToggleEnabled("ngalert") {
cfg.Logger.Warn("ngalert feature flag is deprecated: use unified alerting enabled setting instead")
// feature flag overrides the legacy alerting setting