mirror of
https://github.com/grafana/grafana.git
synced 2024-11-29 04:04:00 -06:00
32f06c6d9c
* Replace global authz abstraction with one compatible with uid scope * Replace GettableApiReceiver with models.Receiver in receiver_svc * GrafanaIntegrationConfig -> models.Integration * Implement Create/Update methods * Add optimistic concurrency to receiver API * Add scope to ReceiversRead & ReceiversReadSecrets migrates existing permissions to include implicit global scope * Add receiver create, update, delete actions * Check if receiver is used by rules before delete * On receiver name change update in routes and notification settings * Improve errors * Linting * Include read permissions are requirements for create/update/delete * Alias ngalert/models to ngmodels to differentiate from v0alpha1 model * Ensure integration UIDs are valid, unique, and generated if empty * Validate integration settings on create/update * Leverage UidToName to GetReceiver instead of GetReceivers * Remove some unnecessary uses of simplejson * alerting.notifications.receiver -> alerting.notifications.receivers * validator -> provenanceValidator * Only validate the modified receiver stops existing invalid receivers from preventing modification of a valid receiver. * Improve error in Integration.Encrypt * Remove scope from alert.notifications.receivers:create * Add todos for receiver renaming * Use receiverAC precondition checks in k8s api * Linting * Optional optimistic concurrency for delete * make update-workspace * More specific auth checks in k8s authorize.go * Add debug log when delete optimistic concurrency is skipped * Improve error message on authorizer.DecisionDeny * Keep error for non-forbidden errutil errors
682 lines
29 KiB
Go
682 lines
29 KiB
Go
package ngalert
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
"time"
|
|
|
|
"github.com/benbjohnson/clock"
|
|
"github.com/prometheus/alertmanager/featurecontrol"
|
|
"github.com/prometheus/alertmanager/matchers/compat"
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"github.com/grafana/grafana/pkg/api/routing"
|
|
"github.com/grafana/grafana/pkg/bus"
|
|
"github.com/grafana/grafana/pkg/events"
|
|
"github.com/grafana/grafana/pkg/expr"
|
|
"github.com/grafana/grafana/pkg/infra/db"
|
|
"github.com/grafana/grafana/pkg/infra/httpclient"
|
|
"github.com/grafana/grafana/pkg/infra/kvstore"
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
|
"github.com/grafana/grafana/pkg/infra/tracing"
|
|
"github.com/grafana/grafana/pkg/services/accesscontrol"
|
|
"github.com/grafana/grafana/pkg/services/annotations"
|
|
"github.com/grafana/grafana/pkg/services/dashboards"
|
|
"github.com/grafana/grafana/pkg/services/datasourceproxy"
|
|
"github.com/grafana/grafana/pkg/services/datasources"
|
|
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
|
"github.com/grafana/grafana/pkg/services/folder"
|
|
ac "github.com/grafana/grafana/pkg/services/ngalert/accesscontrol"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/api"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/image"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/notifier/legacy_storage"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/remote"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/sender"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/state/historian"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/writer"
|
|
"github.com/grafana/grafana/pkg/services/notifications"
|
|
"github.com/grafana/grafana/pkg/services/pluginsintegration/pluginstore"
|
|
"github.com/grafana/grafana/pkg/services/quota"
|
|
"github.com/grafana/grafana/pkg/services/rendering"
|
|
"github.com/grafana/grafana/pkg/services/secrets"
|
|
"github.com/grafana/grafana/pkg/setting"
|
|
)
|
|
|
|
func ProvideService(
|
|
cfg *setting.Cfg,
|
|
featureToggles featuremgmt.FeatureToggles,
|
|
dataSourceCache datasources.CacheService,
|
|
dataSourceService datasources.DataSourceService,
|
|
routeRegister routing.RouteRegister,
|
|
sqlStore db.DB,
|
|
kvStore kvstore.KVStore,
|
|
expressionService *expr.Service,
|
|
dataProxy *datasourceproxy.DataSourceProxyService,
|
|
quotaService quota.Service,
|
|
secretsService secrets.Service,
|
|
notificationService notifications.Service,
|
|
m *metrics.NGAlert,
|
|
folderService folder.Service,
|
|
ac accesscontrol.AccessControl,
|
|
dashboardService dashboards.DashboardService,
|
|
renderService rendering.Service,
|
|
bus bus.Bus,
|
|
accesscontrolService accesscontrol.Service,
|
|
annotationsRepo annotations.Repository,
|
|
pluginsStore pluginstore.Store,
|
|
tracer tracing.Tracer,
|
|
ruleStore *store.DBstore,
|
|
httpClientProvider httpclient.Provider,
|
|
) (*AlertNG, error) {
|
|
ng := &AlertNG{
|
|
Cfg: cfg,
|
|
FeatureToggles: featureToggles,
|
|
DataSourceCache: dataSourceCache,
|
|
DataSourceService: dataSourceService,
|
|
RouteRegister: routeRegister,
|
|
SQLStore: sqlStore,
|
|
KVStore: kvStore,
|
|
ExpressionService: expressionService,
|
|
DataProxy: dataProxy,
|
|
QuotaService: quotaService,
|
|
SecretsService: secretsService,
|
|
Metrics: m,
|
|
Log: log.New("ngalert"),
|
|
NotificationService: notificationService,
|
|
folderService: folderService,
|
|
accesscontrol: ac,
|
|
dashboardService: dashboardService,
|
|
renderService: renderService,
|
|
bus: bus,
|
|
accesscontrolService: accesscontrolService,
|
|
annotationsRepo: annotationsRepo,
|
|
pluginsStore: pluginsStore,
|
|
tracer: tracer,
|
|
store: ruleStore,
|
|
httpClientProvider: httpClientProvider,
|
|
}
|
|
|
|
if ng.IsDisabled() {
|
|
return ng, nil
|
|
}
|
|
|
|
if err := ng.init(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return ng, nil
|
|
}
|
|
|
|
// AlertNG is the service for evaluating the condition of an alert definition.
|
|
type AlertNG struct {
|
|
Cfg *setting.Cfg
|
|
FeatureToggles featuremgmt.FeatureToggles
|
|
DataSourceCache datasources.CacheService
|
|
DataSourceService datasources.DataSourceService
|
|
RouteRegister routing.RouteRegister
|
|
SQLStore db.DB
|
|
KVStore kvstore.KVStore
|
|
ExpressionService *expr.Service
|
|
DataProxy *datasourceproxy.DataSourceProxyService
|
|
QuotaService quota.Service
|
|
SecretsService secrets.Service
|
|
Metrics *metrics.NGAlert
|
|
NotificationService notifications.Service
|
|
Log log.Logger
|
|
renderService rendering.Service
|
|
ImageService image.ImageService
|
|
RecordingWriter schedule.RecordingWriter
|
|
schedule schedule.ScheduleService
|
|
stateManager *state.Manager
|
|
folderService folder.Service
|
|
dashboardService dashboards.DashboardService
|
|
Api *api.API
|
|
httpClientProvider httpclient.Provider
|
|
|
|
// Alerting notification services
|
|
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
|
|
AlertsRouter *sender.AlertsRouter
|
|
accesscontrol accesscontrol.AccessControl
|
|
accesscontrolService accesscontrol.Service
|
|
annotationsRepo annotations.Repository
|
|
store *store.DBstore
|
|
|
|
bus bus.Bus
|
|
pluginsStore pluginstore.Store
|
|
tracer tracing.Tracer
|
|
}
|
|
|
|
func (ng *AlertNG) init() error {
|
|
// AlertNG should be initialized before the cancellation deadline of initCtx
|
|
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
|
|
defer cancelFunc()
|
|
|
|
ng.store.Logger = ng.Log
|
|
|
|
// This initializes the compat package in fallback mode with logging. It parses first
|
|
// using the UTF-8 parser and then fallsback to the classic parser on error.
|
|
// UTF-8 is permitted in label names. This should be removed when the compat package
|
|
// is removed from Alertmanager.
|
|
compat.InitFromFlags(ng.Log, featurecontrol.NoopFlags{})
|
|
|
|
// If enabled, configure the remote Alertmanager.
|
|
// - If several toggles are enabled, the order of precedence is RemoteOnly, RemotePrimary, RemoteSecondary
|
|
// - If no toggles are enabled, we default to using only the internal Alertmanager
|
|
// We currently do not support remote primary mode, so we fall back to remote secondary.
|
|
var overrides []notifier.Option
|
|
moaLogger := log.New("ngalert.multiorg.alertmanager")
|
|
remoteOnly := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteOnly)
|
|
remotePrimary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemotePrimary)
|
|
remoteSecondary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteSecondary)
|
|
if ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Enable {
|
|
autogenFn := remote.NoopAutogenFn
|
|
if ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertingSimplifiedRouting) {
|
|
autogenFn = func(ctx context.Context, logger log.Logger, orgID int64, cfg *definitions.PostableApiAlertingConfig, skipInvalid bool) error {
|
|
return notifier.AddAutogenConfig(ctx, logger, ng.store, orgID, cfg, skipInvalid)
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case remoteOnly:
|
|
ng.Log.Debug("Starting Grafana with remote only mode enabled")
|
|
m := ng.Metrics.GetRemoteAlertmanagerMetrics()
|
|
m.Info.WithLabelValues(metrics.ModeRemoteOnly).Set(1)
|
|
ng.Cfg.UnifiedAlerting.SkipClustering = true
|
|
|
|
// This function will be used by the MOA to create new Alertmanagers.
|
|
override := notifier.WithAlertmanagerOverride(func(_ notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
|
|
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
|
|
// Create remote Alertmanager.
|
|
cfg := remote.AlertmanagerConfig{
|
|
BasicAuthPassword: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Password,
|
|
DefaultConfig: ng.Cfg.UnifiedAlerting.DefaultConfiguration,
|
|
OrgID: orgID,
|
|
TenantID: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.TenantID,
|
|
URL: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.URL,
|
|
PromoteConfig: true,
|
|
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
|
|
ExternalURL: ng.Cfg.AppURL,
|
|
StaticHeaders: ng.Cfg.Smtp.StaticHeaders,
|
|
}
|
|
remoteAM, err := createRemoteAlertmanager(cfg, ng.KVStore, ng.SecretsService.Decrypt, autogenFn, m, ng.tracer)
|
|
if err != nil {
|
|
moaLogger.Error("Failed to create remote Alertmanager", "err", err)
|
|
return nil, err
|
|
}
|
|
return remoteAM, nil
|
|
}
|
|
})
|
|
|
|
overrides = append(overrides, override)
|
|
|
|
case remotePrimary:
|
|
ng.Log.Debug("Starting Grafana with remote primary mode enabled")
|
|
m := ng.Metrics.GetRemoteAlertmanagerMetrics()
|
|
m.Info.WithLabelValues(metrics.ModeRemotePrimary).Set(1)
|
|
ng.Cfg.UnifiedAlerting.SkipClustering = true
|
|
// This function will be used by the MOA to create new Alertmanagers.
|
|
override := notifier.WithAlertmanagerOverride(func(factoryFn notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
|
|
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
|
|
// Create internal Alertmanager.
|
|
internalAM, err := factoryFn(ctx, orgID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Create remote Alertmanager.
|
|
cfg := remote.AlertmanagerConfig{
|
|
BasicAuthPassword: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Password,
|
|
DefaultConfig: ng.Cfg.UnifiedAlerting.DefaultConfiguration,
|
|
OrgID: orgID,
|
|
PromoteConfig: true,
|
|
TenantID: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.TenantID,
|
|
URL: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.URL,
|
|
ExternalURL: ng.Cfg.AppURL,
|
|
StaticHeaders: ng.Cfg.Smtp.StaticHeaders,
|
|
}
|
|
remoteAM, err := createRemoteAlertmanager(cfg, ng.KVStore, ng.SecretsService.Decrypt, autogenFn, m, ng.tracer)
|
|
if err != nil {
|
|
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
|
|
return internalAM, nil
|
|
}
|
|
|
|
// Use both Alertmanager implementations in the forked Alertmanager.
|
|
return remote.NewRemotePrimaryForkedAlertmanager(log.New("ngalert.forked-alertmanager.remote-primary"), internalAM, remoteAM), nil
|
|
}
|
|
})
|
|
|
|
overrides = append(overrides, override)
|
|
|
|
case remoteSecondary:
|
|
ng.Log.Debug("Starting Grafana with remote secondary mode enabled")
|
|
m := ng.Metrics.GetRemoteAlertmanagerMetrics()
|
|
m.Info.WithLabelValues(metrics.ModeRemoteSecondary).Set(1)
|
|
|
|
// This function will be used by the MOA to create new Alertmanagers.
|
|
override := notifier.WithAlertmanagerOverride(func(factoryFn notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
|
|
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
|
|
// Create internal Alertmanager.
|
|
internalAM, err := factoryFn(ctx, orgID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Create remote Alertmanager.
|
|
cfg := remote.AlertmanagerConfig{
|
|
BasicAuthPassword: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Password,
|
|
DefaultConfig: ng.Cfg.UnifiedAlerting.DefaultConfiguration,
|
|
OrgID: orgID,
|
|
TenantID: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.TenantID,
|
|
URL: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.URL,
|
|
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
|
|
ExternalURL: ng.Cfg.AppURL,
|
|
StaticHeaders: ng.Cfg.Smtp.StaticHeaders,
|
|
}
|
|
remoteAM, err := createRemoteAlertmanager(cfg, ng.KVStore, ng.SecretsService.Decrypt, autogenFn, m, ng.tracer)
|
|
if err != nil {
|
|
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
|
|
return internalAM, nil
|
|
}
|
|
|
|
// Use both Alertmanager implementations in the forked Alertmanager.
|
|
rsCfg := remote.RemoteSecondaryConfig{
|
|
Logger: log.New("ngalert.forked-alertmanager.remote-secondary"),
|
|
OrgID: orgID,
|
|
Store: ng.store,
|
|
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
|
|
}
|
|
return remote.NewRemoteSecondaryForkedAlertmanager(rsCfg, internalAM, remoteAM)
|
|
}
|
|
})
|
|
|
|
overrides = append(overrides, override)
|
|
|
|
default:
|
|
ng.Log.Error("A mode should be selected when enabling the remote Alertmanager, falling back to using only the internal Alertmanager")
|
|
}
|
|
}
|
|
|
|
decryptFn := ng.SecretsService.GetDecryptedValue
|
|
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
|
|
moa, err := notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, moaLogger, ng.SecretsService, ng.FeatureToggles, overrides...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ng.MultiOrgAlertmanager = moa
|
|
|
|
imageService, err := image.NewScreenshotImageServiceFromCfg(ng.Cfg, ng.store, ng.dashboardService, ng.renderService, ng.Metrics.Registerer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ng.ImageService = imageService
|
|
|
|
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
|
|
if err := ng.MultiOrgAlertmanager.LoadAndSyncAlertmanagersForOrgs(initCtx); err != nil {
|
|
return fmt.Errorf("failed to initialize alerting because multiorg alertmanager manager failed to warm up: %w", err)
|
|
}
|
|
|
|
appUrl, err := url.Parse(ng.Cfg.AppURL)
|
|
if err != nil {
|
|
ng.Log.Error("Failed to parse application URL. Continue without it.", "error", err)
|
|
appUrl = nil
|
|
}
|
|
|
|
clk := clock.New()
|
|
|
|
alertsRouter := sender.NewAlertsRouter(ng.MultiOrgAlertmanager, ng.store, clk, appUrl, ng.Cfg.UnifiedAlerting.DisabledOrgs,
|
|
ng.Cfg.UnifiedAlerting.AdminConfigPollInterval, ng.DataSourceService, ng.SecretsService, ng.FeatureToggles)
|
|
|
|
// Make sure we sync at least once as Grafana starts to get the router up and running before we start sending any alerts.
|
|
if err := alertsRouter.SyncAndApplyConfigFromDatabase(initCtx); err != nil {
|
|
return fmt.Errorf("failed to initialize alerting because alert notifications router failed to warm up: %w", err)
|
|
}
|
|
|
|
ng.AlertsRouter = alertsRouter
|
|
|
|
evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService)
|
|
conditionValidator := eval.NewConditionValidator(ng.DataSourceCache, ng.ExpressionService, ng.pluginsStore)
|
|
|
|
if !ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagGrafanaManagedRecordingRules) {
|
|
// Force-disable the feature if the feature toggle is not on - sets us up for feature toggle removal.
|
|
ng.Cfg.UnifiedAlerting.RecordingRules.Enabled = false
|
|
}
|
|
recordingWriter, err := createRecordingWriter(ng.FeatureToggles, ng.Cfg.UnifiedAlerting.RecordingRules, ng.httpClientProvider, clk, ng.Metrics.GetRemoteWriterMetrics())
|
|
if err != nil {
|
|
return fmt.Errorf("failed to initialize recording writer: %w", err)
|
|
}
|
|
ng.RecordingWriter = recordingWriter
|
|
|
|
schedCfg := schedule.SchedulerCfg{
|
|
MaxAttempts: ng.Cfg.UnifiedAlerting.MaxAttempts,
|
|
C: clk,
|
|
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
|
|
MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval,
|
|
DisableGrafanaFolder: ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel),
|
|
JitterEvaluations: schedule.JitterStrategyFrom(ng.Cfg.UnifiedAlerting, ng.FeatureToggles),
|
|
AppURL: appUrl,
|
|
EvaluatorFactory: evalFactory,
|
|
RuleStore: ng.store,
|
|
RecordingRulesCfg: ng.Cfg.UnifiedAlerting.RecordingRules,
|
|
Metrics: ng.Metrics.GetSchedulerMetrics(),
|
|
AlertSender: alertsRouter,
|
|
Tracer: ng.tracer,
|
|
Log: log.New("ngalert.scheduler"),
|
|
RecordingWriter: ng.RecordingWriter,
|
|
}
|
|
|
|
// There are a set of feature toggles available that act as short-circuits for common configurations.
|
|
// If any are set, override the config accordingly.
|
|
ApplyStateHistoryFeatureToggles(&ng.Cfg.UnifiedAlerting.StateHistory, ng.FeatureToggles, ng.Log)
|
|
history, err := configureHistorianBackend(initCtx, ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService, ng.store, ng.Metrics.GetHistorianMetrics(), ng.Log, ng.tracer, ac.NewRuleService(ng.accesscontrol))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cfg := state.ManagerCfg{
|
|
Metrics: ng.Metrics.GetStateMetrics(),
|
|
ExternalURL: appUrl,
|
|
DisableExecution: !ng.Cfg.UnifiedAlerting.ExecuteAlerts,
|
|
InstanceStore: ng.store,
|
|
Images: ng.ImageService,
|
|
Clock: clk,
|
|
Historian: history,
|
|
DoNotSaveNormalState: ng.FeatureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingNoNormalState),
|
|
ApplyNoDataAndErrorToAllStates: ng.FeatureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingNoDataErrorExecution),
|
|
MaxStateSaveConcurrency: ng.Cfg.UnifiedAlerting.MaxStateSaveConcurrency,
|
|
RulesPerRuleGroupLimit: ng.Cfg.UnifiedAlerting.RulesPerRuleGroupLimit,
|
|
Tracer: ng.tracer,
|
|
Log: log.New("ngalert.state.manager"),
|
|
ResolvedRetention: ng.Cfg.UnifiedAlerting.ResolvedAlertRetention,
|
|
}
|
|
logger := log.New("ngalert.state.manager.persist")
|
|
statePersister := state.NewSyncStatePersisiter(logger, cfg)
|
|
if ng.FeatureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingSaveStatePeriodic) {
|
|
ticker := clock.New().Ticker(ng.Cfg.UnifiedAlerting.StatePeriodicSaveInterval)
|
|
statePersister = state.NewAsyncStatePersister(logger, ticker, cfg)
|
|
}
|
|
stateManager := state.NewManager(cfg, statePersister)
|
|
scheduler := schedule.NewScheduler(schedCfg, stateManager)
|
|
|
|
// if it is required to include folder title to the alerts, we need to subscribe to changes of alert title
|
|
if !ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel) {
|
|
subscribeToFolderChanges(ng.Log, ng.bus, ng.store)
|
|
}
|
|
|
|
ng.stateManager = stateManager
|
|
ng.schedule = scheduler
|
|
|
|
configStore := legacy_storage.NewAlertmanagerConfigStore(ng.store)
|
|
receiverService := notifier.NewReceiverService(
|
|
ac.NewReceiverAccess[*models.Receiver](ng.accesscontrol, false),
|
|
configStore,
|
|
ng.store,
|
|
ng.store,
|
|
ng.SecretsService,
|
|
ng.store,
|
|
ng.Log,
|
|
)
|
|
provisioningReceiverService := notifier.NewReceiverService(
|
|
ac.NewReceiverAccess[*models.Receiver](ng.accesscontrol, true),
|
|
configStore,
|
|
ng.store,
|
|
ng.store,
|
|
ng.SecretsService,
|
|
ng.store,
|
|
ng.Log,
|
|
)
|
|
|
|
// Provisioning
|
|
policyService := provisioning.NewNotificationPolicyService(configStore, ng.store, ng.store, ng.Cfg.UnifiedAlerting, ng.Log)
|
|
contactPointService := provisioning.NewContactPointService(configStore, ng.SecretsService, ng.store, ng.store, provisioningReceiverService, ng.Log, ng.store)
|
|
templateService := provisioning.NewTemplateService(configStore, ng.store, ng.store, ng.Log)
|
|
muteTimingService := provisioning.NewMuteTimingService(configStore, ng.store, ng.store, ng.Log, ng.store)
|
|
alertRuleService := provisioning.NewAlertRuleService(ng.store, ng.store, ng.folderService, ng.QuotaService, ng.store,
|
|
int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()),
|
|
int64(ng.Cfg.UnifiedAlerting.BaseInterval.Seconds()),
|
|
ng.Cfg.UnifiedAlerting.RulesPerRuleGroupLimit, ng.Log, notifier.NewNotificationSettingsValidationService(ng.store),
|
|
ac.NewRuleService(ng.accesscontrol))
|
|
|
|
ng.Api = &api.API{
|
|
Cfg: ng.Cfg,
|
|
DatasourceCache: ng.DataSourceCache,
|
|
DatasourceService: ng.DataSourceService,
|
|
RouteRegister: ng.RouteRegister,
|
|
DataProxy: ng.DataProxy,
|
|
QuotaService: ng.QuotaService,
|
|
TransactionManager: ng.store,
|
|
RuleStore: ng.store,
|
|
AlertingStore: ng.store,
|
|
AdminConfigStore: ng.store,
|
|
ProvenanceStore: ng.store,
|
|
MultiOrgAlertmanager: ng.MultiOrgAlertmanager,
|
|
StateManager: ng.stateManager,
|
|
AccessControl: ng.accesscontrol,
|
|
Policies: policyService,
|
|
ReceiverService: receiverService,
|
|
ContactPointService: contactPointService,
|
|
Templates: templateService,
|
|
MuteTimings: muteTimingService,
|
|
AlertRules: alertRuleService,
|
|
AlertsRouter: alertsRouter,
|
|
EvaluatorFactory: evalFactory,
|
|
ConditionValidator: conditionValidator,
|
|
FeatureManager: ng.FeatureToggles,
|
|
AppUrl: appUrl,
|
|
Historian: history,
|
|
Hooks: api.NewHooks(ng.Log),
|
|
Tracer: ng.tracer,
|
|
}
|
|
ng.Api.RegisterAPIEndpoints(ng.Metrics.GetAPIMetrics())
|
|
|
|
if err := RegisterQuotas(ng.Cfg, ng.QuotaService, ng.store); err != nil {
|
|
return err
|
|
}
|
|
|
|
log.RegisterContextualLogProvider(func(ctx context.Context) ([]interface{}, bool) {
|
|
key, ok := models.RuleKeyFromContext(ctx)
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
return key.LogContext(), true
|
|
})
|
|
|
|
return DeclareFixedRoles(ng.accesscontrolService)
|
|
}
|
|
|
|
func subscribeToFolderChanges(logger log.Logger, bus bus.Bus, dbStore api.RuleStore) {
|
|
// if full path to the folder is changed, we update all alert rules in that folder to make sure that all peers (in HA mode) will update folder title and
|
|
// clean up the current state
|
|
bus.AddEventListener(func(ctx context.Context, evt *events.FolderFullPathUpdated) error {
|
|
logger.Info("Got folder full path updated event. updating rules in the folders", "folderUIDs", evt.UIDs)
|
|
updatedKeys, err := dbStore.IncreaseVersionForAllRulesInNamespaces(ctx, evt.OrgID, evt.UIDs)
|
|
if err != nil {
|
|
logger.Error("Failed to update alert rules in the folders after their full paths were changed", "error", err, "folderUIDs", evt.UIDs, "orgID", evt.OrgID)
|
|
return err
|
|
}
|
|
logger.Info("Updated version for alert rules", "keys", updatedKeys)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Run starts the scheduler and Alertmanager.
|
|
func (ng *AlertNG) Run(ctx context.Context) error {
|
|
ng.Log.Debug("Starting", "execute_alerts", ng.Cfg.UnifiedAlerting.ExecuteAlerts)
|
|
|
|
children, subCtx := errgroup.WithContext(ctx)
|
|
|
|
children.Go(func() error {
|
|
return ng.MultiOrgAlertmanager.Run(subCtx)
|
|
})
|
|
children.Go(func() error {
|
|
return ng.AlertsRouter.Run(subCtx)
|
|
})
|
|
|
|
if ng.Cfg.UnifiedAlerting.ExecuteAlerts {
|
|
// Only Warm() the state manager if we are actually executing alerts.
|
|
// Doing so when we are not executing alerts is wasteful and could lead
|
|
// to misleading rule status queries, as the status returned will be
|
|
// always based on the state loaded from the database at startup, and
|
|
// not the most recent evaluation state.
|
|
//
|
|
// Also note that this runs synchronously to ensure state is loaded
|
|
// before rule evaluation begins, hence we use ctx and not subCtx.
|
|
//
|
|
ng.stateManager.Warm(ctx, ng.store)
|
|
|
|
children.Go(func() error {
|
|
return ng.schedule.Run(subCtx)
|
|
})
|
|
children.Go(func() error {
|
|
return ng.stateManager.Run(subCtx)
|
|
})
|
|
}
|
|
return children.Wait()
|
|
}
|
|
|
|
// IsDisabled returns true if the alerting service is disabled for this instance.
|
|
func (ng *AlertNG) IsDisabled() bool {
|
|
if ng.Cfg == nil {
|
|
return true
|
|
}
|
|
|
|
return !ng.Cfg.UnifiedAlerting.IsEnabled()
|
|
}
|
|
|
|
// GetHooks returns a facility for replacing handlers for paths. The handler hook for a path
|
|
// is invoked after all other middleware is invoked (authentication, instrumentation).
|
|
func (ng *AlertNG) GetHooks() *api.Hooks {
|
|
return ng.Api.Hooks
|
|
}
|
|
|
|
type Historian interface {
|
|
api.Historian
|
|
state.Historian
|
|
}
|
|
|
|
func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService, rs historian.RuleStore, met *metrics.Historian, l log.Logger, tracer tracing.Tracer, ac historian.AccessControl) (Historian, error) {
|
|
if !cfg.Enabled {
|
|
met.Info.WithLabelValues("noop").Set(0)
|
|
return historian.NewNopHistorian(), nil
|
|
}
|
|
|
|
backend, err := historian.ParseBackendType(cfg.Backend)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
met.Info.WithLabelValues(backend.String()).Set(1)
|
|
if backend == historian.BackendTypeMultiple {
|
|
primaryCfg := cfg
|
|
primaryCfg.Backend = cfg.MultiPrimary
|
|
primary, err := configureHistorianBackend(ctx, primaryCfg, ar, ds, rs, met, l, tracer, ac)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("multi-backend target \"%s\" was misconfigured: %w", cfg.MultiPrimary, err)
|
|
}
|
|
|
|
var secondaries []historian.Backend
|
|
for _, b := range cfg.MultiSecondaries {
|
|
secCfg := cfg
|
|
secCfg.Backend = b
|
|
sec, err := configureHistorianBackend(ctx, secCfg, ar, ds, rs, met, l, tracer, ac)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("multi-backend target \"%s\" was miconfigured: %w", b, err)
|
|
}
|
|
secondaries = append(secondaries, sec)
|
|
}
|
|
|
|
l.Info("State history is operating in multi-backend mode", "primary", cfg.MultiPrimary, "secondaries", cfg.MultiSecondaries)
|
|
return historian.NewMultipleBackend(primary, secondaries...), nil
|
|
}
|
|
if backend == historian.BackendTypeAnnotations {
|
|
store := historian.NewAnnotationStore(ar, ds, met)
|
|
annotationBackendLogger := log.New("ngalert.state.historian", "backend", "annotations")
|
|
return historian.NewAnnotationBackend(annotationBackendLogger, store, rs, met, ac), nil
|
|
}
|
|
if backend == historian.BackendTypeLoki {
|
|
lcfg, err := historian.NewLokiConfig(cfg)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid remote loki configuration: %w", err)
|
|
}
|
|
req := historian.NewRequester()
|
|
lokiBackendLogger := log.New("ngalert.state.historian", "backend", "loki")
|
|
backend := historian.NewRemoteLokiBackend(lokiBackendLogger, lcfg, req, met, tracer, rs, ac)
|
|
|
|
testConnCtx, cancelFunc := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancelFunc()
|
|
if err := backend.TestConnection(testConnCtx); err != nil {
|
|
l.Error("Failed to communicate with configured remote Loki backend, state history may not be persisted", "error", err)
|
|
}
|
|
return backend, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unrecognized state history backend: %s", backend)
|
|
}
|
|
|
|
// ApplyStateHistoryFeatureToggles edits state history configuration to comply with currently active feature toggles.
|
|
func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySettings, ft featuremgmt.FeatureToggles, logger log.Logger) {
|
|
backend, _ := historian.ParseBackendType(cfg.Backend)
|
|
// These feature toggles represent specific, common backend configurations.
|
|
// If all toggles are enabled, we listen to the state history config as written.
|
|
// If any of them are disabled, we ignore the configured backend and treat the toggles as an override.
|
|
// If multiple toggles are disabled, we go with the most "restrictive" one.
|
|
if !ft.IsEnabledGlobally(featuremgmt.FlagAlertStateHistoryLokiSecondary) {
|
|
// If we cannot even treat Loki as a secondary, we must use annotations only.
|
|
if backend == historian.BackendTypeMultiple || backend == historian.BackendTypeLoki {
|
|
logger.Info("Forcing Annotation backend due to state history feature toggles")
|
|
cfg.Backend = historian.BackendTypeAnnotations.String()
|
|
cfg.MultiPrimary = ""
|
|
cfg.MultiSecondaries = make([]string, 0)
|
|
}
|
|
return
|
|
}
|
|
if !ft.IsEnabledGlobally(featuremgmt.FlagAlertStateHistoryLokiPrimary) {
|
|
// If we're using multiple backends, Loki must be the secondary.
|
|
if backend == historian.BackendTypeMultiple {
|
|
logger.Info("Coercing Loki to a secondary backend due to state history feature toggles")
|
|
cfg.MultiPrimary = historian.BackendTypeAnnotations.String()
|
|
cfg.MultiSecondaries = []string{historian.BackendTypeLoki.String()}
|
|
}
|
|
// If we're using loki, we are only allowed to use it as a secondary. Dual write to it, plus annotations.
|
|
if backend == historian.BackendTypeLoki {
|
|
logger.Info("Coercing Loki to dual writes with a secondary backend due to state history feature toggles")
|
|
cfg.Backend = historian.BackendTypeMultiple.String()
|
|
cfg.MultiPrimary = historian.BackendTypeAnnotations.String()
|
|
cfg.MultiSecondaries = []string{historian.BackendTypeLoki.String()}
|
|
}
|
|
return
|
|
}
|
|
if !ft.IsEnabledGlobally(featuremgmt.FlagAlertStateHistoryLokiOnly) {
|
|
// If we're not allowed to use Loki only, make it the primary but keep the annotation writes.
|
|
if backend == historian.BackendTypeLoki {
|
|
logger.Info("Forcing dual writes to Loki and Annotations due to state history feature toggles")
|
|
cfg.Backend = historian.BackendTypeMultiple.String()
|
|
cfg.MultiPrimary = historian.BackendTypeLoki.String()
|
|
cfg.MultiSecondaries = []string{historian.BackendTypeAnnotations.String()}
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
func createRemoteAlertmanager(cfg remote.AlertmanagerConfig, kvstore kvstore.KVStore, decryptFn remote.DecryptFn, autogenFn remote.AutogenFn, m *metrics.RemoteAlertmanager, tracer tracing.Tracer) (*remote.Alertmanager, error) {
|
|
return remote.NewAlertmanager(cfg, notifier.NewFileStore(cfg.OrgID, kvstore), decryptFn, autogenFn, m, tracer)
|
|
}
|
|
|
|
func createRecordingWriter(featureToggles featuremgmt.FeatureToggles, settings setting.RecordingRuleSettings, httpClientProvider httpclient.Provider, clock clock.Clock, m *metrics.RemoteWriter) (schedule.RecordingWriter, error) {
|
|
logger := log.New("ngalert.writer")
|
|
|
|
if settings.Enabled {
|
|
return writer.NewPrometheusWriter(settings, httpClientProvider, clock, logger, m)
|
|
}
|
|
|
|
return writer.NoopWriter{}, nil
|
|
}
|