grafana/pkg/services/ngalert/ngalert.go

353 lines
12 KiB
Go
Raw Normal View History

package ngalert
import (
"context"
"fmt"
"net/url"
Encryption: Refactor securejsondata.SecureJsonData to stop relying on global functions (#38865) * Encryption: Add support to encrypt/decrypt sjd * Add datasources.Service as a proxy to datasources db operations * Encrypt ds.SecureJsonData before calling SQLStore * Move ds cache code into ds service * Fix tlsmanager tests * Fix pluginproxy tests * Remove some securejsondata.GetEncryptedJsonData usages * Add pluginsettings.Service as a proxy for plugin settings db operations * Add AlertNotificationService as a proxy for alert notification db operations * Remove some securejsondata.GetEncryptedJsonData usages * Remove more securejsondata.GetEncryptedJsonData usages * Fix lint errors * Minor fixes * Remove encryption global functions usages from ngalert * Fix lint errors * Minor fixes * Minor fixes * Remove securejsondata.DecryptedValue usage * Refactor the refactor * Remove securejsondata.DecryptedValue usage * Move securejsondata to migrations package * Move securejsondata to migrations package * Minor fix * Fix integration test * Fix integration tests * Undo undesired changes * Fix tests * Add context.Context into encryption methods * Fix tests * Fix tests * Fix tests * Trigger CI * Fix test * Add names to params of encryption service interface * Remove bus from CacheServiceImpl * Add logging * Add keys to logger Co-authored-by: Emil Tullstedt <emil.tullstedt@grafana.com> * Add missing key to logger Co-authored-by: Emil Tullstedt <emil.tullstedt@grafana.com> * Undo changes in markdown files * Fix formatting * Add context to secrets service * Rename decryptSecureJsonData to decryptSecureJsonDataFn * Name args in GetDecryptedValueFn * Add template back to NewAlertmanagerNotifier * Copy GetDecryptedValueFn to ngalert * Add logging to pluginsettings * Fix pluginsettings test Co-authored-by: Tania B <yalyna.ts@gmail.com> Co-authored-by: Emil Tullstedt <emil.tullstedt@grafana.com>
2021-10-07 09:33:50 -05:00
"github.com/benbjohnson/clock"
"golang.org/x/sync/errgroup"
"github.com/grafana/grafana/pkg/api/routing"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/events"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/kvstore"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/accesscontrol"
"github.com/grafana/grafana/pkg/services/annotations"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/datasourceproxy"
"github.com/grafana/grafana/pkg/services/datasources"
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/folder"
"github.com/grafana/grafana/pkg/services/ngalert/api"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/sender"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/state/historian"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/notifications"
"github.com/grafana/grafana/pkg/services/quota"
"github.com/grafana/grafana/pkg/services/rendering"
Encryption: Use secrets service (#40251) * Use secrets service in pluginproxy * Use secrets service in pluginxontext * Use secrets service in pluginsettings * Use secrets service in provisioning * Use secrets service in authinfoservice * Use secrets service in api * Use secrets service in sqlstore * Use secrets service in dashboardshapshots * Use secrets service in tsdb * Use secrets service in datasources * Use secrets service in alerting * Use secrets service in ngalert * Break cyclic dependancy * Refactor service * Break cyclic dependancy * Add FakeSecretsStore * Setup Secrets Service in sqlstore * Fix * Continue secrets service refactoring * Fix cyclic dependancy in sqlstore tests * Fix secrets service references * Fix linter errors * Add fake secrets service for tests * Refactor SetupTestSecretsService * Update setting up secret service in tests * Fix missing secrets service in multiorg_alertmanager_test * Use fake db in tests and sort imports * Use fake db in datasources tests * Fix more tests * Fix linter issues * Attempt to fix plugin proxy tests * Pass secrets service to getPluginProxiedRequest in pluginproxy tests * Fix pluginproxy tests * Revert using secrets service in alerting and provisioning * Update decryptFn in alerting migration * Rename defaultProvider to currentProvider * Use fake secrets service in alert channels tests * Refactor secrets service test helper * Update setting up secrets service in tests * Revert alerting changes in api * Add comments * Remove secrets service from background services * Convert global encryption functions into vars * Revert "Convert global encryption functions into vars" This reverts commit 498eb19859eba364a2400a6d7e73236b1c9a5b37. * Add feature toggle for envelope encryption * Rename toggle Co-authored-by: Emil Tullstedt <emil.tullstedt@grafana.com> Co-authored-by: Joan López de la Franca Beltran <joanjan14@gmail.com>
2021-11-04 11:47:21 -05:00
"github.com/grafana/grafana/pkg/services/secrets"
"github.com/grafana/grafana/pkg/setting"
)
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
func ProvideService(
cfg *setting.Cfg,
featureToggles featuremgmt.FeatureToggles,
dataSourceCache datasources.CacheService,
dataSourceService datasources.DataSourceService,
routeRegister routing.RouteRegister,
sqlStore db.DB,
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
kvStore kvstore.KVStore,
expressionService *expr.Service,
dataProxy *datasourceproxy.DataSourceProxyService,
quotaService quota.Service,
secretsService secrets.Service,
notificationService notifications.Service,
m *metrics.NGAlert,
folderService folder.Service,
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
ac accesscontrol.AccessControl,
dashboardService dashboards.DashboardService,
renderService rendering.Service,
bus bus.Bus,
accesscontrolService accesscontrol.Service,
annotationsRepo annotations.Repository,
) (*AlertNG, error) {
ng := &AlertNG{
Cfg: cfg,
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
FeatureToggles: featureToggles,
DataSourceCache: dataSourceCache,
DataSourceService: dataSourceService,
RouteRegister: routeRegister,
SQLStore: sqlStore,
KVStore: kvStore,
ExpressionService: expressionService,
DataProxy: dataProxy,
QuotaService: quotaService,
SecretsService: secretsService,
Metrics: m,
Log: log.New("ngalert"),
NotificationService: notificationService,
folderService: folderService,
accesscontrol: ac,
dashboardService: dashboardService,
renderService: renderService,
bus: bus,
accesscontrolService: accesscontrolService,
annotationsRepo: annotationsRepo,
}
if ng.IsDisabled() {
return ng, nil
}
if err := ng.init(); err != nil {
return nil, err
}
return ng, nil
}
// AlertNG is the service for evaluating the condition of an alert definition.
type AlertNG struct {
Cfg *setting.Cfg
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
FeatureToggles featuremgmt.FeatureToggles
DataSourceCache datasources.CacheService
DataSourceService datasources.DataSourceService
RouteRegister routing.RouteRegister
SQLStore db.DB
KVStore kvstore.KVStore
ExpressionService *expr.Service
DataProxy *datasourceproxy.DataSourceProxyService
QuotaService quota.Service
SecretsService secrets.Service
Metrics *metrics.NGAlert
NotificationService notifications.Service
Log log.Logger
renderService rendering.Service
imageService image.ImageService
schedule schedule.ScheduleService
stateManager *state.Manager
folderService folder.Service
dashboardService dashboards.DashboardService
// Alerting notification services
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
AlertsRouter *sender.AlertsRouter
accesscontrol accesscontrol.AccessControl
accesscontrolService accesscontrol.Service
annotationsRepo annotations.Repository
store *store.DBstore
bus bus.Bus
}
func (ng *AlertNG) init() error {
var err error
store := &store.DBstore{
Cfg: ng.Cfg.UnifiedAlerting,
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 01:22:58 -05:00
FeatureToggles: ng.FeatureToggles,
SQLStore: ng.SQLStore,
Logger: ng.Log,
FolderService: ng.folderService,
AccessControl: ng.accesscontrol,
DashboardService: ng.dashboardService,
}
ng.store = store
Encryption: Use secrets service (#40251) * Use secrets service in pluginproxy * Use secrets service in pluginxontext * Use secrets service in pluginsettings * Use secrets service in provisioning * Use secrets service in authinfoservice * Use secrets service in api * Use secrets service in sqlstore * Use secrets service in dashboardshapshots * Use secrets service in tsdb * Use secrets service in datasources * Use secrets service in alerting * Use secrets service in ngalert * Break cyclic dependancy * Refactor service * Break cyclic dependancy * Add FakeSecretsStore * Setup Secrets Service in sqlstore * Fix * Continue secrets service refactoring * Fix cyclic dependancy in sqlstore tests * Fix secrets service references * Fix linter errors * Add fake secrets service for tests * Refactor SetupTestSecretsService * Update setting up secret service in tests * Fix missing secrets service in multiorg_alertmanager_test * Use fake db in tests and sort imports * Use fake db in datasources tests * Fix more tests * Fix linter issues * Attempt to fix plugin proxy tests * Pass secrets service to getPluginProxiedRequest in pluginproxy tests * Fix pluginproxy tests * Revert using secrets service in alerting and provisioning * Update decryptFn in alerting migration * Rename defaultProvider to currentProvider * Use fake secrets service in alert channels tests * Refactor secrets service test helper * Update setting up secrets service in tests * Revert alerting changes in api * Add comments * Remove secrets service from background services * Convert global encryption functions into vars * Revert "Convert global encryption functions into vars" This reverts commit 498eb19859eba364a2400a6d7e73236b1c9a5b37. * Add feature toggle for envelope encryption * Rename toggle Co-authored-by: Emil Tullstedt <emil.tullstedt@grafana.com> Co-authored-by: Joan López de la Franca Beltran <joanjan14@gmail.com>
2021-11-04 11:47:21 -05:00
decryptFn := ng.SecretsService.GetDecryptedValue
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
ng.MultiOrgAlertmanager, err = notifier.NewMultiOrgAlertmanager(ng.Cfg, store, store, ng.KVStore, store, decryptFn, multiOrgMetrics, ng.NotificationService, log.New("ngalert.multiorg.alertmanager"), ng.SecretsService)
if err != nil {
return err
}
imageService, err := image.NewScreenshotImageServiceFromCfg(ng.Cfg, store, ng.dashboardService, ng.renderService, ng.Metrics.Registerer)
if err != nil {
return err
}
ng.imageService = imageService
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
if err := ng.MultiOrgAlertmanager.LoadAndSyncAlertmanagersForOrgs(context.Background()); err != nil {
return fmt.Errorf("failed to initialize alerting because multiorg alertmanager manager failed to warm up: %w", err)
}
appUrl, err := url.Parse(ng.Cfg.AppURL)
if err != nil {
ng.Log.Error("Failed to parse application URL. Continue without it.", "error", err)
appUrl = nil
}
clk := clock.New()
alertsRouter := sender.NewAlertsRouter(ng.MultiOrgAlertmanager, store, clk, appUrl, ng.Cfg.UnifiedAlerting.DisabledOrgs,
ng.Cfg.UnifiedAlerting.AdminConfigPollInterval, ng.DataSourceService, ng.SecretsService)
// Make sure we sync at least once as Grafana starts to get the router up and running before we start sending any alerts.
if err := alertsRouter.SyncAndApplyConfigFromDatabase(); err != nil {
return fmt.Errorf("failed to initialize alerting because alert notifications router failed to warm up: %w", err)
}
ng.AlertsRouter = alertsRouter
evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService)
schedCfg := schedule.SchedulerCfg{
Cfg: ng.Cfg.UnifiedAlerting,
C: clk,
EvaluatorFactory: evalFactory,
RuleStore: store,
Metrics: ng.Metrics.GetSchedulerMetrics(),
AlertSender: alertsRouter,
}
historian := historian.NewAnnotationHistorian(ng.annotationsRepo, ng.dashboardService)
stateManager := state.NewManager(ng.Metrics.GetStateMetrics(), appUrl, store, ng.imageService, clk, historian)
scheduler := schedule.NewScheduler(schedCfg, appUrl, stateManager)
// if it is required to include folder title to the alerts, we need to subscribe to changes of alert title
if !ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel) {
subscribeToFolderChanges(ng.Log, ng.bus, store, scheduler)
}
ng.stateManager = stateManager
ng.schedule = scheduler
// Provisioning
policyService := provisioning.NewNotificationPolicyService(store, store, store, ng.Cfg.UnifiedAlerting, ng.Log)
contactPointService := provisioning.NewContactPointService(store, ng.SecretsService, store, store, ng.Log)
templateService := provisioning.NewTemplateService(store, store, store, ng.Log)
muteTimingService := provisioning.NewMuteTimingService(store, store, store, ng.Log)
alertRuleService := provisioning.NewAlertRuleService(store, store, ng.QuotaService, store,
int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()),
int64(ng.Cfg.UnifiedAlerting.BaseInterval.Seconds()), ng.Log)
api := api.API{
Cfg: ng.Cfg,
DatasourceCache: ng.DataSourceCache,
DatasourceService: ng.DataSourceService,
RouteRegister: ng.RouteRegister,
Schedule: ng.schedule,
DataProxy: ng.DataProxy,
QuotaService: ng.QuotaService,
TransactionManager: store,
RuleStore: store,
AlertingStore: store,
AdminConfigStore: store,
ProvenanceStore: store,
MultiOrgAlertmanager: ng.MultiOrgAlertmanager,
StateManager: ng.stateManager,
AccessControl: ng.accesscontrol,
Policies: policyService,
ContactPointService: contactPointService,
Templates: templateService,
MuteTimings: muteTimingService,
AlertRules: alertRuleService,
AlertsRouter: alertsRouter,
EvaluatorFactory: evalFactory,
}
api.RegisterAPIEndpoints(ng.Metrics.GetAPIMetrics())
defaultLimits, err := readQuotaConfig(ng.Cfg)
if err != nil {
return err
}
if err := ng.QuotaService.RegisterQuotaReporter(&quota.NewUsageReporter{
TargetSrv: models.QuotaTargetSrv,
DefaultLimits: defaultLimits,
Reporter: api.Usage,
}); err != nil {
return err
}
log.RegisterContextualLogProvider(func(ctx context.Context) ([]interface{}, bool) {
key, ok := models.RuleKeyFromContext(ctx)
if !ok {
return nil, false
}
return key.LogContext(), true
})
return DeclareFixedRoles(ng.accesscontrolService)
}
func subscribeToFolderChanges(logger log.Logger, bus bus.Bus, dbStore api.RuleStore, scheduler schedule.ScheduleService) {
// if folder title is changed, we update all alert rules in that folder to make sure that all peers (in HA mode) will update folder title and
// clean up the current state
bus.AddEventListener(func(ctx context.Context, e *events.FolderTitleUpdated) error {
// do not block the upstream execution
go func(evt *events.FolderTitleUpdated) {
logger.Info("Got folder title updated event. updating rules in the folder", "folderUID", evt.UID)
updated, err := dbStore.IncreaseVersionForAllRulesInNamespace(context.Background(), evt.OrgID, evt.UID)
if err != nil {
logger.Error("Failed to update alert rules in the folder after its title was changed", "error", err, "folderUID", evt.UID, "folder", evt.Title)
return
}
if len(updated) > 0 {
logger.Info("Rules that belong to the folder have been updated successfully. Clearing their status", "folderUID", evt.UID, "updatedRules", len(updated))
for _, key := range updated {
scheduler.UpdateAlertRule(key.AlertRuleKey, key.Version)
}
} else {
logger.Debug("No alert rules found in the folder. nothing to update", "folderUID", evt.UID, "folder", evt.Title)
}
}(e)
return nil
})
}
// Run starts the scheduler and Alertmanager.
func (ng *AlertNG) Run(ctx context.Context) error {
ng.Log.Debug("Starting")
ng.stateManager.Warm(ctx, ng.store)
children, subCtx := errgroup.WithContext(ctx)
children.Go(func() error {
return ng.stateManager.Run(subCtx)
})
children.Go(func() error {
return ng.MultiOrgAlertmanager.Run(subCtx)
})
children.Go(func() error {
return ng.AlertsRouter.Run(subCtx)
})
if ng.Cfg.UnifiedAlerting.ExecuteAlerts {
children.Go(func() error {
return ng.schedule.Run(subCtx)
})
}
return children.Wait()
}
// IsDisabled returns true if the alerting service is disable for this instance.
func (ng *AlertNG) IsDisabled() bool {
if ng.Cfg == nil {
return true
}
return !ng.Cfg.UnifiedAlerting.IsEnabled()
}
func readQuotaConfig(cfg *setting.Cfg) (*quota.Map, error) {
limits := &quota.Map{}
if cfg == nil {
return limits, nil
}
var alertOrgQuota int64
var alertGlobalQuota int64
if cfg.UnifiedAlerting.IsEnabled() {
alertOrgQuota = cfg.Quota.Org.AlertRule
alertGlobalQuota = cfg.Quota.Global.AlertRule
}
globalQuotaTag, err := quota.NewTag(models.QuotaTargetSrv, models.QuotaTarget, quota.GlobalScope)
if err != nil {
return limits, err
}
orgQuotaTag, err := quota.NewTag(models.QuotaTargetSrv, models.QuotaTarget, quota.OrgScope)
if err != nil {
return limits, err
}
limits.Set(globalQuotaTag, alertGlobalQuota)
limits.Set(orgQuotaTag, alertOrgQuota)
return limits, nil
}