2020-11-12 07:11:30 -06:00
|
|
|
package ngalert
|
|
|
|
|
|
|
|
import (
|
2020-12-17 08:00:09 -06:00
|
|
|
"context"
|
|
|
|
"time"
|
|
|
|
|
2021-08-25 08:11:22 -05:00
|
|
|
"github.com/benbjohnson/clock"
|
2021-05-04 11:16:28 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/quota"
|
2021-05-13 13:01:38 -05:00
|
|
|
"golang.org/x/sync/errgroup"
|
2021-05-04 11:16:28 -05:00
|
|
|
|
2021-08-25 08:11:22 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/api"
|
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
2021-04-30 11:28:06 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
2021-03-24 17:34:18 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
2021-08-25 08:11:22 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
2020-11-12 07:11:30 -06:00
|
|
|
|
|
|
|
"github.com/grafana/grafana/pkg/api/routing"
|
|
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
2021-03-24 09:20:44 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/datasourceproxy"
|
2020-11-12 07:11:30 -06:00
|
|
|
"github.com/grafana/grafana/pkg/services/datasources"
|
2021-03-24 09:20:44 -05:00
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
|
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
|
|
|
|
"github.com/grafana/grafana/pkg/services/sqlstore"
|
2020-11-12 07:11:30 -06:00
|
|
|
"github.com/grafana/grafana/pkg/setting"
|
2021-03-24 09:20:44 -05:00
|
|
|
"github.com/grafana/grafana/pkg/tsdb"
|
2020-11-12 07:11:30 -06:00
|
|
|
)
|
|
|
|
|
2020-12-17 08:00:09 -06:00
|
|
|
const (
	// maxAttempts is handed to the scheduler configuration as MaxAttempts.
	maxAttempts int64 = 3

	// defaultBaseIntervalSeconds is the scheduler interval, in seconds, that
	// init falls back to when the configured base interval is not positive.
	//
	// Changing this value is discouraged: existing alert definitions whose
	// intervals are not exact multiples of it could stop being evaluated.
	defaultBaseIntervalSeconds = 10

	// defaultIntervalSeconds is the default alert definition interval, in
	// seconds; it spans six base intervals.
	defaultIntervalSeconds int64 = defaultBaseIntervalSeconds * 6
)
|
|
|
|
|
2021-08-25 08:11:22 -05:00
|
|
|
func ProvideService(cfg *setting.Cfg, dataSourceCache datasources.CacheService, routeRegister routing.RouteRegister,
|
|
|
|
sqlStore *sqlstore.SQLStore, dataService *tsdb.Service, dataProxy *datasourceproxy.DataSourceProxyService,
|
|
|
|
quotaService *quota.QuotaService, m *metrics.Metrics) (*AlertNG, error) {
|
|
|
|
ng := &AlertNG{
|
|
|
|
Cfg: cfg,
|
|
|
|
DataSourceCache: dataSourceCache,
|
|
|
|
RouteRegister: routeRegister,
|
|
|
|
SQLStore: sqlStore,
|
|
|
|
DataService: dataService,
|
|
|
|
DataProxy: dataProxy,
|
|
|
|
QuotaService: quotaService,
|
|
|
|
Metrics: m,
|
|
|
|
Log: log.New("ngalert"),
|
|
|
|
}
|
|
|
|
|
|
|
|
if ng.IsDisabled() {
|
|
|
|
return ng, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := ng.init(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return ng, nil
|
|
|
|
}
|
|
|
|
|
2020-11-12 07:11:30 -06:00
|
|
|
// AlertNG is the service for evaluating the condition of an alert definition.
|
|
|
|
type AlertNG struct {
|
2021-08-25 08:11:22 -05:00
|
|
|
Cfg *setting.Cfg
|
|
|
|
DataSourceCache datasources.CacheService
|
|
|
|
RouteRegister routing.RouteRegister
|
|
|
|
SQLStore *sqlstore.SQLStore
|
|
|
|
DataService *tsdb.Service
|
|
|
|
DataProxy *datasourceproxy.DataSourceProxyService
|
|
|
|
QuotaService *quota.QuotaService
|
|
|
|
Metrics *metrics.Metrics
|
|
|
|
Log log.Logger
|
2021-03-08 14:19:21 -06:00
|
|
|
schedule schedule.ScheduleService
|
2021-04-23 14:32:25 -05:00
|
|
|
stateManager *state.Manager
|
2021-08-06 07:06:56 -05:00
|
|
|
|
|
|
|
// Alerting notification services
|
2021-08-24 05:28:09 -05:00
|
|
|
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
|
2020-11-12 07:11:30 -06:00
|
|
|
}
|
|
|
|
|
2021-08-25 08:11:22 -05:00
|
|
|
func (ng *AlertNG) init() error {
|
|
|
|
baseInterval := ng.Cfg.AlertingBaseInterval
|
|
|
|
if baseInterval <= 0 {
|
|
|
|
baseInterval = defaultBaseIntervalSeconds
|
|
|
|
}
|
|
|
|
baseInterval *= time.Second
|
2021-03-03 09:52:19 -06:00
|
|
|
|
2021-05-14 15:13:44 -05:00
|
|
|
store := &store.DBstore{
|
|
|
|
BaseInterval: baseInterval,
|
|
|
|
DefaultIntervalSeconds: defaultIntervalSeconds,
|
|
|
|
SQLStore: ng.SQLStore,
|
2021-05-20 07:49:33 -05:00
|
|
|
Logger: ng.Log,
|
2021-05-14 15:13:44 -05:00
|
|
|
}
|
2021-05-13 13:01:38 -05:00
|
|
|
|
2021-08-24 05:28:09 -05:00
|
|
|
ng.MultiOrgAlertmanager = notifier.NewMultiOrgAlertmanager(ng.Cfg, store, store)
|
|
|
|
|
|
|
|
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
|
|
|
|
if err := ng.MultiOrgAlertmanager.LoadAndSyncAlertmanagersForOrgs(context.Background()); err != nil {
|
2021-05-13 13:01:38 -05:00
|
|
|
return err
|
|
|
|
}
|
2021-03-03 09:52:19 -06:00
|
|
|
|
2021-03-08 14:19:21 -06:00
|
|
|
schedCfg := schedule.SchedulerCfg{
|
2021-08-13 07:14:36 -05:00
|
|
|
C: clock.New(),
|
|
|
|
BaseInterval: baseInterval,
|
|
|
|
Logger: log.New("ngalert.scheduler"),
|
|
|
|
MaxAttempts: maxAttempts,
|
|
|
|
Evaluator: eval.Evaluator{Cfg: ng.Cfg, Log: ng.Log},
|
|
|
|
InstanceStore: store,
|
|
|
|
RuleStore: store,
|
|
|
|
AdminConfigStore: store,
|
2021-08-24 05:28:09 -05:00
|
|
|
OrgStore: store,
|
|
|
|
MultiOrgNotifier: ng.MultiOrgAlertmanager,
|
2021-08-13 07:14:36 -05:00
|
|
|
Metrics: ng.Metrics,
|
|
|
|
AdminConfigPollInterval: ng.Cfg.AdminConfigPollInterval,
|
2021-01-22 11:27:33 -06:00
|
|
|
}
|
2021-08-25 08:11:22 -05:00
|
|
|
stateManager := state.NewManager(ng.Log, ng.Metrics, store, store)
|
|
|
|
schedule := schedule.NewScheduler(schedCfg, ng.DataService, ng.Cfg.AppURL, stateManager)
|
2021-08-13 07:14:36 -05:00
|
|
|
|
2021-08-25 08:11:22 -05:00
|
|
|
ng.stateManager = stateManager
|
|
|
|
ng.schedule = schedule
|
2021-03-03 09:52:19 -06:00
|
|
|
|
2021-03-08 14:19:21 -06:00
|
|
|
api := api.API{
|
2021-08-24 05:28:09 -05:00
|
|
|
Cfg: ng.Cfg,
|
2021-08-25 08:11:22 -05:00
|
|
|
DatasourceCache: ng.DataSourceCache,
|
2021-08-24 05:28:09 -05:00
|
|
|
RouteRegister: ng.RouteRegister,
|
|
|
|
DataService: ng.DataService,
|
|
|
|
Schedule: ng.schedule,
|
|
|
|
DataProxy: ng.DataProxy,
|
|
|
|
QuotaService: ng.QuotaService,
|
|
|
|
InstanceStore: store,
|
|
|
|
RuleStore: store,
|
|
|
|
AlertingStore: store,
|
|
|
|
AdminConfigStore: store,
|
|
|
|
MultiOrgAlertmanager: ng.MultiOrgAlertmanager,
|
|
|
|
StateManager: ng.stateManager,
|
2021-03-24 09:20:44 -05:00
|
|
|
}
|
2021-04-30 11:28:06 -05:00
|
|
|
api.RegisterAPIEndpoints(ng.Metrics)
|
2021-03-03 09:52:19 -06:00
|
|
|
|
2020-11-12 07:11:30 -06:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-27 05:52:59 -05:00
|
|
|
// Run starts the scheduler and Alertmanager.
|
2020-12-17 08:00:09 -06:00
|
|
|
func (ng *AlertNG) Run(ctx context.Context) error {
|
2021-03-08 14:19:21 -06:00
|
|
|
ng.Log.Debug("ngalert starting")
|
2021-07-07 11:18:31 -05:00
|
|
|
ng.stateManager.Warm()
|
2021-05-13 13:01:38 -05:00
|
|
|
|
|
|
|
children, subCtx := errgroup.WithContext(ctx)
|
|
|
|
children.Go(func() error {
|
2021-07-27 05:52:59 -05:00
|
|
|
return ng.schedule.Run(subCtx)
|
2021-05-13 13:01:38 -05:00
|
|
|
})
|
|
|
|
children.Go(func() error {
|
2021-08-24 05:28:09 -05:00
|
|
|
return ng.MultiOrgAlertmanager.Run(subCtx)
|
2021-05-13 13:01:38 -05:00
|
|
|
})
|
|
|
|
return children.Wait()
|
2020-12-17 08:00:09 -06:00
|
|
|
}
|
|
|
|
|
2020-11-12 07:11:30 -06:00
|
|
|
// IsDisabled returns true if the alerting service is disable for this instance.
|
|
|
|
func (ng *AlertNG) IsDisabled() bool {
|
|
|
|
if ng.Cfg == nil {
|
2020-12-17 08:00:09 -06:00
|
|
|
return true
|
2020-11-12 07:11:30 -06:00
|
|
|
}
|
|
|
|
return !ng.Cfg.IsNgAlertEnabled()
|
|
|
|
}
|