2020-11-12 15:11:30 +02:00
package ngalert
import (
2020-12-17 16:00:09 +02:00
"context"
2021-09-30 12:51:20 -04:00
"net/url"
2020-12-17 16:00:09 +02:00
"time"
2020-11-12 15:11:30 +02:00
"github.com/grafana/grafana/pkg/api/routing"
2021-09-09 17:25:22 +01:00
"github.com/grafana/grafana/pkg/infra/kvstore"
2020-11-12 15:11:30 +02:00
"github.com/grafana/grafana/pkg/infra/log"
2021-03-24 14:20:44 +00:00
"github.com/grafana/grafana/pkg/services/datasourceproxy"
2020-11-12 15:11:30 +02:00
"github.com/grafana/grafana/pkg/services/datasources"
2021-09-09 17:25:22 +01:00
"github.com/grafana/grafana/pkg/services/ngalert/api"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
2021-03-24 14:20:44 +00:00
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
2021-09-09 17:25:22 +01:00
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/quota"
2021-03-24 14:20:44 +00:00
"github.com/grafana/grafana/pkg/services/sqlstore"
2020-11-12 15:11:30 +02:00
"github.com/grafana/grafana/pkg/setting"
2021-03-24 14:20:44 +00:00
"github.com/grafana/grafana/pkg/tsdb"
2021-09-09 17:25:22 +01:00
"github.com/benbjohnson/clock"
"golang.org/x/sync/errgroup"
2020-11-12 15:11:30 +02:00
)
2020-12-17 16:00:09 +02:00
const (
	// defaultBaseIntervalSeconds is the scheduler interval, in seconds.
	// Changing this value is discouraged: existing alert definitions whose
	// intervals are not an exact multiple of the new value would no longer
	// be evaluated.
	defaultBaseIntervalSeconds = 10

	// defaultIntervalSeconds is the default alert definition interval, in
	// seconds: six scheduler intervals.
	defaultIntervalSeconds int64 = 6 * defaultBaseIntervalSeconds
)
2021-08-25 15:11:22 +02:00
func ProvideService ( cfg * setting . Cfg , dataSourceCache datasources . CacheService , routeRegister routing . RouteRegister ,
2021-09-09 17:25:22 +01:00
sqlStore * sqlstore . SQLStore , kvStore kvstore . KVStore , dataService * tsdb . Service , dataProxy * datasourceproxy . DataSourceProxyService ,
2021-09-14 12:55:01 +01:00
quotaService * quota . QuotaService , m * metrics . NGAlert ) ( * AlertNG , error ) {
2021-08-25 15:11:22 +02:00
ng := & AlertNG {
Cfg : cfg ,
DataSourceCache : dataSourceCache ,
RouteRegister : routeRegister ,
SQLStore : sqlStore ,
2021-09-09 17:25:22 +01:00
KVStore : kvStore ,
2021-08-25 15:11:22 +02:00
DataService : dataService ,
DataProxy : dataProxy ,
QuotaService : quotaService ,
Metrics : m ,
Log : log . New ( "ngalert" ) ,
}
if ng . IsDisabled ( ) {
return ng , nil
}
if err := ng . init ( ) ; err != nil {
return nil , err
}
return ng , nil
}
2020-11-12 15:11:30 +02:00
// AlertNG is the service for evaluating the condition of an alert definition.
type AlertNG struct {
2021-08-25 15:11:22 +02:00
Cfg * setting . Cfg
DataSourceCache datasources . CacheService
RouteRegister routing . RouteRegister
SQLStore * sqlstore . SQLStore
2021-09-09 17:25:22 +01:00
KVStore kvstore . KVStore
2021-08-25 15:11:22 +02:00
DataService * tsdb . Service
DataProxy * datasourceproxy . DataSourceProxyService
QuotaService * quota . QuotaService
2021-09-14 12:55:01 +01:00
Metrics * metrics . NGAlert
2021-08-25 15:11:22 +02:00
Log log . Logger
2021-03-08 22:19:21 +02:00
schedule schedule . ScheduleService
2021-04-23 12:32:25 -07:00
stateManager * state . Manager
2021-08-06 13:06:56 +01:00
// Alerting notification services
2021-08-24 03:28:09 -07:00
MultiOrgAlertmanager * notifier . MultiOrgAlertmanager
2020-11-12 15:11:30 +02:00
}
2021-08-25 15:11:22 +02:00
func ( ng * AlertNG ) init ( ) error {
2021-09-16 15:33:51 +01:00
var err error
2021-08-25 15:11:22 +02:00
baseInterval := ng . Cfg . AlertingBaseInterval
if baseInterval <= 0 {
baseInterval = defaultBaseIntervalSeconds
}
baseInterval *= time . Second
2021-03-03 17:52:19 +02:00
2021-05-14 16:13:44 -04:00
store := & store . DBstore {
2021-09-28 13:00:16 +03:00
BaseInterval : baseInterval ,
DefaultInterval : ng . getRuleDefaultInterval ( ) ,
SQLStore : ng . SQLStore ,
Logger : ng . Log ,
2021-05-14 16:13:44 -04:00
}
2021-05-13 14:01:38 -04:00
2021-09-16 15:33:51 +01:00
multiOrgMetrics := ng . Metrics . GetMultiOrgAlertmanagerMetrics ( )
ng . MultiOrgAlertmanager , err = notifier . NewMultiOrgAlertmanager ( ng . Cfg , store , store , ng . KVStore , multiOrgMetrics , log . New ( "ngalert.multiorg.alertmanager" ) )
if err != nil {
return err
}
2021-08-24 03:28:09 -07:00
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
if err := ng . MultiOrgAlertmanager . LoadAndSyncAlertmanagersForOrgs ( context . Background ( ) ) ; err != nil {
2021-05-13 14:01:38 -04:00
return err
}
2021-03-03 17:52:19 +02:00
2021-03-08 22:19:21 +02:00
schedCfg := schedule . SchedulerCfg {
2021-08-13 13:14:36 +01:00
C : clock . New ( ) ,
BaseInterval : baseInterval ,
2021-09-28 13:00:16 +03:00
Logger : ng . Log ,
MaxAttempts : ng . Cfg . UnifiedAlerting . MaxAttempts ,
2021-08-13 13:14:36 +01:00
Evaluator : eval . Evaluator { Cfg : ng . Cfg , Log : ng . Log } ,
InstanceStore : store ,
RuleStore : store ,
AdminConfigStore : store ,
2021-08-24 03:28:09 -07:00
OrgStore : store ,
MultiOrgNotifier : ng . MultiOrgAlertmanager ,
2021-09-14 12:55:01 +01:00
Metrics : ng . Metrics . GetSchedulerMetrics ( ) ,
2021-09-20 08:12:21 +01:00
AdminConfigPollInterval : ng . Cfg . UnifiedAlerting . AdminConfigPollInterval ,
2021-09-29 17:16:40 +03:00
DisabledOrgs : ng . Cfg . UnifiedAlerting . DisabledOrgs ,
2021-09-28 13:00:16 +03:00
MinRuleInterval : ng . getRuleMinInterval ( ) ,
2021-01-22 19:27:33 +02:00
}
2021-09-30 12:51:20 -04:00
appUrl , err := url . Parse ( ng . Cfg . AppURL )
if err != nil {
ng . Log . Error ( "Failed to parse application URL. Continue without it." , "error" , err )
appUrl = nil
}
2021-09-14 12:55:01 +01:00
stateManager := state . NewManager ( ng . Log , ng . Metrics . GetStateMetrics ( ) , store , store )
2021-09-30 12:51:20 -04:00
scheduler := schedule . NewScheduler ( schedCfg , ng . DataService , appUrl , stateManager )
2021-08-13 13:14:36 +01:00
2021-08-25 15:11:22 +02:00
ng . stateManager = stateManager
2021-09-30 12:51:20 -04:00
ng . schedule = scheduler
2021-03-03 17:52:19 +02:00
2021-03-08 22:19:21 +02:00
api := api . API {
2021-08-24 03:28:09 -07:00
Cfg : ng . Cfg ,
2021-08-25 15:11:22 +02:00
DatasourceCache : ng . DataSourceCache ,
2021-08-24 03:28:09 -07:00
RouteRegister : ng . RouteRegister ,
DataService : ng . DataService ,
Schedule : ng . schedule ,
DataProxy : ng . DataProxy ,
QuotaService : ng . QuotaService ,
InstanceStore : store ,
RuleStore : store ,
AlertingStore : store ,
AdminConfigStore : store ,
MultiOrgAlertmanager : ng . MultiOrgAlertmanager ,
StateManager : ng . stateManager ,
2021-03-24 14:20:44 +00:00
}
2021-09-14 12:55:01 +01:00
api . RegisterAPIEndpoints ( ng . Metrics . GetAPIMetrics ( ) )
2021-03-03 17:52:19 +02:00
2020-11-12 15:11:30 +02:00
return nil
}
2021-07-27 11:52:59 +01:00
// Run starts the scheduler and Alertmanager.
2020-12-17 16:00:09 +02:00
func ( ng * AlertNG ) Run ( ctx context . Context ) error {
2021-03-08 22:19:21 +02:00
ng . Log . Debug ( "ngalert starting" )
2021-07-07 17:18:31 +01:00
ng . stateManager . Warm ( )
2021-05-13 14:01:38 -04:00
children , subCtx := errgroup . WithContext ( ctx )
2021-09-28 13:00:16 +03:00
if ng . Cfg . UnifiedAlerting . ExecuteAlerts {
children . Go ( func ( ) error {
return ng . schedule . Run ( subCtx )
} )
}
2021-05-13 14:01:38 -04:00
children . Go ( func ( ) error {
2021-08-24 03:28:09 -07:00
return ng . MultiOrgAlertmanager . Run ( subCtx )
2021-05-13 14:01:38 -04:00
} )
return children . Wait ( )
2020-12-17 16:00:09 +02:00
}
2020-11-12 15:11:30 +02:00
// IsDisabled returns true if the alerting service is disable for this instance.
func ( ng * AlertNG ) IsDisabled ( ) bool {
if ng . Cfg == nil {
2020-12-17 16:00:09 +02:00
return true
2020-11-12 15:11:30 +02:00
}
2021-09-29 17:16:40 +03:00
return ! ng . Cfg . UnifiedAlerting . Enabled
2020-11-12 15:11:30 +02:00
}
2021-09-28 13:00:16 +03:00
// getRuleDefaultInterval returns the interval applied to a rule that does not
// set one. This is defaultIntervalSeconds (1 minute) unless the minimum
// evaluation interval reported by getRuleMinInterval is longer, in which case
// that minimum is returned so the default never falls below it.
func (ng *AlertNG) getRuleDefaultInterval() time.Duration {
	defaultInterval := time.Duration(defaultIntervalSeconds) * time.Second

	// Compare whole durations. Truncating the minimum to integral seconds
	// first would let a minimum such as 60.5s slip past the check and yield
	// a default shorter than the minimum.
	if minInterval := ng.getRuleMinInterval(); defaultInterval < minInterval {
		return minInterval
	}
	return defaultInterval
}
// getRuleMinInterval returns the configured minimum rule interval
// (UnifiedAlerting.MinInterval). If that value is less than or equal to zero,
// or is not an exact multiple of the scheduler interval, the scheduler
// interval (10 seconds) is returned instead; the invalid case is also logged
// as an error.
func (ng *AlertNG) getRuleMinInterval() time.Duration {
	// defaultBaseIntervalSeconds is a bare count of seconds. It has to be
	// scaled by time.Second before being used as a time.Duration, otherwise
	// it is read as 10 nanoseconds, both as the fallback value and as the
	// divisor in the check below.
	const schedulerInterval = defaultBaseIntervalSeconds * time.Second

	minInterval := ng.Cfg.UnifiedAlerting.MinInterval
	if minInterval <= 0 {
		return schedulerInterval // if it's not configured; apply default
	}

	if minInterval%schedulerInterval != 0 {
		ng.Log.Error("Configured minimum evaluation interval is not divided exactly by the scheduler interval and it will fallback to default", "alertingMinInterval", minInterval, "baseIntervalSeconds", defaultBaseIntervalSeconds, "defaultIntervalSeconds", defaultIntervalSeconds)
		return schedulerInterval // if it's invalid; apply default
	}

	return minInterval
}