package schedule

import (
	"context"
	"errors"
	"fmt"
	"net/url"
	"sync"
	"time"

	"github.com/grafana/grafana/pkg/expr"
	"github.com/grafana/grafana/pkg/infra/log"
	"github.com/grafana/grafana/pkg/services/alerting"
	"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
	"github.com/grafana/grafana/pkg/services/ngalert/eval"
	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
	"github.com/grafana/grafana/pkg/services/ngalert/models"
	"github.com/grafana/grafana/pkg/services/ngalert/notifier"
	"github.com/grafana/grafana/pkg/services/ngalert/sender"
	"github.com/grafana/grafana/pkg/services/ngalert/state"
	"github.com/grafana/grafana/pkg/services/ngalert/store"

	"github.com/benbjohnson/clock"
	"golang.org/x/sync/errgroup"
)

// ScheduleService is an interface for a service that schedules the evaluation
// of alert rules.
//go:generate mockery --name ScheduleService --structname FakeScheduleService --inpackage --filename schedule_mock.go
type ScheduleService interface {
	// Run the scheduler until the context is canceled or the scheduler returns
	// an error. The scheduler is terminated when this function returns.
	Run(context.Context) error

	// AlertmanagersFor returns all the discovered Alertmanager URLs for the
	// organization.
	AlertmanagersFor(orgID int64) []*url.URL

	// DroppedAlertmanagersFor returns all the dropped Alertmanager URLs for the
	// organization.
	DroppedAlertmanagersFor(orgID int64) []*url.URL

	// UpdateAlertRule notifies the scheduler that a rule has been changed.
	UpdateAlertRule(key models.AlertRuleKey)

	// DeleteAlertRule notifies the scheduler that a rule has been deleted.
	DeleteAlertRule(key models.AlertRuleKey)

	// the following are used only by tests
	evalApplied(models.AlertRuleKey, time.Time)
	stopApplied(models.AlertRuleKey)
	overrideCfg(cfg SchedulerCfg)
}
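
// schedule implements the ScheduleService interface. It owns the per-rule
// evaluation routines, the stores it reads rules and configuration from, and
// the senders used to forward alerts to external Alertmanagers.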
type schedule struct {
	// base tick rate (fastest possible configured check)
	baseInterval time.Duration

	// each alert rule gets its own channel and routine
	registry alertRuleRegistry

	maxAttempts int64

	clock clock.Clock

	ticker *alerting.Ticker

	// evalApplied is only used for tests: test code can set it to a non-nil
	// function, and then it'll be called from the event loop whenever the
	// message from evalApplied is handled.
	evalAppliedFunc func(models.AlertRuleKey, time.Time)

	// stopApplied is only used for tests: test code can set it to a non-nil
	// function, and then it'll be called from the event loop whenever the
	// message from stopApplied is handled.
	stopAppliedFunc func(models.AlertRuleKey)

	log log.Logger

	evaluator eval.Evaluator

	ruleStore         store.RuleStore
	instanceStore     store.InstanceStore
	adminConfigStore  store.AdminConfigurationStore
	orgStore          store.OrgStore
	expressionService *expr.Service

	stateManager *state.Manager

	appURL *url.URL

	multiOrgNotifier *notifier.MultiOrgAlertmanager
	metrics          *metrics.Scheduler

	// Senders help us send alerts to external Alertmanagers.
	adminConfigMtx          sync.RWMutex
	sendAlertsTo            map[int64]models.AlertmanagersChoice
	sendersCfgHash          map[int64]string
	senders                 map[int64]*sender.Sender
	adminConfigPollInterval time.Duration

	disabledOrgs    map[int64]struct{}
	minRuleInterval time.Duration
}

// SchedulerCfg is the scheduler configuration.
type SchedulerCfg struct {
	C                       clock.Clock
	BaseInterval            time.Duration
	Logger                  log.Logger
	EvalAppliedFunc         func(models.AlertRuleKey, time.Time)
	MaxAttempts             int64
	StopAppliedFunc         func(models.AlertRuleKey)
	Evaluator               eval.Evaluator
	RuleStore               store.RuleStore
	OrgStore                store.OrgStore
	InstanceStore           store.InstanceStore
	AdminConfigStore        store.AdminConfigurationStore
	MultiOrgNotifier        *notifier.MultiOrgAlertmanager
	Metrics                 *metrics.Scheduler
	AdminConfigPollInterval time.Duration
	DisabledOrgs            map[int64]struct{}
	MinRuleInterval         time.Duration
}

// NewScheduler returns a new schedule.
func NewScheduler(cfg SchedulerCfg, expressionService *expr.Service, appURL *url.URL, stateManager *state.Manager) *schedule {
	ticker := alerting.NewTicker(cfg.C, cfg.BaseInterval, cfg.Metrics.Ticker)

	sch := schedule{
		registry:                alertRuleRegistry{alertRuleInfo: make(map[models.AlertRuleKey]*alertRuleInfo)},
		maxAttempts:             cfg.MaxAttempts,
		clock:                   cfg.C,
		baseInterval:            cfg.BaseInterval,
		log:                     cfg.Logger,
		ticker:                  ticker,
		evalAppliedFunc:         cfg.EvalAppliedFunc,
		stopAppliedFunc:         cfg.StopAppliedFunc,
		evaluator:               cfg.Evaluator,
		ruleStore:               cfg.RuleStore,
		instanceStore:           cfg.InstanceStore,
		orgStore:                cfg.OrgStore,
		expressionService:       expressionService,
		adminConfigStore:        cfg.AdminConfigStore,
		multiOrgNotifier:        cfg.MultiOrgNotifier,
		metrics:                 cfg.Metrics,
		appURL:                  appURL,
		stateManager:            stateManager,
		sendAlertsTo:            map[int64]models.AlertmanagersChoice{},
		senders:                 map[int64]*sender.Sender{},
		sendersCfgHash:          map[int64]string{},
		adminConfigPollInterval: cfg.AdminConfigPollInterval,
		disabledOrgs:            cfg.DisabledOrgs,
		minRuleInterval:         cfg.MinRuleInterval,
	}
	return &sch
}

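// Run starts the scheduler's two loops: the periodic rule evaluation loop and
// the admin configuration sync loop. It blocks until both have returned.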
func (sch *schedule) Run(ctx context.Context) error {
	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		if err := sch.schedulePeriodic(ctx); err != nil {
			sch.log.Error("failure while running the rule evaluation loop", "err", err)
		}
	}()

	go func() {
		defer wg.Done()
		if err := sch.adminConfigSync(ctx); err != nil {
			sch.log.Error("failure while running the admin configuration sync", "err", err)
		}
	}()

	wg.Wait()
	return nil
}

// SyncAndApplyConfigFromDatabase looks for the admin configuration in the database
// and adjusts the sender(s) and alert handling mechanism accordingly.
func (sch *schedule) SyncAndApplyConfigFromDatabase() error {
	sch.log.Debug("start of admin configuration sync")
	cfgs, err := sch.adminConfigStore.GetAdminConfigurations()
	if err != nil {
		return err
	}

	sch.log.Debug("found admin configurations", "count", len(cfgs))

	orgsFound := make(map[int64]struct{}, len(cfgs))

	sch.adminConfigMtx.Lock()
	for _, cfg := range cfgs {
		_, isDisabledOrg := sch.disabledOrgs[cfg.OrgID]
		if isDisabledOrg {
			sch.log.Debug("skipping starting sender for disabled org", "org", cfg.OrgID)
			continue
		}

		// Update the Alertmanagers choice for the organization.
		sch.sendAlertsTo[cfg.OrgID] = cfg.SendAlertsTo

		orgsFound[cfg.OrgID] = struct{}{} // keep track of which senders we need to keep.
		existing, ok := sch.senders[cfg.OrgID]

		// We have no running sender and no Alertmanager(s) configured, no-op.
		if !ok && len(cfg.Alertmanagers) == 0 {
			sch.log.Debug("no external alertmanagers configured", "org", cfg.OrgID)
			continue
		}

		// We have no running sender and alerts are handled internally, no-op.
		if !ok && cfg.SendAlertsTo == models.InternalAlertmanager {
			sch.log.Debug("alerts are handled internally", "org", cfg.OrgID)
			continue
		}

		// We have a running sender but no Alertmanager(s) configured, shut it down.
		if ok && len(cfg.Alertmanagers) == 0 {
			sch.log.Debug("no external alertmanager(s) configured, sender will be stopped", "org", cfg.OrgID)
			delete(orgsFound, cfg.OrgID)
			continue
		}

		// We have a running sender, check if we need to apply a new config.
		if ok {
			if sch.sendersCfgHash[cfg.OrgID] == cfg.AsSHA256() {
				sch.log.Debug("sender configuration is the same as the one running, no-op", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
				continue
			}

			sch.log.Debug("applying new configuration to sender", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
			err := existing.ApplyConfig(cfg)
			if err != nil {
				sch.log.Error("failed to apply configuration", "err", err, "org", cfg.OrgID)
				continue
			}
			sch.sendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
			continue
		}

		// No sender and have Alertmanager(s) to send to - start a new one.
		sch.log.Info("creating new sender for the external alertmanagers", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
		s, err := sender.New(sch.metrics)
		if err != nil {
			sch.log.Error("unable to start the sender", "err", err, "org", cfg.OrgID)
			continue
		}

		sch.senders[cfg.OrgID] = s
		s.Run()

		err = s.ApplyConfig(cfg)
		if err != nil {
			sch.log.Error("failed to apply configuration", "err", err, "org", cfg.OrgID)
			continue
		}

		sch.sendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
	}

	sendersToStop := map[int64]*sender.Sender{}

	for orgID, s := range sch.senders {
		if _, exists := orgsFound[orgID]; !exists {
			sendersToStop[orgID] = s
			delete(sch.senders, orgID)
			delete(sch.sendersCfgHash, orgID)
		}
	}

	sch.adminConfigMtx.Unlock()

	// We can now stop these senders w/o having to hold a lock.
	for orgID, s := range sendersToStop {
		sch.log.Info("stopping sender", "org", orgID)
		s.Stop()
		sch.log.Info("stopped sender", "org", orgID)
	}

	sch.log.Debug("finish of admin configuration sync")

	return nil
}

// AlertmanagersFor returns all the discovered Alertmanager(s) for a particular organization.
func (sch *schedule) AlertmanagersFor(orgID int64) []*url.URL {
	sch.adminConfigMtx.RLock()
	defer sch.adminConfigMtx.RUnlock()

	s, ok := sch.senders[orgID]
	if !ok {
		return []*url.URL{}
	}

	return s.Alertmanagers()
}

// DroppedAlertmanagersFor returns all the dropped Alertmanager(s) for a particular organization.
func (sch *schedule) DroppedAlertmanagersFor(orgID int64) []*url.URL {
	sch.adminConfigMtx.RLock()
	defer sch.adminConfigMtx.RUnlock()

	s, ok := sch.senders[orgID]
	if !ok {
		return []*url.URL{}
	}

	return s.DroppedAlertmanagers()
}

// UpdateAlertRule looks up the active rule evaluation routine and commands it to update the rule.
func (sch *schedule) UpdateAlertRule(key models.AlertRuleKey) {
	ruleInfo, err := sch.registry.get(key)
	if err != nil {
		return
	}
	ruleInfo.update()
}

// DeleteAlertRule stops evaluation of the rule, deletes it from the active rules, and cleans up the state cache.
func (sch *schedule) DeleteAlertRule(key models.AlertRuleKey) {
	ruleInfo, ok := sch.registry.del(key)
	if !ok {
		sch.log.Info("unable to delete alert rule routine information by key", "uid", key.UID, "org_id", key.OrgID)
		return
	}
	// stop rule evaluation
	ruleInfo.stop()
}
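
// adminConfigSync periodically rereads the admin configuration from the
// database and applies it, until the context is canceled.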
func (sch *schedule) adminConfigSync(ctx context.Context) error {
	for {
		select {
		case <-time.After(sch.adminConfigPollInterval):
			if err := sch.SyncAndApplyConfigFromDatabase(); err != nil {
				sch.log.Error("unable to sync admin configuration", "err", err)
			}
		case <-ctx.Done():
			// Stop sending alerts to all external Alertmanager(s).
			sch.adminConfigMtx.Lock()
			for orgID, s := range sch.senders {
				delete(sch.senders, orgID) // delete before we stop to make sure we don't accept any more alerts.
				s.Stop()
			}
			sch.adminConfigMtx.Unlock()

			return nil
		}
	}
}
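
// schedulePeriodic is the scheduler's main loop: on every tick it fetches the
// alert rules, starts a routine for any new rule, fans out evaluation requests
// to the rules that are due, and stops routines for deleted rules.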
func (sch *schedule) schedulePeriodic(ctx context.Context) error {
	dispatcherGroup, ctx := errgroup.WithContext(ctx)
	for {
		select {
		case tick := <-sch.ticker.C:
			// We use Round(0) on the start time to remove the monotonic clock.
			// This is required as ticks from the ticker and time.Now() can have
			// a monotonic clock that when subtracted do not represent the delta
			// in wall clock time.
			start := time.Now().Round(0)
			sch.metrics.BehindSeconds.Set(start.Sub(tick).Seconds())

			tickNum := tick.Unix() / int64(sch.baseInterval.Seconds())

			disabledOrgs := make([]int64, 0, len(sch.disabledOrgs))
			for disabledOrg := range sch.disabledOrgs {
				disabledOrgs = append(disabledOrgs, disabledOrg)
			}

			alertRules := sch.getAlertRules(ctx, disabledOrgs)
			sch.log.Debug("alert rules fetched", "count", len(alertRules), "disabled_orgs", disabledOrgs)

			// registeredDefinitions is a map used for finding deleted alert rules.
			// Initially it is assigned all known alert rules from the previous cycle;
			// each alert rule found also in this cycle is removed from it, so at the
			// end the remaining registered alert rules are the deleted ones.
			registeredDefinitions := sch.registry.keyMap()

			type readyToRunItem struct {
				key      models.AlertRuleKey
				ruleInfo *alertRuleInfo
				version  int64
			}

			readyToRun := make([]readyToRunItem, 0)
			for _, item := range alertRules {
				key := item.GetKey()
				itemVersion := item.Version
				ruleInfo, newRoutine := sch.registry.getOrCreateInfo(ctx, key)

				// enforce minimum evaluation interval
				if item.IntervalSeconds < int64(sch.minRuleInterval.Seconds()) {
					sch.log.Debug("interval adjusted", "rule_interval_seconds", item.IntervalSeconds, "min_interval_seconds", sch.minRuleInterval.Seconds(), "key", key)
					item.IntervalSeconds = int64(sch.minRuleInterval.Seconds())
				}

				invalidInterval := item.IntervalSeconds%int64(sch.baseInterval.Seconds()) != 0

				if newRoutine && !invalidInterval {
					dispatcherGroup.Go(func() error {
						return sch.ruleRoutine(ruleInfo.ctx, key, ruleInfo.evalCh, ruleInfo.updateCh)
					})
				}

				if invalidInterval {
					// this is expected to always be false
					// given that we validate the interval during alert rule updates
					sch.log.Debug("alert rule with invalid interval will be ignored: interval should be divided exactly by scheduler interval", "key", key, "interval", time.Duration(item.IntervalSeconds)*time.Second, "scheduler interval", sch.baseInterval)
					continue
				}

				itemFrequency := item.IntervalSeconds / int64(sch.baseInterval.Seconds())
				if item.IntervalSeconds != 0 && tickNum%itemFrequency == 0 {
					readyToRun = append(readyToRun, readyToRunItem{key: key, ruleInfo: ruleInfo, version: itemVersion})
				}

				// remove the alert rule from the registered alert rules
				delete(registeredDefinitions, key)
			}
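
			// Spread this tick's evaluations evenly across the base interval so
			// they don't all fire at the same instant.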
			var step int64 = 0
			if len(readyToRun) > 0 {
				step = sch.baseInterval.Nanoseconds() / int64(len(readyToRun))
			}

			for i := range readyToRun {
				item := readyToRun[i]
				time.AfterFunc(time.Duration(int64(i)*step), func() {
					success := item.ruleInfo.eval(tick, item.version)
					if !success {
						sch.log.Debug("Scheduled evaluation was canceled because evaluation routine was stopped", "uid", item.key.UID, "org", item.key.OrgID, "time", tick)
					}
				})
			}

			// unregister and stop routines of the deleted alert rules
			for key := range registeredDefinitions {
				sch.DeleteAlertRule(key)
			}

			sch.metrics.SchedulePeriodicDuration.Observe(time.Since(start).Seconds())
		case <-ctx.Done():
			waitErr := dispatcherGroup.Wait()

			orgIds, err := sch.instanceStore.FetchOrgIds(ctx)
			if err != nil {
				sch.log.Error("unable to fetch orgIds", "msg", err.Error())
			}

			for _, v := range orgIds {
				sch.saveAlertStates(ctx, sch.stateManager.GetAll(v))
			}

			sch.stateManager.Close()
			return waitErr
		}
	}
}
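
// ruleRoutine is the per-rule evaluation loop. It waits for update
// notifications on updateCh and evaluation requests on evalCh, and cleans up
// the rule's state when the routine is stopped.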
func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRuleKey, evalCh <-chan *evaluation, updateCh <-chan struct{}) error {
	logger := sch.log.New("uid", key.UID, "org", key.OrgID)
	logger.Debug("alert rule routine started")

	orgID := fmt.Sprint(key.OrgID)
	evalTotal := sch.metrics.EvalTotal.WithLabelValues(orgID)
	evalDuration := sch.metrics.EvalDuration.WithLabelValues(orgID)
	evalTotalFailures := sch.metrics.EvalFailures.WithLabelValues(orgID)

	notify := func(alerts definitions.PostableAlerts, logger log.Logger) {
		if len(alerts.PostableAlerts) == 0 {
logger . Debug ( "no alerts to put in the notifier or to send to external Alertmanager(s)" )
2022-01-11 10:39:34 -06:00
return
}
		// Send alerts to local notifier if they need to be handled internally
		// or if no external AMs have been discovered yet.
		var localNotifierExist, externalNotifierExist bool
		if sch.sendAlertsTo[key.OrgID] == models.ExternalAlertmanagers && len(sch.AlertmanagersFor(key.OrgID)) > 0 {
			logger.Debug("no alerts to put in the notifier")
		} else {
logger . Debug ( "sending alerts to local notifier" , "count" , len ( alerts . PostableAlerts ) , "alerts" , alerts . PostableAlerts )
n , err := sch . multiOrgNotifier . AlertmanagerFor ( key . OrgID )
if err == nil {
localNotifierExist = true
if err := n . PutAlerts ( alerts ) ; err != nil {
logger . Error ( "failed to put alerts in the local notifier" , "count" , len ( alerts . PostableAlerts ) , "err" , err )
}
2022-01-11 10:39:34 -06:00
} else {
				if errors.Is(err, notifier.ErrNoAlertmanagerForOrg) {
					logger.Debug("local notifier was not found")
				} else {
					logger.Error("local notifier is not available", "err", err)
				}
			}
		}

		// Send alerts to external Alertmanager(s) if we have a sender for this organization
		// and alerts are not being handled just internally.
		sch.adminConfigMtx.RLock()
		defer sch.adminConfigMtx.RUnlock()
		s, ok := sch.senders[key.OrgID]
		if ok && sch.sendAlertsTo[key.OrgID] != models.InternalAlertmanager {
			logger.Debug("sending alerts to external notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
			s.SendAlerts(alerts)
			externalNotifierExist = true
		}

		if !localNotifierExist && !externalNotifierExist {
			logger.Error("no external or internal notifier - alerts not delivered!", "count", len(alerts.PostableAlerts))
		}
	}

	clearState := func() {
		states := sch.stateManager.GetStatesForRuleUID(key.OrgID, key.UID)
		expiredAlerts := FromAlertsStateToStoppedAlert(states, sch.appURL, sch.clock)
		sch.stateManager.RemoveByRuleUID(key.OrgID, key.UID)
		notify(expiredAlerts, logger)
	}

	updateRule := func(ctx context.Context, oldRule *models.AlertRule) (*models.AlertRule, error) {
		q := models.GetAlertRuleByUIDQuery{OrgID: key.OrgID, UID: key.UID}
		err := sch.ruleStore.GetAlertRuleByUID(ctx, &q)
		if err != nil {
			logger.Error("failed to fetch alert rule", "err", err)
			return nil, err
		}
		if oldRule != nil && oldRule.Version < q.Result.Version {
			clearState()
		}
		return q.Result, nil
	}

	evaluate := func(ctx context.Context, r *models.AlertRule, attempt int64, e *evaluation) error {
		logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
		start := sch.clock.Now()

		condition := models.Condition{
			Condition: r.Condition,
			OrgID:     r.OrgID,
			Data:      r.Data,
		}
		results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt, sch.expressionService)
		dur := sch.clock.Now().Sub(start)
		evalTotal.Inc()
		evalDuration.Observe(dur.Seconds())
		if err != nil {
			evalTotalFailures.Inc()
			// consider saving alert instance on error
			logger.Error("failed to evaluate alert rule", "duration", dur, "err", err)
			return err
		}
		logger.Debug("alert rule evaluated", "results", results, "duration", dur)

		processedStates := sch.stateManager.ProcessEvalResults(ctx, r, results)
		sch.saveAlertStates(ctx, processedStates)
		alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)

		notify(alerts, logger)
		return nil
	}

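	// retryIfError calls f up to maxAttempts times and stops at the first
	// success; if every attempt fails, the last error is returned.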
	retryIfError := func(f func(attempt int64) error) error {
		var attempt int64
		var err error
		for attempt = 0; attempt < sch.maxAttempts; attempt++ {
			err = f(attempt)
			if err == nil {
				return nil
			}
		}
		return err
	}

	evalRunning := false
	var currentRule *models.AlertRule
	defer sch.stopApplied(key)

	for {
		select {
		// used by external services (API) to notify that rule is updated.
		case <-updateCh:
			logger.Info("fetching new version of the rule")
			err := retryIfError(func(attempt int64) error {
				newRule, err := updateRule(grafanaCtx, currentRule)
				if err != nil {
					return err
				}
				logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
				currentRule = newRule
				return nil
			})
			if err != nil {
				logger.Error("updating rule failed after all retries", "error", err)
			}
		// evalCh - used by the scheduler to signal that evaluation is needed.
		case ctx, ok := <-evalCh:
			if !ok {
				logger.Debug("Evaluation channel has been closed. Exiting")
				return nil
			}
			if evalRunning {
				continue
			}

			func() {
				evalRunning = true
				defer func() {
					evalRunning = false
					sch.evalApplied(key, ctx.scheduledAt)
				}()

				err := retryIfError(func(attempt int64) error {
					// fetch latest alert rule version
					if currentRule == nil || currentRule.Version < ctx.version {
						newRule, err := updateRule(grafanaCtx, currentRule)
						if err != nil {
							return err
						}
						currentRule = newRule
						logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
					}
					return evaluate(grafanaCtx, currentRule, attempt, ctx)
				})
				if err != nil {
					logger.Error("evaluation failed after all retries", "err", err)
				}
			}()
		case <-grafanaCtx.Done():
			clearState()
			logger.Debug("stopping alert rule routine")
			return nil
		}
	}
}
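
// saveAlertStates persists the given alert instance states to the instance store.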
func (sch *schedule) saveAlertStates(ctx context.Context, states []*state.State) {
	sch.log.Debug("saving alert states", "count", len(states))
	for _, s := range states {
		cmd := models.SaveAlertInstanceCommand{
			RuleOrgID:         s.OrgID,
			RuleUID:           s.AlertRuleUID,
			Labels:            models.InstanceLabels(s.Labels),
			State:             models.InstanceStateType(s.State.String()),
			LastEvalTime:      s.LastEvaluationTime,
			CurrentStateSince: s.StartsAt,
			CurrentStateEnd:   s.EndsAt,
		}
		err := sch.instanceStore.SaveAlertInstance(ctx, &cmd)
		if err != nil {
			sch.log.Error("failed to save alert state", "uid", s.AlertRuleUID, "orgId", s.OrgID, "labels", s.Labels.String(), "state", s.State.String(), "msg", err.Error())
		}
	}
}

// overrideCfg is only used in tests.
func (sch *schedule) overrideCfg(cfg SchedulerCfg) {
	sch.clock = cfg.C
	sch.baseInterval = cfg.BaseInterval
	sch.ticker = alerting.NewTicker(cfg.C, cfg.BaseInterval, cfg.Metrics.Ticker)
	sch.evalAppliedFunc = cfg.EvalAppliedFunc
	sch.stopAppliedFunc = cfg.StopAppliedFunc
}

// evalApplied is only used in tests.
func (sch *schedule) evalApplied(alertDefKey models.AlertRuleKey, now time.Time) {
	if sch.evalAppliedFunc == nil {
		return
	}
	sch.evalAppliedFunc(alertDefKey, now)
}

// stopApplied is only used in tests.
func (sch *schedule) stopApplied(alertDefKey models.AlertRuleKey) {
	if sch.stopAppliedFunc == nil {
		return
	}
	sch.stopAppliedFunc(alertDefKey)
}