2021-04-23 14:32:25 -05:00
package state
import (
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
2021-04-30 11:28:06 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
2021-04-23 14:32:25 -05:00
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
type Manager struct {
2021-04-30 11:28:06 -05:00
cache * cache
quit chan struct { }
Log log . Logger
metrics * metrics . Metrics
2021-04-23 14:32:25 -05:00
}
2021-04-30 11:28:06 -05:00
func NewManager ( logger log . Logger , metrics * metrics . Metrics ) * Manager {
2021-04-23 14:32:25 -05:00
manager := & Manager {
2021-04-30 11:28:06 -05:00
cache : newCache ( logger , metrics ) ,
quit : make ( chan struct { } ) ,
Log : logger ,
metrics : metrics ,
2021-04-23 14:32:25 -05:00
}
2021-05-18 12:56:14 -05:00
go manager . recordMetrics ( )
2021-04-23 14:32:25 -05:00
return manager
}
// Close sends the stop signal on the quit channel, which ends the
// recordMetrics loop. The send blocks until the signal has been accepted.
func (st *Manager) Close() {
	var stop struct{}
	st.quit <- stop
}
// getOrCreate hands the rule and evaluation result to the cache's getOrCreate
// and returns the state it yields.
func (st *Manager) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result) *State {
	state := st.cache.getOrCreate(alertRule, result)
	return state
}
// set stores the given state entry in the cache.
func (st *Manager) set(entry *State) {
	store := st.cache
	store.set(entry)
}
2021-05-04 11:57:50 -05:00
func ( st * Manager ) Get ( orgID int64 , alertRuleUID , stateId string ) ( * State , error ) {
return st . cache . get ( orgID , alertRuleUID , stateId )
2021-04-23 14:32:25 -05:00
}
2021-05-03 13:01:33 -05:00
// ResetCache is used to ensure a clean cache on startup.
2021-04-23 14:32:25 -05:00
func ( st * Manager ) ResetCache ( ) {
st . cache . reset ( )
}
2021-05-03 13:01:33 -05:00
// RemoveByRuleUID deletes all entries in the state manager that match the
// given rule UID within the given organisation.
func (st *Manager) RemoveByRuleUID(orgID int64, ruleUID string) {
	store := st.cache
	store.removeByRuleUID(orgID, ruleUID)
}
2021-04-23 14:32:25 -05:00
// ProcessEvalResults feeds every evaluation result of alertRule through
// setNextState and returns the resulting states, one per result, in the order
// the results were ranged over. It returns a nil slice when results is empty.
func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eval.Results) []*State {
	st.Log.Debug("state manager processing evaluation results", "uid", alertRule.UID, "resultCount", len(results))

	// The output holds exactly one state per result, so allocate it once
	// instead of letting append grow it. Empty input keeps the nil slice.
	var states []*State
	if len(results) > 0 {
		states = make([]*State, 0, len(results))
	}
	for _, result := range results {
		states = append(states, st.setNextState(alertRule, result))
	}

	st.Log.Debug("returning changed states to scheduler", "count", len(states))
	return states
}
//TODO: When calculating if an alert should not be firing anymore, we should take into account the re-send delay if any. We don't want to send every firing alert every time, we should have a fixed delay across all alerts to avoid saturating the notification system
//Set the current state based on evaluation results
func ( st * Manager ) setNextState ( alertRule * ngModels . AlertRule , result eval . Result ) * State {
currentState := st . getOrCreate ( alertRule , result )
currentState . LastEvaluationTime = result . EvaluatedAt
currentState . EvaluationDuration = result . EvaluationDuration
currentState . Results = append ( currentState . Results , Evaluation {
2021-05-18 08:12:39 -05:00
EvaluationTime : result . EvaluatedAt ,
EvaluationState : result . State ,
EvaluationString : result . EvaluationString ,
2021-04-23 14:32:25 -05:00
} )
2021-05-18 12:56:14 -05:00
currentState . TrimResults ( alertRule )
2021-04-23 14:32:25 -05:00
st . Log . Debug ( "setting alert state" , "uid" , alertRule . UID )
switch result . State {
case eval . Normal :
currentState = resultNormal ( currentState , result )
case eval . Alerting :
currentState = currentState . resultAlerting ( alertRule , result )
case eval . Error :
currentState = currentState . resultError ( alertRule , result )
case eval . NoData :
currentState = currentState . resultNoData ( alertRule , result )
case eval . Pending : // we do not emit results with this state
}
st . set ( currentState )
return currentState
}
2021-05-04 11:57:50 -05:00
func ( st * Manager ) GetAll ( orgID int64 ) [ ] * State {
return st . cache . getAll ( orgID )
2021-04-23 14:32:25 -05:00
}
2021-05-04 11:57:50 -05:00
func ( st * Manager ) GetStatesForRuleUID ( orgID int64 , alertRuleUID string ) [ ] * State {
return st . cache . getStatesForRuleUID ( orgID , alertRuleUID )
2021-04-23 14:32:25 -05:00
}
2021-05-18 12:56:14 -05:00
func ( st * Manager ) recordMetrics ( ) {
2021-04-30 11:28:06 -05:00
// TODO: parameterize?
// Setting to a reasonable default scrape interval for Prometheus.
dur := time . Duration ( 15 ) * time . Second
ticker := time . NewTicker ( dur )
2021-04-23 14:32:25 -05:00
for {
select {
case <- ticker . C :
2021-05-18 12:56:14 -05:00
st . Log . Info ( "recording state cache metrics" , "now" , time . Now ( ) )
st . cache . recordMetrics ( )
2021-04-23 14:32:25 -05:00
case <- st . quit :
2021-05-18 12:56:14 -05:00
st . Log . Debug ( "stopping state cache metrics recording" , "now" , time . Now ( ) )
2021-04-23 14:32:25 -05:00
ticker . Stop ( )
return
}
}
}
// Put stores each of the given states through set, in slice order.
func (st *Manager) Put(states []*State) {
	for i := range states {
		st.set(states[i])
	}
}