package state

import (
	"time"

	"github.com/grafana/grafana/pkg/infra/log"
	"github.com/grafana/grafana/pkg/services/ngalert/eval"
	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
	ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
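
// Manager tracks the state of alert rule evaluations in an in-memory cache
// and records metrics about that cache.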
type Manager struct {
	cache       *cache
	quit        chan struct{}
	ResendDelay time.Duration
	Log         log.Logger
	metrics     *metrics.Metrics
}
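
// NewManager returns a new Manager and starts its background
// metrics-recording goroutine.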
func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
	manager := &Manager{
		cache:       newCache(logger, metrics),
		quit:        make(chan struct{}),
		ResendDelay: 1 * time.Minute, // TODO: make this configurable
		Log:         logger,
		metrics:     metrics,
	}
	go manager.recordMetrics()
	return manager
}
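
// Close signals the metrics-recording goroutine to stop.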
func (st *Manager) Close() {
	st.quit <- struct{}{}
}

func (st *Manager) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result) *State {
	return st.cache.getOrCreate(alertRule, result)
}

func (st *Manager) set(entry *State) {
	st.cache.set(entry)
}
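
// Get returns the cached state for the given org, alert rule UID, and state ID.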
func (st *Manager) Get(orgID int64, alertRuleUID, stateId string) (*State, error) {
	return st.cache.get(orgID, alertRuleUID, stateId)
}

// ResetCache is used to ensure a clean cache on startup.
func (st *Manager) ResetCache() {
	st.cache.reset()
}

// RemoveByRuleUID deletes all entries in the state manager that match the given rule UID.
func (st *Manager) RemoveByRuleUID(orgID int64, ruleUID string) {
	st.cache.removeByRuleUID(orgID, ruleUID)
}
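
// ProcessEvalResults updates the cached state for each evaluation result of
// the given alert rule and returns the updated states.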
func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eval.Results) []*State {
	st.Log.Debug("state manager processing evaluation results", "uid", alertRule.UID, "resultCount", len(results))
	var states []*State
	for _, result := range results {
		s := st.setNextState(alertRule, result)
		states = append(states, s)
	}
	st.Log.Debug("returning changed states to scheduler", "count", len(states))
	return states
}

// setNextState sets the current state based on the evaluation result.
func (st *Manager) setNextState(alertRule *ngModels.AlertRule, result eval.Result) *State {
	currentState := st.getOrCreate(alertRule, result)

	currentState.LastEvaluationTime = result.EvaluatedAt
	currentState.EvaluationDuration = result.EvaluationDuration
	currentState.Results = append(currentState.Results, Evaluation{
		EvaluationTime:   result.EvaluatedAt,
		EvaluationState:  result.State,
		EvaluationString: result.EvaluationString,
	})
	currentState.TrimResults(alertRule)

	st.Log.Debug("setting alert state", "uid", alertRule.UID)
	switch result.State {
	case eval.Normal:
		currentState = currentState.resultNormal(result)
	case eval.Alerting:
		currentState = currentState.resultAlerting(alertRule, result)
	case eval.Error:
		currentState = currentState.resultError(alertRule, result)
	case eval.NoData:
		currentState = currentState.resultNoData(alertRule, result)
	case eval.Pending: // we do not emit results with this state
	}

	st.set(currentState)
	return currentState
}
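
// GetAll returns all cached states for the given org.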
func (st *Manager) GetAll(orgID int64) []*State {
	return st.cache.getAll(orgID)
}
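
// GetStatesForRuleUID returns all cached states for the given org and alert rule UID.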
func (st *Manager) GetStatesForRuleUID(orgID int64, alertRuleUID string) []*State {
	return st.cache.getStatesForRuleUID(orgID, alertRuleUID)
}
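
// recordMetrics records state cache metrics on a fixed interval until Close is called.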
func (st *Manager) recordMetrics() {
	// TODO: parameterize?
	// Setting to a reasonable default scrape interval for Prometheus.
	dur := time.Duration(15) * time.Second
	ticker := time.NewTicker(dur)
	for {
		select {
		case <-ticker.C:
			st.Log.Info("recording state cache metrics", "now", time.Now())
			st.cache.recordMetrics()
		case <-st.quit:
			st.Log.Debug("stopping state cache metrics recording", "now", time.Now())
			ticker.Stop()
			return
		}
	}
}
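
// Put writes the given states into the state cache.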
func (st *Manager) Put(states []*State) {
	for _, s := range states {
		st.set(s)
	}
}