2016-06-06 03:31:21 -05:00
|
|
|
package alerting
|
2016-06-06 04:56:58 -05:00
|
|
|
|
|
|
|
import (
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/benbjohnson/clock"
|
2016-06-06 06:50:47 -05:00
|
|
|
"github.com/grafana/grafana/pkg/log"
|
2016-06-06 04:56:58 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
type Engine struct {
|
2016-06-23 05:57:10 -05:00
|
|
|
execQueue chan *AlertJob
|
2016-07-19 15:36:59 -05:00
|
|
|
resultQueue chan *AlertResultContext
|
2016-06-23 05:57:10 -05:00
|
|
|
clock clock.Clock
|
|
|
|
ticker *Ticker
|
|
|
|
scheduler Scheduler
|
2016-07-20 02:30:31 -05:00
|
|
|
handler AlertHandler
|
2016-06-23 05:57:10 -05:00
|
|
|
ruleReader RuleReader
|
|
|
|
log log.Logger
|
|
|
|
responseHandler ResultHandler
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewEngine() *Engine {
|
|
|
|
e := &Engine{
|
2016-06-23 05:57:10 -05:00
|
|
|
ticker: NewTicker(time.Now(), time.Second*0, clock.New()),
|
|
|
|
execQueue: make(chan *AlertJob, 1000),
|
2016-07-19 15:36:59 -05:00
|
|
|
resultQueue: make(chan *AlertResultContext, 1000),
|
2016-06-23 05:57:10 -05:00
|
|
|
scheduler: NewScheduler(),
|
|
|
|
handler: NewHandler(),
|
|
|
|
ruleReader: NewRuleReader(),
|
|
|
|
log: log.New("alerting.engine"),
|
|
|
|
responseHandler: NewResultHandler(),
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return e
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Engine) Start() {
|
2016-06-07 06:31:56 -05:00
|
|
|
e.log.Info("Starting Alerting Engine")
|
2016-06-06 06:50:47 -05:00
|
|
|
|
2016-06-06 07:24:14 -05:00
|
|
|
go e.alertingTicker()
|
2016-06-06 04:56:58 -05:00
|
|
|
go e.execDispatch()
|
2016-06-06 06:50:47 -05:00
|
|
|
go e.resultHandler()
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Engine) Stop() {
|
|
|
|
close(e.execQueue)
|
2016-06-06 06:50:47 -05:00
|
|
|
close(e.resultQueue)
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
|
2016-06-06 07:24:14 -05:00
|
|
|
func (e *Engine) alertingTicker() {
|
2016-06-11 06:49:11 -05:00
|
|
|
defer func() {
|
|
|
|
if err := recover(); err != nil {
|
2016-06-22 06:43:11 -05:00
|
|
|
e.log.Error("Scheduler Panic: stopping alertingTicker", "error", err, "stack", log.Stack(1))
|
2016-06-11 06:49:11 -05:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2016-06-06 06:50:47 -05:00
|
|
|
tickIndex := 0
|
|
|
|
|
2016-06-06 04:56:58 -05:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case tick := <-e.ticker.C:
|
2016-06-06 10:11:46 -05:00
|
|
|
// TEMP SOLUTION update rules ever tenth tick
|
2016-06-06 06:50:47 -05:00
|
|
|
if tickIndex%10 == 0 {
|
|
|
|
e.scheduler.Update(e.ruleReader.Fetch())
|
|
|
|
}
|
|
|
|
|
2016-06-06 04:56:58 -05:00
|
|
|
e.scheduler.Tick(tick, e.execQueue)
|
2016-06-06 07:24:14 -05:00
|
|
|
tickIndex++
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Engine) execDispatch() {
|
2016-06-22 06:43:11 -05:00
|
|
|
defer func() {
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
e.log.Error("Scheduler Panic: stopping executor", "error", err, "stack", log.Stack(1))
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2016-06-06 04:56:58 -05:00
|
|
|
for job := range e.execQueue {
|
2016-06-06 10:11:46 -05:00
|
|
|
log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
|
2016-06-06 06:50:47 -05:00
|
|
|
e.executeJob(job)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Engine) executeJob(job *AlertJob) {
|
2016-07-21 06:09:12 -05:00
|
|
|
job.Running = true
|
|
|
|
context := NewAlertResultContext(job.Rule)
|
|
|
|
e.handler.Execute(context)
|
2016-07-19 15:36:59 -05:00
|
|
|
job.Running = false
|
2016-06-06 06:50:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Engine) resultHandler() {
|
2016-06-22 06:43:11 -05:00
|
|
|
defer func() {
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
e.log.Error("Engine Panic, stopping resultHandler", "error", err, "stack", log.Stack(1))
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2016-06-06 06:50:47 -05:00
|
|
|
for result := range e.resultQueue {
|
2016-07-19 15:36:59 -05:00
|
|
|
e.log.Debug("Alert Rule Result", "ruleId", result.Rule.Id, "triggered", result.Triggered)
|
2016-06-06 06:50:47 -05:00
|
|
|
|
2016-06-06 10:11:46 -05:00
|
|
|
if result.Error != nil {
|
2016-07-19 15:36:59 -05:00
|
|
|
e.log.Error("Alert Rule Result Error", "ruleId", result.Rule.Id, "error", result.Error, "retry")
|
2016-06-06 06:50:47 -05:00
|
|
|
} else {
|
2016-06-23 05:57:10 -05:00
|
|
|
e.responseHandler.Handle(result)
|
2016-06-06 06:50:47 -05:00
|
|
|
}
|
2016-06-06 04:56:58 -05:00
|
|
|
}
|
|
|
|
}
|