From 04690ca920a88f1255d3e5ba47dfb0350b41343d Mon Sep 17 00:00:00 2001
From: bergquist
Date: Tue, 6 Sep 2016 15:09:49 +0200
Subject: [PATCH] feat(alerting): add support for retries

A failed or timed-out evaluation is re-run while the context's retry
counter has not exceeded MaxRetries.

ref #5855
---
 pkg/services/alerting/eval_context.go |  2 ++
 pkg/services/alerting/eval_handler.go | 26 ++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/pkg/services/alerting/eval_context.go b/pkg/services/alerting/eval_context.go
index 29c51e02abd..24ee0f1252c 100644
--- a/pkg/services/alerting/eval_context.go
+++ b/pkg/services/alerting/eval_context.go
@@ -26,6 +26,7 @@ type EvalContext struct {
 	dashboardSlug   string
 	ImagePublicUrl  string
 	ImageOnDiskPath string
+	RetryCount      int // incremented by the eval handler each time an attempt fails
 }
 
 type StateDescription struct {
@@ -111,5 +112,6 @@ func NewEvalContext(rule *Rule) *EvalContext {
 		DoneChan:   make(chan bool, 1),
 		CancelChan: make(chan bool, 1),
 		log:        log.New("alerting.evalContext"),
+		RetryCount: 0,
 	}
 }
diff --git a/pkg/services/alerting/eval_handler.go b/pkg/services/alerting/eval_handler.go
index e251278e7e1..0b0bebf68f9 100644
--- a/pkg/services/alerting/eval_handler.go
+++ b/pkg/services/alerting/eval_handler.go
@@ -8,6 +8,11 @@ import (
 	"github.com/grafana/grafana/pkg/metrics"
 )
 
+var (
+	// MaxRetries caps how many times a failed or timed-out evaluation is re-run.
+	MaxRetries int = 1
+)
+
 type DefaultEvalHandler struct {
 	log             log.Logger
 	alertJobTimeout time.Duration
@@ -28,8 +33,29 @@ func (e *DefaultEvalHandler) Eval(context *EvalContext) {
 		context.Error = fmt.Errorf("Timeout")
 		context.EndTime = time.Now()
 		e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id)
+		e.retry(context)
 	case <-context.DoneChan:
 		e.log.Debug("Job Execution done", "timeMs", context.GetDurationMs(), "alertId", context.Rule.Id, "firing", context.Firing)
+
+		if context.Error != nil {
+			e.retry(context)
+		}
+	}
+}
+
+// retry records one failed evaluation attempt on the context and re-runs Eval
+// while RetryCount has not exceeded MaxRetries. DoneChan and CancelChan are
+// replaced with fresh channels before Eval is called again.
+func (e *DefaultEvalHandler) retry(context *EvalContext) {
+	e.log.Debug("Retrying eval exeuction", "alertId", context.Rule.Id)
+
+	context.RetryCount++
+	// The bound must be <=: with ">" the first failure (RetryCount == 1,
+	// MaxRetries == 1) was never retried, and any later one recursed forever.
+	if context.RetryCount <= MaxRetries {
+		context.DoneChan = make(chan bool, 1)
+		context.CancelChan = make(chan bool, 1)
+		e.Eval(context)
 	}
 }
 