feat(alerting): testing alert is starting to work

Torkel Ödegaard
2016-07-21 13:09:12 +02:00
parent 9e91aacd34
commit f6a160b270
14 changed files with 104 additions and 70 deletions

View File

@@ -83,6 +83,9 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
   }
   if err := bus.Dispatch(&backendCmd); err != nil {
+    if validationErr, ok := err.(alerting.AlertValidationError); ok {
+      return ApiError(422, validationErr.Error(), nil)
+    }
     return ApiError(500, "Failed to test rule", err)
   }
@@ -96,6 +99,10 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
     dtoRes.Error = res.Error.Error()
   }
+  for _, log := range res.Logs {
+    dtoRes.Logs = append(dtoRes.Logs, &dtos.AlertTestResultLog{Message: log.Message, Data: log.Data})
+  }
   dtoRes.Timing = fmt.Sprintf("%1.3fs", res.GetDurationSeconds())
   return Json(200, dtoRes)

View File

@@ -40,7 +40,13 @@ type AlertTestCommand struct {
 }
 type AlertTestResult struct {
   Triggered bool   `json:"triggerd"`
   Timing    string `json:"timing"`
   Error     string `json:"error"`
+  Logs      []*AlertTestResultLog `json:"logs"`
+}
+
+type AlertTestResultLog struct {
+  Message string      `json:"message"`
+  Data    interface{} `json:"data"`
 }
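For reference, a minimal standalone sketch (structs mirrored from the hunk above, with illustrative values) of the JSON body the test endpoint returns once the new logs field is populated. Note the json:"triggerd" tag typo is present in the source itself:

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors of the dtos structs shown in the diff above.
type AlertTestResultLog struct {
	Message string      `json:"message"`
	Data    interface{} `json:"data"`
}

type AlertTestResult struct {
	Triggered bool                  `json:"triggerd"` // tag typo exists in the source
	Timing    string                `json:"timing"`
	Error     string                `json:"error"`
	Logs      []*AlertTestResultLog `json:"logs"`
}

func main() {
	// Illustrative values, not real output from Grafana.
	res := AlertTestResult{
		Triggered: true,
		Timing:    "0.032s",
		Logs: []*AlertTestResultLog{
			{Message: "Query Condition Query Result", Data: []string{"series A", "series B"}},
		},
	}
	out, _ := json.MarshalIndent(res, "", "  ")
	fmt.Println(string(out))
	// Prints roughly:
	// {"triggerd": true, "timing": "0.032s", "error": "", "logs": [{"message": "...", "data": [...]}]}
}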

View File

@@ -23,6 +23,14 @@ type AlertRule struct {
   Notifications []int64
 }
+
+type AlertValidationError struct {
+  Reason string
+}
+
+func (e AlertValidationError) Error() string {
+  return e.Reason
+}
 var (
   ValueFormatRegex = regexp.MustCompile("^\\d+")
   UnitFormatRegex  = regexp.MustCompile("\\w{1}$")
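A minimal, self-contained sketch of the pattern this introduces: giving validation failures their own error type lets a caller (the API layer, as in the first hunk of this commit) type-assert the error and answer with a 422 instead of a generic 500. The parseEvaluator function below is illustrative, not the real NewDefaultAlertEvaluator:

package main

import (
	"errors"
	"fmt"
)

// A dedicated error type for user-fixable validation problems.
type AlertValidationError struct {
	Reason string
}

func (e AlertValidationError) Error() string {
	return e.Reason
}

// parseEvaluator stands in for an evaluator constructor: it returns a
// validation error for bad input and a plain error for everything else.
func parseEvaluator(evalType string) error {
	if evalType == "" {
		return AlertValidationError{Reason: "Evaluator missing type property"}
	}
	if evalType == "boom" {
		return errors.New("unexpected backend failure")
	}
	return nil
}

func main() {
	for _, t := range []string{"", "boom", ">"} {
		err := parseEvaluator(t)
		// The caller distinguishes the two cases via a type assertion.
		if validationErr, ok := err.(AlertValidationError); ok {
			fmt.Println("422:", validationErr.Error())
		} else if err != nil {
			fmt.Println("500:", err)
		} else {
			fmt.Println("200: ok")
		}
	}
}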

View File

@@ -2,7 +2,6 @@ package alerting
 import (
   "encoding/json"
-  "errors"
   "fmt"
   "github.com/grafana/grafana/pkg/bus"
@@ -50,15 +49,22 @@ func (c *QueryCondition) executeQuery(context *AlertResultContext) (tsdb.TimeSer
   resp, err := c.HandleRequest(req)
   if err != nil {
-    return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() error %v", err)
+    return nil, fmt.Errorf("tsdb.HandleRequest() error %v", err)
   }
   for _, v := range resp.Results {
     if v.Error != nil {
-      return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() response error %v", v)
+      return nil, fmt.Errorf("tsdb.HandleRequest() response error %v", v)
     }
     result = append(result, v.Series...)
+
+    if context.IsTestRun {
+      context.Logs = append(context.Logs, &AlertResultLogEntry{
+        Message: "Query Condition Query Result",
+        Data:    v.Series,
+      })
+    }
   }
   return result, nil
@@ -154,17 +160,17 @@ func NewDefaultAlertEvaluator(model *simplejson.Json) (*DefaultAlertEvaluator, e
   evaluator.Type = model.Get("type").MustString()
   if evaluator.Type == "" {
-    return nil, errors.New("Alert evaluator missing type property")
+    return nil, AlertValidationError{Reason: "Evaluator missing type property"}
   }
   params := model.Get("params").MustArray()
   if len(params) == 0 {
-    return nil, errors.New("Alert evaluator missing threshold parameter")
+    return nil, AlertValidationError{Reason: "Evaluator missing threshold parameter"}
   }
   threshold, ok := params[0].(json.Number)
   if !ok {
-    return nil, errors.New("Alert evaluator has invalid threshold parameter")
+    return nil, AlertValidationError{Reason: "Evaluator has invalid threshold parameter"}
   }
   evaluator.Threshold, _ = threshold.Float64()

View File

@@ -1,7 +1,6 @@
 package alerting
 import (
-  "fmt"
   "time"
   "github.com/benbjohnson/clock"
@@ -18,7 +17,6 @@ type Engine struct {
   ruleReader      RuleReader
   log             log.Logger
   responseHandler ResultHandler
-  alertJobTimeout time.Duration
 }
 func NewEngine() *Engine {
@@ -31,7 +29,6 @@ func NewEngine() *Engine {
     ruleReader:      NewRuleReader(),
     log:             log.New("alerting.engine"),
     responseHandler: NewResultHandler(),
-    alertJobTimeout: time.Second * 5,
   }
   return e
@@ -82,32 +79,14 @@ func (e *Engine) execDispatch() {
   for job := range e.execQueue {
     log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
-    job.Running = true
     e.executeJob(job)
   }
 }
 func (e *Engine) executeJob(job *AlertJob) {
-  startTime := time.Now()
-  resultChan := make(chan *AlertResultContext, 1)
-  go e.handler.Execute(job.Rule, resultChan)
-  select {
-  case <-time.After(e.alertJobTimeout):
-    e.resultQueue <- &AlertResultContext{
-      Error:     fmt.Errorf("Timeout"),
-      Rule:      job.Rule,
-      StartTime: startTime,
-      EndTime:   time.Now(),
-    }
-    close(resultChan)
-    e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
-  case result := <-resultChan:
-    e.log.Debug("Job Execution done", "timing", result.GetDurationSeconds(), "ruleId", job.Rule.Id)
-    e.resultQueue <- result
-  }
+  job.Running = true
+  context := NewAlertResultContext(job.Rule)
+  e.handler.Execute(context)
   job.Running = false
 }

View File

@@ -129,7 +129,7 @@ func (e *DashAlertExtractor) GetAlerts() ([]*m.Alert, error) {
         alerts = append(alerts, alert)
       } else {
         e.log.Error("Failed to extract alerts from dashboard", "error", err)
-        return nil, errors.New("Failed to extract alerts from dashboard")
+        return nil, err
       }
     }
   }

View File

@@ -1,6 +1,7 @@
 package alerting
 import (
+  "fmt"
   "time"
   "github.com/grafana/grafana/pkg/log"
@@ -11,41 +12,50 @@
 )
 type HandlerImpl struct {
   log             log.Logger
+  alertJobTimeout time.Duration
 }
 func NewHandler() *HandlerImpl {
   return &HandlerImpl{
     log:             log.New("alerting.executor"),
+    alertJobTimeout: time.Second * 5,
   }
 }
-func (e *HandlerImpl) Execute(rule *AlertRule, resultQueue chan *AlertResultContext) {
-  resultQueue <- e.eval(rule)
-}
-func (e *HandlerImpl) eval(rule *AlertRule) *AlertResultContext {
-  result := &AlertResultContext{
-    StartTime: time.Now(),
-    Rule:      rule,
+func (e *HandlerImpl) Execute(context *AlertResultContext) {
+  go e.eval(context)
+  select {
+  case <-time.After(e.alertJobTimeout):
+    context.Error = fmt.Errorf("Timeout")
+    context.EndTime = time.Now()
+    e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id)
+  case <-context.DoneChan:
+    e.log.Debug("Job Execution done", "timing", context.GetDurationSeconds(), "alertId", context.Rule.Id)
   }
-  for _, condition := range rule.Conditions {
-    condition.Eval(result)
+}
+func (e *HandlerImpl) eval(context *AlertResultContext) {
+  for _, condition := range context.Rule.Conditions {
+    condition.Eval(context)
     // break if condition could not be evaluated
-    if result.Error != nil {
+    if context.Error != nil {
       break
     }
     // break if result has not triggered yet
-    if result.Triggered == false {
+    if context.Triggered == false {
       break
     }
   }
-  result.EndTime = time.Now()
-  return result
+  context.EndTime = time.Now()
+  context.DoneChan <- true
 }
 // func (e *HandlerImpl) executeQuery(job *AlertJob) (tsdb.TimeSeriesSlice, error) {
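A stripped-down sketch of the execution pattern the handler now owns: eval runs in a goroutine, signals completion on a buffered done channel, and Execute selects between that signal and a timeout. Everything below is simplified and illustrative; the real code carries the full AlertResultContext and logs through log.Logger:

package main

import (
	"fmt"
	"time"
)

type resultContext struct {
	Error    error
	DoneChan chan bool // buffered, so eval never blocks if the caller already timed out
}

func eval(ctx *resultContext) {
	time.Sleep(50 * time.Millisecond) // pretend to evaluate conditions
	ctx.DoneChan <- true
}

func execute(ctx *resultContext, timeout time.Duration) {
	go eval(ctx)

	select {
	case <-time.After(timeout):
		ctx.Error = fmt.Errorf("Timeout")
		fmt.Println("job execution timeout")
	case <-ctx.DoneChan:
		fmt.Println("job execution done")
	}
}

func main() {
	execute(&resultContext{DoneChan: make(chan bool, 1)}, 5*time.Second)       // completes
	execute(&resultContext{DoneChan: make(chan bool, 1)}, 10*time.Millisecond) // times out
}

The buffer size of one matters: if the timeout fires first, the evaluation goroutine can still deliver its done signal without blocking and exits cleanly.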

View File

@@ -7,7 +7,7 @@ import (
 )
 type AlertHandler interface {
-  Execute(rule *AlertRule, resultChan chan *AlertResultContext)
+  Execute(context *AlertResultContext)
 }
 type Scheduler interface {

View File

@@ -28,18 +28,37 @@ func (aj *AlertJob) IncRetry() {
 type AlertResultContext struct {
   Triggered   bool
+  IsTestRun   bool
   Details     []*AlertResultDetail
+  Logs        []*AlertResultLogEntry
   Error       error
   Description string
   StartTime   time.Time
   EndTime     time.Time
   Rule        *AlertRule
+  DoneChan    chan bool
+  CancelChan  chan bool
 }
 func (a *AlertResultContext) GetDurationSeconds() float64 {
   return float64(a.EndTime.Nanosecond()-a.StartTime.Nanosecond()) / float64(1000000000)
 }
+
+func NewAlertResultContext(rule *AlertRule) *AlertResultContext {
+  return &AlertResultContext{
+    StartTime:  time.Now(),
+    Rule:       rule,
+    Logs:       make([]*AlertResultLogEntry, 0),
+    DoneChan:   make(chan bool, 1),
+    CancelChan: make(chan bool, 1),
+  }
+}
+
+type AlertResultLogEntry struct {
+  Message string
+  Data    interface{}
+}
 type AlertResultDetail struct {
   Value  float64
   Metric string

View File

@@ -2,7 +2,6 @@ package alerting
 import (
   "fmt"
-  "time"
   "github.com/grafana/grafana/pkg/bus"
   "github.com/grafana/grafana/pkg/components/simplejson"
@@ -38,28 +37,21 @@ func handleAlertTestCommand(cmd *AlertTestCommand) error {
         return err
       }
-      if res, err := testAlertRule(rule); err != nil {
-        return err
-      } else {
-        cmd.Result = res
-        return nil
-      }
+      cmd.Result = testAlertRule(rule)
+      return nil
     }
   }
   return fmt.Errorf("Could not find alert with panel id %d", cmd.PanelId)
 }
-func testAlertRule(rule *AlertRule) (*AlertResultContext, error) {
+func testAlertRule(rule *AlertRule) *AlertResultContext {
   handler := NewHandler()
-  resultChan := make(chan *AlertResultContext, 1)
-  handler.Execute(rule, resultChan)
-  select {
-  case <-time.After(time.Second * 10):
-    return &AlertResultContext{Error: fmt.Errorf("Timeout")}, nil
-  case result := <-resultChan:
-    return result, nil
-  }
+  context := NewAlertResultContext(rule)
+  context.IsTestRun = true
+  handler.Execute(context)
+  return context
 }

View File

@@ -19,6 +19,7 @@ func addAlertMigrations(mg *Migrator) {
     {Name: "settings", Type: DB_Text, Nullable: false},
     {Name: "frequency", Type: DB_BigInt, Nullable: false},
     {Name: "handler", Type: DB_BigInt, Nullable: false},
+    {Name: "severity", Type: DB_Text, Nullable: false},
     {Name: "enabled", Type: DB_Bool, Nullable: false},
     {Name: "created", Type: DB_DateTime, Nullable: false},
     {Name: "updated", Type: DB_DateTime, Nullable: false},

View File

@@ -16,7 +16,7 @@ export class AlertSrv {
   init() {
     this.$rootScope.onAppEvent('alert-error', (e, alert) => {
-      this.set(alert[0], alert[1], 'error', 0);
+      this.set(alert[0], alert[1], 'error', 7000);
     }, this.$rootScope);
     this.$rootScope.onAppEvent('alert-warning', (e, alert) => {

View File

@@ -69,7 +69,7 @@ export class AlertTabCtrl {
   initModel() {
     var alert = this.alert = this.panel.alert = this.panel.alert || {};
-    alert.conditions = [];
+    alert.conditions = alert.conditions || [];
     if (alert.conditions.length === 0) {
       alert.conditions.push(this.buildDefaultCondition());
     }
@@ -149,8 +149,8 @@ export class AlertTabCtrl {
       panelId: this.panelCtrl.panel.id,
     };
-    this.backendSrv.post('/api/alerts/test', payload).then(res => {
-      this.testResult = res;
+    return this.backendSrv.post('/api/alerts/test', payload).then(res => {
+      this.testResult = angular.toJson(res, true);
       this.testing = false;
     });
   }

View File

@@ -131,6 +131,12 @@
   Evaluating rule <i class="fa fa-spinner fa-spin"></i>
 </div>
+<div class="gf-form-group" ng-if="ctrl.testResult">
+  <pre>
+    {{ctrl.testResult}}
+  </pre>
+</div>
 <div class="gf-form-group" ng-if="!ctrl.alert.enabled">
   <div class="gf-form-button-row">
     <button class="btn btn-inverse" ng-click="ctrl.enable()">