mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
feat(alerting): testing alert is starting to work
This commit is contained in:
@@ -83,6 +83,9 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := bus.Dispatch(&backendCmd); err != nil {
|
if err := bus.Dispatch(&backendCmd); err != nil {
|
||||||
|
if validationErr, ok := err.(alerting.AlertValidationError); ok {
|
||||||
|
return ApiError(422, validationErr.Error(), nil)
|
||||||
|
}
|
||||||
return ApiError(500, "Failed to test rule", err)
|
return ApiError(500, "Failed to test rule", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,6 +99,10 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
|
|||||||
dtoRes.Error = res.Error.Error()
|
dtoRes.Error = res.Error.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, log := range res.Logs {
|
||||||
|
dtoRes.Logs = append(dtoRes.Logs, &dtos.AlertTestResultLog{Message: log.Message, Data: log.Data})
|
||||||
|
}
|
||||||
|
|
||||||
dtoRes.Timing = fmt.Sprintf("%1.3fs", res.GetDurationSeconds())
|
dtoRes.Timing = fmt.Sprintf("%1.3fs", res.GetDurationSeconds())
|
||||||
|
|
||||||
return Json(200, dtoRes)
|
return Json(200, dtoRes)
|
||||||
|
|||||||
@@ -40,7 +40,13 @@ type AlertTestCommand struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type AlertTestResult struct {
|
type AlertTestResult struct {
|
||||||
Triggered bool `json:"triggerd"`
|
Triggered bool `json:"triggerd"`
|
||||||
Timing string `json:"timing"`
|
Timing string `json:"timing"`
|
||||||
Error string `json:"error"`
|
Error string `json:"error"`
|
||||||
|
Logs []*AlertTestResultLog `json:"logs"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type AlertTestResultLog struct {
|
||||||
|
Message string `json:"message"`
|
||||||
|
Data interface{} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,14 @@ type AlertRule struct {
|
|||||||
Notifications []int64
|
Notifications []int64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type AlertValidationError struct {
|
||||||
|
Reason string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e AlertValidationError) Error() string {
|
||||||
|
return e.Reason
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ValueFormatRegex = regexp.MustCompile("^\\d+")
|
ValueFormatRegex = regexp.MustCompile("^\\d+")
|
||||||
UnitFormatRegex = regexp.MustCompile("\\w{1}$")
|
UnitFormatRegex = regexp.MustCompile("\\w{1}$")
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package alerting
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/bus"
|
"github.com/grafana/grafana/pkg/bus"
|
||||||
@@ -50,15 +49,22 @@ func (c *QueryCondition) executeQuery(context *AlertResultContext) (tsdb.TimeSer
|
|||||||
|
|
||||||
resp, err := c.HandleRequest(req)
|
resp, err := c.HandleRequest(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() error %v", err)
|
return nil, fmt.Errorf("tsdb.HandleRequest() error %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, v := range resp.Results {
|
for _, v := range resp.Results {
|
||||||
if v.Error != nil {
|
if v.Error != nil {
|
||||||
return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() response error %v", v)
|
return nil, fmt.Errorf("tsdb.HandleRequest() response error %v", v)
|
||||||
}
|
}
|
||||||
|
|
||||||
result = append(result, v.Series...)
|
result = append(result, v.Series...)
|
||||||
|
|
||||||
|
if context.IsTestRun {
|
||||||
|
context.Logs = append(context.Logs, &AlertResultLogEntry{
|
||||||
|
Message: "Query Condition Query Result",
|
||||||
|
Data: v.Series,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, nil
|
return result, nil
|
||||||
@@ -154,17 +160,17 @@ func NewDefaultAlertEvaluator(model *simplejson.Json) (*DefaultAlertEvaluator, e
|
|||||||
|
|
||||||
evaluator.Type = model.Get("type").MustString()
|
evaluator.Type = model.Get("type").MustString()
|
||||||
if evaluator.Type == "" {
|
if evaluator.Type == "" {
|
||||||
return nil, errors.New("Alert evaluator missing type property")
|
return nil, AlertValidationError{Reason: "Evaluator missing type property"}
|
||||||
}
|
}
|
||||||
|
|
||||||
params := model.Get("params").MustArray()
|
params := model.Get("params").MustArray()
|
||||||
if len(params) == 0 {
|
if len(params) == 0 {
|
||||||
return nil, errors.New("Alert evaluator missing threshold parameter")
|
return nil, AlertValidationError{Reason: "Evaluator missing threshold parameter"}
|
||||||
}
|
}
|
||||||
|
|
||||||
threshold, ok := params[0].(json.Number)
|
threshold, ok := params[0].(json.Number)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, errors.New("Alert evaluator has invalid threshold parameter")
|
return nil, AlertValidationError{Reason: "Evaluator has invalid threshold parameter"}
|
||||||
}
|
}
|
||||||
|
|
||||||
evaluator.Threshold, _ = threshold.Float64()
|
evaluator.Threshold, _ = threshold.Float64()
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package alerting
|
package alerting
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/benbjohnson/clock"
|
"github.com/benbjohnson/clock"
|
||||||
@@ -18,7 +17,6 @@ type Engine struct {
|
|||||||
ruleReader RuleReader
|
ruleReader RuleReader
|
||||||
log log.Logger
|
log log.Logger
|
||||||
responseHandler ResultHandler
|
responseHandler ResultHandler
|
||||||
alertJobTimeout time.Duration
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewEngine() *Engine {
|
func NewEngine() *Engine {
|
||||||
@@ -31,7 +29,6 @@ func NewEngine() *Engine {
|
|||||||
ruleReader: NewRuleReader(),
|
ruleReader: NewRuleReader(),
|
||||||
log: log.New("alerting.engine"),
|
log: log.New("alerting.engine"),
|
||||||
responseHandler: NewResultHandler(),
|
responseHandler: NewResultHandler(),
|
||||||
alertJobTimeout: time.Second * 5,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return e
|
return e
|
||||||
@@ -82,32 +79,14 @@ func (e *Engine) execDispatch() {
|
|||||||
|
|
||||||
for job := range e.execQueue {
|
for job := range e.execQueue {
|
||||||
log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
|
log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
|
||||||
job.Running = true
|
|
||||||
e.executeJob(job)
|
e.executeJob(job)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) executeJob(job *AlertJob) {
|
func (e *Engine) executeJob(job *AlertJob) {
|
||||||
startTime := time.Now()
|
job.Running = true
|
||||||
|
context := NewAlertResultContext(job.Rule)
|
||||||
resultChan := make(chan *AlertResultContext, 1)
|
e.handler.Execute(context)
|
||||||
go e.handler.Execute(job.Rule, resultChan)
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-time.After(e.alertJobTimeout):
|
|
||||||
e.resultQueue <- &AlertResultContext{
|
|
||||||
Error: fmt.Errorf("Timeout"),
|
|
||||||
Rule: job.Rule,
|
|
||||||
StartTime: startTime,
|
|
||||||
EndTime: time.Now(),
|
|
||||||
}
|
|
||||||
close(resultChan)
|
|
||||||
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
|
|
||||||
case result := <-resultChan:
|
|
||||||
e.log.Debug("Job Execution done", "timing", result.GetDurationSeconds(), "ruleId", job.Rule.Id)
|
|
||||||
e.resultQueue <- result
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Running = false
|
job.Running = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ func (e *DashAlertExtractor) GetAlerts() ([]*m.Alert, error) {
|
|||||||
alerts = append(alerts, alert)
|
alerts = append(alerts, alert)
|
||||||
} else {
|
} else {
|
||||||
e.log.Error("Failed to extract alerts from dashboard", "error", err)
|
e.log.Error("Failed to extract alerts from dashboard", "error", err)
|
||||||
return nil, errors.New("Failed to extract alerts from dashboard")
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package alerting
|
package alerting
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/log"
|
"github.com/grafana/grafana/pkg/log"
|
||||||
@@ -11,41 +12,50 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type HandlerImpl struct {
|
type HandlerImpl struct {
|
||||||
log log.Logger
|
log log.Logger
|
||||||
|
alertJobTimeout time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewHandler() *HandlerImpl {
|
func NewHandler() *HandlerImpl {
|
||||||
return &HandlerImpl{
|
return &HandlerImpl{
|
||||||
log: log.New("alerting.executor"),
|
log: log.New("alerting.executor"),
|
||||||
|
alertJobTimeout: time.Second * 5,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *HandlerImpl) Execute(rule *AlertRule, resultQueue chan *AlertResultContext) {
|
func (e *HandlerImpl) Execute(context *AlertResultContext) {
|
||||||
resultQueue <- e.eval(rule)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *HandlerImpl) eval(rule *AlertRule) *AlertResultContext {
|
go e.eval(context)
|
||||||
result := &AlertResultContext{
|
|
||||||
StartTime: time.Now(),
|
select {
|
||||||
Rule: rule,
|
case <-time.After(e.alertJobTimeout):
|
||||||
|
context.Error = fmt.Errorf("Timeout")
|
||||||
|
context.EndTime = time.Now()
|
||||||
|
e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id)
|
||||||
|
case <-context.DoneChan:
|
||||||
|
e.log.Debug("Job Execution done", "timing", context.GetDurationSeconds(), "alertId", context.Rule.Id)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, condition := range rule.Conditions {
|
}
|
||||||
condition.Eval(result)
|
|
||||||
|
func (e *HandlerImpl) eval(context *AlertResultContext) {
|
||||||
|
|
||||||
|
for _, condition := range context.Rule.Conditions {
|
||||||
|
condition.Eval(context)
|
||||||
|
|
||||||
// break if condition could not be evaluated
|
// break if condition could not be evaluated
|
||||||
if result.Error != nil {
|
if context.Error != nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// break if result has not triggered yet
|
// break if result has not triggered yet
|
||||||
if result.Triggered == false {
|
if context.Triggered == false {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result.EndTime = time.Now()
|
context.EndTime = time.Now()
|
||||||
return result
|
context.DoneChan <- true
|
||||||
}
|
}
|
||||||
|
|
||||||
// func (e *HandlerImpl) executeQuery(job *AlertJob) (tsdb.TimeSeriesSlice, error) {
|
// func (e *HandlerImpl) executeQuery(job *AlertJob) (tsdb.TimeSeriesSlice, error) {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type AlertHandler interface {
|
type AlertHandler interface {
|
||||||
Execute(rule *AlertRule, resultChan chan *AlertResultContext)
|
Execute(context *AlertResultContext)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Scheduler interface {
|
type Scheduler interface {
|
||||||
|
|||||||
@@ -28,18 +28,37 @@ func (aj *AlertJob) IncRetry() {
|
|||||||
|
|
||||||
type AlertResultContext struct {
|
type AlertResultContext struct {
|
||||||
Triggered bool
|
Triggered bool
|
||||||
|
IsTestRun bool
|
||||||
Details []*AlertResultDetail
|
Details []*AlertResultDetail
|
||||||
|
Logs []*AlertResultLogEntry
|
||||||
Error error
|
Error error
|
||||||
Description string
|
Description string
|
||||||
StartTime time.Time
|
StartTime time.Time
|
||||||
EndTime time.Time
|
EndTime time.Time
|
||||||
Rule *AlertRule
|
Rule *AlertRule
|
||||||
|
DoneChan chan bool
|
||||||
|
CancelChan chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *AlertResultContext) GetDurationSeconds() float64 {
|
func (a *AlertResultContext) GetDurationSeconds() float64 {
|
||||||
return float64(a.EndTime.Nanosecond()-a.StartTime.Nanosecond()) / float64(1000000000)
|
return float64(a.EndTime.Nanosecond()-a.StartTime.Nanosecond()) / float64(1000000000)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewAlertResultContext(rule *AlertRule) *AlertResultContext {
|
||||||
|
return &AlertResultContext{
|
||||||
|
StartTime: time.Now(),
|
||||||
|
Rule: rule,
|
||||||
|
Logs: make([]*AlertResultLogEntry, 0),
|
||||||
|
DoneChan: make(chan bool, 1),
|
||||||
|
CancelChan: make(chan bool, 1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type AlertResultLogEntry struct {
|
||||||
|
Message string
|
||||||
|
Data interface{}
|
||||||
|
}
|
||||||
|
|
||||||
type AlertResultDetail struct {
|
type AlertResultDetail struct {
|
||||||
Value float64
|
Value float64
|
||||||
Metric string
|
Metric string
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package alerting
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/bus"
|
"github.com/grafana/grafana/pkg/bus"
|
||||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||||
@@ -38,28 +37,21 @@ func handleAlertTestCommand(cmd *AlertTestCommand) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if res, err := testAlertRule(rule); err != nil {
|
cmd.Result = testAlertRule(rule)
|
||||||
return err
|
return nil
|
||||||
} else {
|
|
||||||
cmd.Result = res
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Errorf("Could not find alert with panel id %d", cmd.PanelId)
|
return fmt.Errorf("Could not find alert with panel id %d", cmd.PanelId)
|
||||||
}
|
}
|
||||||
|
|
||||||
func testAlertRule(rule *AlertRule) (*AlertResultContext, error) {
|
func testAlertRule(rule *AlertRule) *AlertResultContext {
|
||||||
handler := NewHandler()
|
handler := NewHandler()
|
||||||
|
|
||||||
resultChan := make(chan *AlertResultContext, 1)
|
context := NewAlertResultContext(rule)
|
||||||
handler.Execute(rule, resultChan)
|
context.IsTestRun = true
|
||||||
|
|
||||||
select {
|
handler.Execute(context)
|
||||||
case <-time.After(time.Second * 10):
|
|
||||||
return &AlertResultContext{Error: fmt.Errorf("Timeout")}, nil
|
return context
|
||||||
case result := <-resultChan:
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ func addAlertMigrations(mg *Migrator) {
|
|||||||
{Name: "settings", Type: DB_Text, Nullable: false},
|
{Name: "settings", Type: DB_Text, Nullable: false},
|
||||||
{Name: "frequency", Type: DB_BigInt, Nullable: false},
|
{Name: "frequency", Type: DB_BigInt, Nullable: false},
|
||||||
{Name: "handler", Type: DB_BigInt, Nullable: false},
|
{Name: "handler", Type: DB_BigInt, Nullable: false},
|
||||||
|
{Name: "severity", Type: DB_Text, Nullable: false},
|
||||||
{Name: "enabled", Type: DB_Bool, Nullable: false},
|
{Name: "enabled", Type: DB_Bool, Nullable: false},
|
||||||
{Name: "created", Type: DB_DateTime, Nullable: false},
|
{Name: "created", Type: DB_DateTime, Nullable: false},
|
||||||
{Name: "updated", Type: DB_DateTime, Nullable: false},
|
{Name: "updated", Type: DB_DateTime, Nullable: false},
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ export class AlertSrv {
|
|||||||
|
|
||||||
init() {
|
init() {
|
||||||
this.$rootScope.onAppEvent('alert-error', (e, alert) => {
|
this.$rootScope.onAppEvent('alert-error', (e, alert) => {
|
||||||
this.set(alert[0], alert[1], 'error', 0);
|
this.set(alert[0], alert[1], 'error', 7000);
|
||||||
}, this.$rootScope);
|
}, this.$rootScope);
|
||||||
|
|
||||||
this.$rootScope.onAppEvent('alert-warning', (e, alert) => {
|
this.$rootScope.onAppEvent('alert-warning', (e, alert) => {
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ export class AlertTabCtrl {
|
|||||||
initModel() {
|
initModel() {
|
||||||
var alert = this.alert = this.panel.alert = this.panel.alert || {};
|
var alert = this.alert = this.panel.alert = this.panel.alert || {};
|
||||||
|
|
||||||
alert.conditions = [];
|
alert.conditions = alert.conditions || [];
|
||||||
if (alert.conditions.length === 0) {
|
if (alert.conditions.length === 0) {
|
||||||
alert.conditions.push(this.buildDefaultCondition());
|
alert.conditions.push(this.buildDefaultCondition());
|
||||||
}
|
}
|
||||||
@@ -149,8 +149,8 @@ export class AlertTabCtrl {
|
|||||||
panelId: this.panelCtrl.panel.id,
|
panelId: this.panelCtrl.panel.id,
|
||||||
};
|
};
|
||||||
|
|
||||||
this.backendSrv.post('/api/alerts/test', payload).then(res => {
|
return this.backendSrv.post('/api/alerts/test', payload).then(res => {
|
||||||
this.testResult = res;
|
this.testResult = angular.toJson(res, true);
|
||||||
this.testing = false;
|
this.testing = false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -131,6 +131,12 @@
|
|||||||
Evaluating rule <i class="fa fa-spinner fa-spin"></i>
|
Evaluating rule <i class="fa fa-spinner fa-spin"></i>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="gf-form-group" ng-if="ctrl.testResult">
|
||||||
|
<pre>
|
||||||
|
{{ctrl.testResult}}
|
||||||
|
</pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="gf-form-group" ng-if="!ctrl.alert.enabled">
|
<div class="gf-form-group" ng-if="!ctrl.alert.enabled">
|
||||||
<div class="gf-form-button-row">
|
<div class="gf-form-button-row">
|
||||||
<button class="btn btn-inverse" ng-click="ctrl.enable()">
|
<button class="btn btn-inverse" ng-click="ctrl.enable()">
|
||||||
|
|||||||
Reference in New Issue
Block a user