feat(alerting): work on alerting

Torkel Ödegaard 2016-07-14 13:32:16 +02:00
parent 624cd6fc0a
commit f13b869aa4
14 changed files with 95 additions and 72 deletions

View File

@@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) {
// metrics
r.Get("/metrics", wrap(GetInternalMetrics))
r.Group("/alerting", func() {
r.Group("/rules", func() {
r.Get("/:alertId/states", wrap(GetAlertStates))
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
r.Get("/", wrap(GetAlerts))
})
r.Get("/notifications", wrap(GetAlertNotifications))
r.Group("/notification", func() {
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
r.Get("/:notificationId", wrap(GetAlertNotificationById))
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
}, reqOrgAdmin)
r.Group("/alerts", func() {
r.Get("/:alertId/states", wrap(GetAlertStates))
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
r.Get("/", wrap(GetAlerts))
})
r.Get("/alert-notifications", wrap(GetAlertNotifications))
r.Group("/alert-notifications", func() {
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
r.Get("/:notificationId", wrap(GetAlertNotificationById))
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
}, reqOrgAdmin)
// error test
r.Get("/metrics/error", wrap(GenerateError))

View File

@@ -18,6 +18,9 @@ type Alert struct {
Enabled bool
Frequency int64
CreatedBy int64
UpdatedBy int64
Created time.Time
Updated time.Time

View File

@@ -5,14 +5,12 @@ var (
Ok,
Warn,
Critical,
Acknowledged,
Maintenance,
Unknown,
}
Ok = "OK"
Warn = "WARN"
Critical = "CRITICAL"
Acknowledged = "ACKNOWLEDGED"
Maintenance = "MAINTENANCE"
Pending = "PENDING"
Ok = "OK"
Warn = "WARN"
Critical = "CRITICAL"
Pending = "PENDING"
Unknown = "UNKNOWN"
)
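As a reading aid, a hedged sketch of how the remaining state strings above can be grouped with a small validity check; the IsValid helper and the package-main wrapper are illustrative additions, not part of the commit.

```go
package main

import "fmt"

// These constants mirror the state strings kept after this change.
const (
	Ok       = "OK"
	Warn     = "WARN"
	Critical = "CRITICAL"
	Pending  = "PENDING"
	Unknown  = "UNKNOWN"
)

// ValidStates mirrors the var block above: the states the rest of the
// alerting code is expected to persist.
var ValidStates = []string{Ok, Warn, Critical, Unknown}

// IsValid reports whether s is one of the persistable alert states.
// This helper is illustrative only.
func IsValid(s string) bool {
	for _, state := range ValidStates {
		if state == s {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(IsValid("CRITICAL"))     // true
	fmt.Println(IsValid("ACKNOWLEDGED")) // false
}
```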

View File

@@ -19,6 +19,7 @@ type Engine struct {
ruleReader RuleReader
log log.Logger
responseHandler ResultHandler
alertJobTimeout time.Duration
}
func NewEngine() *Engine {
@@ -31,6 +32,7 @@ func NewEngine() *Engine {
ruleReader: NewRuleReader(),
log: log.New("alerting.engine"),
responseHandler: NewResultHandler(),
alertJobTimeout: time.Second * 5,
}
return e
@@ -87,24 +89,25 @@ func (e *Engine) execDispatch() {
}
func (e *Engine) executeJob(job *AlertJob) {
now := time.Now()
startTime := time.Now()
resultChan := make(chan *AlertResult, 1)
go e.handler.Execute(job, resultChan)
select {
case <-time.After(time.Second * 5):
case <-time.After(e.alertJobTimeout):
e.resultQueue <- &AlertResult{
State: alertstates.Pending,
Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
Error: fmt.Errorf("Timeout"),
AlertJob: job,
ExeuctionTime: time.Now(),
State: alertstates.Pending,
Error: fmt.Errorf("Timeout"),
AlertJob: job,
StartTime: startTime,
EndTime: time.Now(),
}
close(resultChan)
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
case result := <-resultChan:
result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id)
duration := float64(result.EndTime.Nanosecond()-result.StartTime.Nanosecond()) / float64(1000000)
e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id)
e.resultQueue <- result
}
}
@@ -117,7 +120,7 @@ func (e *Engine) resultHandler() {
}()
for result := range e.resultQueue {
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount)
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount)
result.AlertJob.Running = false
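A minimal sketch of the select/time.After timeout pattern executeJob uses above, and of deriving elapsed milliseconds from a StartTime/EndTime pair; the runWithTimeout name and the result type are illustrative stand-ins.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// result is a stand-in for AlertResult: just enough fields to show timing.
type result struct {
	Err       error
	StartTime time.Time
	EndTime   time.Time
}

// runWithTimeout runs work in a goroutine and gives up after timeout,
// mirroring the select on the result channel vs. time.After above.
func runWithTimeout(work func() error, timeout time.Duration) result {
	start := time.Now()
	done := make(chan error, 1)

	go func() { done <- work() }()

	select {
	case <-time.After(timeout):
		return result{Err: errors.New("timeout"), StartTime: start, EndTime: time.Now()}
	case err := <-done:
		return result{Err: err, StartTime: start, EndTime: time.Now()}
	}
}

func main() {
	res := runWithTimeout(func() error {
		time.Sleep(50 * time.Millisecond) // pretend to execute the alert query
		return nil
	}, 5*time.Second)

	// Elapsed milliseconds come from the full duration (EndTime minus StartTime);
	// time.Time.Nanosecond() alone only yields the sub-second component.
	durationMs := float64(res.EndTime.Sub(res.StartTime).Nanoseconds()) / float64(time.Millisecond)
	fmt.Printf("timeTakenMs=%.1f err=%v\n", durationMs, res.Err)
}
```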

View File

@@ -26,18 +26,24 @@ func NewHandler() *HandlerImpl {
}
func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
startTime := time.Now()
timeSeries, err := e.executeQuery(job)
if err != nil {
resultQueue <- &AlertResult{
Error: err,
State: alertstates.Pending,
AlertJob: job,
ExeuctionTime: time.Now(),
Error: err,
State: alertstates.Pending,
AlertJob: job,
StartTime: time.Now(),
EndTime: time.Now(),
}
}
result := e.evaluateRule(job.Rule, timeSeries)
result.AlertJob = job
result.StartTime = startTime
result.EndTime = time.Now()
resultQueue <- result
}
@@ -108,9 +114,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
if critResult {
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
State: alertstates.Critical,
ActualValue: transformedValue,
Name: serie.Name,
State: alertstates.Critical,
Value: transformedValue,
Metric: serie.Name,
})
continue
}
@@ -120,9 +126,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
if warnResult {
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
State: alertstates.Warn,
ActualValue: transformedValue,
Name: serie.Name,
State: alertstates.Warn,
Value: transformedValue,
Metric: serie.Name,
})
}
}
@@ -138,5 +144,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
}
}
return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()}
return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert}
}
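A hedged sketch of the per-series walk evaluateRule performs above: compare each value against critical and warn levels, crit first, and collect triggered entries. The >= comparisons stand in for the rule's configurable conditions, and the trigger type is a simplified TriggeredAlert.

```go
package main

import "fmt"

// trigger is a simplified stand-in for TriggeredAlert.
type trigger struct {
	State  string
	Value  float64
	Metric string
}

// evaluate checks every series value against a critical and a warn threshold,
// crit first so a series never triggers both, much like the loop above.
func evaluate(values map[string]float64, warnLevel, critLevel float64) []trigger {
	var triggered []trigger
	for metric, value := range values {
		switch {
		case value >= critLevel:
			triggered = append(triggered, trigger{State: "CRITICAL", Value: value, Metric: metric})
		case value >= warnLevel:
			triggered = append(triggered, trigger{State: "WARN", Value: value, Metric: metric})
		}
	}
	return triggered
}

func main() {
	series := map[string]float64{"web-01.cpu": 92.0, "web-02.cpu": 71.5, "web-03.cpu": 12.3}
	for _, t := range evaluate(series, 70, 90) {
		fmt.Printf("%-10s %-10s %.1f\n", t.Metric, t.State, t.Value)
	}
}
```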

View File

@@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() {
type AlertResult struct {
State string
ActualValue float64
Duration float64
TriggeredAlerts []*TriggeredAlert
Description string
Error error
AlertJob *AlertJob
ExeuctionTime time.Time
Description string
StartTime time.Time
EndTime time.Time
AlertJob *AlertJob
}
type TriggeredAlert struct {
ActualValue float64
Name string
State string
Value float64
Metric string
State string
Tags map[string]string
}
type Level struct {

View File

@@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool {
}
lastExecution := query.Result.Created
asdf := result.ExeuctionTime.Add(time.Minute * -15)
asdf := result.StartTime.Add(time.Minute * -15)
olderThen15Min := lastExecution.Before(asdf)
changedState := query.Result.NewState != result.State
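A minimal sketch of the decision shouldUpdateState encodes above: write a new state row when the state changed or the last stored execution is more than 15 minutes older than this run. How the two booleans are combined is not visible in the hunk, so the || below is an assumption, as are the names.

```go
package main

import (
	"fmt"
	"time"
)

// shouldUpdate reports whether a state row should be persisted: either the
// state itself changed, or the last write is older than 15 minutes relative
// to when this evaluation started.
func shouldUpdate(lastWritten, startTime time.Time, lastState, newState string) bool {
	olderThan15Min := lastWritten.Before(startTime.Add(-15 * time.Minute))
	changedState := lastState != newState
	return changedState || olderThan15Min
}

func main() {
	now := time.Now()
	fmt.Println(shouldUpdate(now.Add(-20*time.Minute), now, "OK", "OK"))       // true: stale
	fmt.Println(shouldUpdate(now.Add(-5*time.Minute), now, "OK", "CRITICAL"))  // true: state changed
	fmt.Println(shouldUpdate(now.Add(-5*time.Minute), now, "OK", "OK"))        // false
}
```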

View File

@@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error {
func SaveAlerts(cmd *m.SaveAlertsCommand) error {
return inTransaction(func(sess *xorm.Session) error {
alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
if err != nil {
return err
}
upsertAlerts(alerts, cmd, sess)
deleteMissingAlerts(alerts, cmd, sess)
if err := upsertAlerts(existingAlerts, cmd, sess); err != nil {
return err
}
if err := deleteMissingAlerts(existingAlerts, cmd, sess); err != nil {
return err
}
return nil
})
}
func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
for _, alert := range cmd.Alerts {
update := false
var alertToUpdate *m.Alert
for _, k := range alerts {
for _, k := range existingAlerts {
if alert.PanelId == k.PanelId {
update = true
alert.Id = k.Id
@@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio
sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
}
} else {
alert.Updated = time.Now()
alert.Created = time.Now()
alert.State = "OK"
alert.State = "UNKNOWN"
alert.CreatedBy = cmd.UserId
alert.UpdatedBy = cmd.UserId
_, err := sess.Insert(alert)
if err != nil {
return err
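A self-contained sketch of the save flow above: match incoming alerts to existing ones by PanelId, update or insert accordingly, and drop existing alerts that no longer appear. Plain slices stand in for the xorm session and transaction, and every helper name here is illustrative.

```go
package main

import (
	"fmt"
	"time"
)

// alert carries only the fields the save flow above touches.
type alert struct {
	Id        int64
	PanelId   int64
	Name      string
	State     string
	CreatedBy int64
	UpdatedBy int64
	Created   time.Time
	Updated   time.Time
}

// saveAlerts mirrors the upsert-then-delete-missing flow: incoming alerts are
// matched to existing ones by PanelId and updated, unmatched ones are inserted
// as new UNKNOWN alerts, and existing alerts with no incoming counterpart are
// simply not carried over.
func saveAlerts(existing, incoming []alert, userId int64) []alert {
	now := time.Now()
	byPanel := make(map[int64]alert, len(existing))
	for _, e := range existing {
		byPanel[e.PanelId] = e
	}

	saved := make([]alert, 0, len(incoming))
	for _, in := range incoming {
		if cur, ok := byPanel[in.PanelId]; ok {
			// Update path: keep identity and creation audit fields.
			in.Id = cur.Id
			in.Created = cur.Created
			in.CreatedBy = cur.CreatedBy
		} else {
			// Insert path: new alerts start out as UNKNOWN, as in the diff.
			in.Created = now
			in.CreatedBy = userId
			in.State = "UNKNOWN"
		}
		in.Updated = now
		in.UpdatedBy = userId
		saved = append(saved, in)
	}
	return saved
}

func main() {
	existing := []alert{{Id: 1, PanelId: 10, Name: "cpu high", State: "OK"}}
	incoming := []alert{{PanelId: 10, Name: "cpu high"}, {PanelId: 11, Name: "mem high"}}
	for _, a := range saveAlerts(existing, incoming, 42) {
		fmt.Printf("panel=%d id=%d createdBy=%d updatedBy=%d\n", a.PanelId, a.Id, a.CreatedBy, a.UpdatedBy)
	}
}
```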

View File

@@ -20,7 +20,7 @@ export class AlertLogCtrl {
}
loadAlertLogs(alertId: number) {
this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => {
this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => {
this.alertLogs = _.map(result, log => {
log.iconCss = alertDef.getCssForState(log.newState);
log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
@@ -28,7 +28,7 @@ export class AlertLogCtrl {
});
});
this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => {
this.backendSrv.get(`/api/alerts/${alertId}`).then(result => {
this.alert = result;
});
}

View File

@@ -49,7 +49,7 @@ export class AlertListCtrl {
state: stats
};
this.backendSrv.get('/api/alerts/rules', params).then(result => {
this.backendSrv.get('/api/alerts', params).then(result => {
this.alerts = _.map(result, alert => {
alert.iconCss = alertDef.getCssForState(alert.state);
return alert;

View File

@@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl {
}
loadNotification(notificationId) {
this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => {
this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => {
console.log(result);
this.notification = result;
});
@@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl {
save() {
if (this.notification.id) {
console.log('this.notification: ', this.notification);
this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification)
this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification)
.then(result => {
this.notification = result;
this.$scope.appEvent('alert-success', ['Notification created!', '']);
@@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl {
this.$scope.appEvent('alert-error', ['Unable to create notification.', '']);
});
} else {
this.backendSrv.post(`/api/alerts/notification`, this.notification)
this.backendSrv.post(`/api/alert-notifications`, this.notification)
.then(result => {
this.notification = result;
this.$scope.appEvent('alert-success', ['Notification updated!', '']);

View File

@@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl {
}
loadNotifications() {
this.backendSrv.get(`/api/alerts/notifications`).then(result => {
this.backendSrv.get(`/api/alert-notifications`).then(result => {
this.notifications = result;
});
}
deleteNotification(notificationId) {
this.backendSrv.delete(`/api/alerts/notification/${notificationId}`)
this.backendSrv.delete(`/api/alerts-notification/${notificationId}`)
.then(() => {
this.notifications = this.notifications.filter(notification => {
return notification.id !== notificationId;

View File

@@ -0,0 +1,7 @@
{
"type": "datasource",
"name": "Grafana Live",
"id": "grafana-live",
"metrics": true
}
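For reference, a small sketch of reading a plugin manifest like the new grafana-live one into a Go struct; the pluginManifest type and its json tags are assumptions based only on the keys shown above.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// pluginManifest models just the keys present in the plugin.json above.
type pluginManifest struct {
	Type    string `json:"type"`
	Name    string `json:"name"`
	Id      string `json:"id"`
	Metrics bool   `json:"metrics"`
}

func main() {
	raw := []byte(`{
		"type": "datasource",
		"name": "Grafana Live",
		"id": "grafana-live",
		"metrics": true
	}`)

	var m pluginManifest
	if err := json.Unmarshal(raw, &m); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s plugin %q (%s), metrics=%v\n", m.Type, m.Name, m.Id, m.Metrics)
}
```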

View File

@@ -123,14 +123,14 @@
<h5 class="section-heading">Information</h5>
<div class="gf-form">
<span class="gf-form-label width-10">Alert name</span>
<input type="text" class="gf-form-input width-22" ng-model="ctrl.panel.alerting.name">
<input type="text" class="gf-form-input width-22" ng-model="ctrl.alert.name">
</div>
<div class="gf-form-inline">
<div class="gf-form">
<span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
</div>
<div class="gf-form">
<textarea rows="5" ng-model="ctrl.panel.alerting.description" class="gf-form-input width-22"></textarea>
<textarea rows="5" ng-model="ctrl.alert.description" class="gf-form-input width-22"></textarea>
</div>
</div>
</div>