mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
feat(alerting): work on alerting
This commit is contained in:
@@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) {
|
|||||||
// metrics
|
// metrics
|
||||||
r.Get("/metrics", wrap(GetInternalMetrics))
|
r.Get("/metrics", wrap(GetInternalMetrics))
|
||||||
|
|
||||||
r.Group("/alerting", func() {
|
r.Group("/alerts", func() {
|
||||||
r.Group("/rules", func() {
|
r.Get("/:alertId/states", wrap(GetAlertStates))
|
||||||
r.Get("/:alertId/states", wrap(GetAlertStates))
|
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
|
||||||
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
|
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
|
||||||
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
|
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
|
||||||
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
|
r.Get("/", wrap(GetAlerts))
|
||||||
r.Get("/", wrap(GetAlerts))
|
|
||||||
})
|
|
||||||
|
|
||||||
r.Get("/notifications", wrap(GetAlertNotifications))
|
|
||||||
|
|
||||||
r.Group("/notification", func() {
|
|
||||||
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
|
|
||||||
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
|
|
||||||
r.Get("/:notificationId", wrap(GetAlertNotificationById))
|
|
||||||
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
|
|
||||||
}, reqOrgAdmin)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
r.Get("/alert-notifications", wrap(GetAlertNotifications))
|
||||||
|
|
||||||
|
r.Group("/alert-notifications", func() {
|
||||||
|
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
|
||||||
|
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
|
||||||
|
r.Get("/:notificationId", wrap(GetAlertNotificationById))
|
||||||
|
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
|
||||||
|
}, reqOrgAdmin)
|
||||||
|
|
||||||
// error test
|
// error test
|
||||||
r.Get("/metrics/error", wrap(GenerateError))
|
r.Get("/metrics/error", wrap(GenerateError))
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,9 @@ type Alert struct {
|
|||||||
Enabled bool
|
Enabled bool
|
||||||
Frequency int64
|
Frequency int64
|
||||||
|
|
||||||
|
CreatedBy int64
|
||||||
|
UpdatedBy int64
|
||||||
|
|
||||||
Created time.Time
|
Created time.Time
|
||||||
Updated time.Time
|
Updated time.Time
|
||||||
|
|
||||||
|
|||||||
@@ -5,14 +5,12 @@ var (
|
|||||||
Ok,
|
Ok,
|
||||||
Warn,
|
Warn,
|
||||||
Critical,
|
Critical,
|
||||||
Acknowledged,
|
Unknown,
|
||||||
Maintenance,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok = "OK"
|
Ok = "OK"
|
||||||
Warn = "WARN"
|
Warn = "WARN"
|
||||||
Critical = "CRITICAL"
|
Critical = "CRITICAL"
|
||||||
Acknowledged = "ACKNOWLEDGED"
|
Pending = "PENDING"
|
||||||
Maintenance = "MAINTENANCE"
|
Unknown = "UNKNOWN"
|
||||||
Pending = "PENDING"
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ type Engine struct {
|
|||||||
ruleReader RuleReader
|
ruleReader RuleReader
|
||||||
log log.Logger
|
log log.Logger
|
||||||
responseHandler ResultHandler
|
responseHandler ResultHandler
|
||||||
|
alertJobTimeout time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewEngine() *Engine {
|
func NewEngine() *Engine {
|
||||||
@@ -31,6 +32,7 @@ func NewEngine() *Engine {
|
|||||||
ruleReader: NewRuleReader(),
|
ruleReader: NewRuleReader(),
|
||||||
log: log.New("alerting.engine"),
|
log: log.New("alerting.engine"),
|
||||||
responseHandler: NewResultHandler(),
|
responseHandler: NewResultHandler(),
|
||||||
|
alertJobTimeout: time.Second * 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
return e
|
return e
|
||||||
@@ -87,24 +89,25 @@ func (e *Engine) execDispatch() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) executeJob(job *AlertJob) {
|
func (e *Engine) executeJob(job *AlertJob) {
|
||||||
now := time.Now()
|
startTime := time.Now()
|
||||||
|
|
||||||
resultChan := make(chan *AlertResult, 1)
|
resultChan := make(chan *AlertResult, 1)
|
||||||
go e.handler.Execute(job, resultChan)
|
go e.handler.Execute(job, resultChan)
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-time.After(time.Second * 5):
|
case <-time.After(e.alertJobTimeout):
|
||||||
e.resultQueue <- &AlertResult{
|
e.resultQueue <- &AlertResult{
|
||||||
State: alertstates.Pending,
|
State: alertstates.Pending,
|
||||||
Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
|
Error: fmt.Errorf("Timeout"),
|
||||||
Error: fmt.Errorf("Timeout"),
|
AlertJob: job,
|
||||||
AlertJob: job,
|
StartTime: startTime,
|
||||||
ExeuctionTime: time.Now(),
|
EndTime: time.Now(),
|
||||||
}
|
}
|
||||||
|
// Do not close resultChan here: the handler goroutine is the sender and may
// still deliver its result after the timeout; sending on a closed channel
// panics. The channel is buffered (size 1), so that late send never blocks.
|
||||||
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
|
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
|
||||||
case result := <-resultChan:
|
case result := <-resultChan:
|
||||||
result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
|
// Elapsed milliseconds. Time.Nanosecond() is only the offset within the
// current second, so subtract the timestamps with Time.Sub instead.
duration := float64(result.EndTime.Sub(result.StartTime).Nanoseconds()) / float64(1000000)
|
||||||
e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id)
|
e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id)
|
||||||
e.resultQueue <- result
|
e.resultQueue <- result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -117,7 +120,7 @@ func (e *Engine) resultHandler() {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
for result := range e.resultQueue {
|
for result := range e.resultQueue {
|
||||||
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount)
|
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount)
|
||||||
|
|
||||||
result.AlertJob.Running = false
|
result.AlertJob.Running = false
|
||||||
|
|
||||||
|
|||||||
@@ -26,18 +26,24 @@ func NewHandler() *HandlerImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
|
func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
|
||||||
|
startTime := time.Now()
|
||||||
|
|
||||||
timeSeries, err := e.executeQuery(job)
|
timeSeries, err := e.executeQuery(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resultQueue <- &AlertResult{
|
resultQueue <- &AlertResult{
|
||||||
Error: err,
|
Error: err,
|
||||||
State: alertstates.Pending,
|
State: alertstates.Pending,
|
||||||
AlertJob: job,
|
AlertJob: job,
|
||||||
ExeuctionTime: time.Now(),
|
StartTime: startTime, // captured at function entry, so the failed query's run time is included
|
||||||
|
EndTime: time.Now(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result := e.evaluateRule(job.Rule, timeSeries)
|
result := e.evaluateRule(job.Rule, timeSeries)
|
||||||
result.AlertJob = job
|
result.AlertJob = job
|
||||||
|
result.StartTime = startTime
|
||||||
|
result.EndTime = time.Now()
|
||||||
|
|
||||||
resultQueue <- result
|
resultQueue <- result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,9 +114,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
|||||||
e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
|
e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
|
||||||
if critResult {
|
if critResult {
|
||||||
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
||||||
State: alertstates.Critical,
|
State: alertstates.Critical,
|
||||||
ActualValue: transformedValue,
|
Value: transformedValue,
|
||||||
Name: serie.Name,
|
Metric: serie.Name,
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -120,9 +126,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
|||||||
e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
|
e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
|
||||||
if warnResult {
|
if warnResult {
|
||||||
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
||||||
State: alertstates.Warn,
|
State: alertstates.Warn,
|
||||||
ActualValue: transformedValue,
|
Value: transformedValue,
|
||||||
Name: serie.Name,
|
Metric: serie.Name,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -138,5 +144,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()}
|
return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() {
|
|||||||
|
|
||||||
type AlertResult struct {
|
type AlertResult struct {
|
||||||
State string
|
State string
|
||||||
ActualValue float64
|
|
||||||
Duration float64
|
|
||||||
TriggeredAlerts []*TriggeredAlert
|
TriggeredAlerts []*TriggeredAlert
|
||||||
Description string
|
|
||||||
Error error
|
Error error
|
||||||
AlertJob *AlertJob
|
Description string
|
||||||
ExeuctionTime time.Time
|
StartTime time.Time
|
||||||
|
EndTime time.Time
|
||||||
|
|
||||||
|
AlertJob *AlertJob
|
||||||
}
|
}
|
||||||
|
|
||||||
type TriggeredAlert struct {
|
type TriggeredAlert struct {
|
||||||
ActualValue float64
|
Value float64
|
||||||
Name string
|
Metric string
|
||||||
State string
|
State string
|
||||||
|
Tags map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
type Level struct {
|
type Level struct {
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lastExecution := query.Result.Created
|
lastExecution := query.Result.Created
|
||||||
asdf := result.ExeuctionTime.Add(time.Minute * -15)
|
asdf := result.StartTime.Add(time.Minute * -15)
|
||||||
olderThen15Min := lastExecution.Before(asdf)
|
olderThen15Min := lastExecution.Before(asdf)
|
||||||
changedState := query.Result.NewState != result.State
|
changedState := query.Result.NewState != result.State
|
||||||
|
|
||||||
|
|||||||
@@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error {
|
|||||||
|
|
||||||
func SaveAlerts(cmd *m.SaveAlertsCommand) error {
|
func SaveAlerts(cmd *m.SaveAlertsCommand) error {
|
||||||
return inTransaction(func(sess *xorm.Session) error {
|
return inTransaction(func(sess *xorm.Session) error {
|
||||||
alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
|
existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
upsertAlerts(alerts, cmd, sess)
|
if err := upsertAlerts(existingAlerts, cmd, sess); err != nil {
|
||||||
deleteMissingAlerts(alerts, cmd, sess)
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := deleteMissingAlerts(existingAlerts, cmd, sess); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
|
func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
|
||||||
for _, alert := range cmd.Alerts {
|
for _, alert := range cmd.Alerts {
|
||||||
update := false
|
update := false
|
||||||
var alertToUpdate *m.Alert
|
var alertToUpdate *m.Alert
|
||||||
|
|
||||||
for _, k := range alerts {
|
for _, k := range existingAlerts {
|
||||||
if alert.PanelId == k.PanelId {
|
if alert.PanelId == k.PanelId {
|
||||||
update = true
|
update = true
|
||||||
alert.Id = k.Id
|
alert.Id = k.Id
|
||||||
@@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio
|
|||||||
|
|
||||||
sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
|
sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
alert.Updated = time.Now()
|
alert.Updated = time.Now()
|
||||||
alert.Created = time.Now()
|
alert.Created = time.Now()
|
||||||
alert.State = "OK"
|
alert.State = "UNKNOWN"
|
||||||
|
alert.CreatedBy = cmd.UserId
|
||||||
|
alert.UpdatedBy = cmd.UserId
|
||||||
|
|
||||||
_, err := sess.Insert(alert)
|
_, err := sess.Insert(alert)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ export class AlertLogCtrl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
loadAlertLogs(alertId: number) {
|
loadAlertLogs(alertId: number) {
|
||||||
this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => {
|
this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => {
|
||||||
this.alertLogs = _.map(result, log => {
|
this.alertLogs = _.map(result, log => {
|
||||||
log.iconCss = alertDef.getCssForState(log.newState);
|
log.iconCss = alertDef.getCssForState(log.newState);
|
||||||
log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
|
log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
|
||||||
@@ -28,7 +28,7 @@ export class AlertLogCtrl {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => {
|
this.backendSrv.get(`/api/alerts/${alertId}`).then(result => {
|
||||||
this.alert = result;
|
this.alert = result;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ export class AlertListCtrl {
|
|||||||
state: stats
|
state: stats
|
||||||
};
|
};
|
||||||
|
|
||||||
this.backendSrv.get('/api/alerts/rules', params).then(result => {
|
this.backendSrv.get('/api/alerts', params).then(result => {
|
||||||
this.alerts = _.map(result, alert => {
|
this.alerts = _.map(result, alert => {
|
||||||
alert.iconCss = alertDef.getCssForState(alert.state);
|
alert.iconCss = alertDef.getCssForState(alert.state);
|
||||||
return alert;
|
return alert;
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
loadNotification(notificationId) {
|
loadNotification(notificationId) {
|
||||||
this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => {
|
this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => {
|
||||||
console.log(result);
|
console.log(result);
|
||||||
this.notification = result;
|
this.notification = result;
|
||||||
});
|
});
|
||||||
@@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl {
|
|||||||
save() {
|
save() {
|
||||||
if (this.notification.id) {
|
if (this.notification.id) {
|
||||||
console.log('this.notification: ', this.notification);
|
console.log('this.notification: ', this.notification);
|
||||||
this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification)
|
this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification)
|
||||||
.then(result => {
|
.then(result => {
|
||||||
this.notification = result;
|
this.notification = result;
|
||||||
this.$scope.appEvent('alert-success', ['Notification created!', '']);
|
this.$scope.appEvent('alert-success', ['Notification created!', '']);
|
||||||
@@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl {
|
|||||||
this.$scope.appEvent('alert-error', ['Unable to create notification.', '']);
|
this.$scope.appEvent('alert-error', ['Unable to create notification.', '']);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
this.backendSrv.post(`/api/alerts/notification`, this.notification)
|
this.backendSrv.post(`/api/alert-notifications`, this.notification)
|
||||||
.then(result => {
|
.then(result => {
|
||||||
this.notification = result;
|
this.notification = result;
|
||||||
this.$scope.appEvent('alert-success', ['Notification updated!', '']);
|
this.$scope.appEvent('alert-success', ['Notification updated!', '']);
|
||||||
|
|||||||
@@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
loadNotifications() {
|
loadNotifications() {
|
||||||
this.backendSrv.get(`/api/alerts/notifications`).then(result => {
|
this.backendSrv.get(`/api/alert-notifications`).then(result => {
|
||||||
this.notifications = result;
|
this.notifications = result;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
deleteNotification(notificationId) {
|
deleteNotification(notificationId) {
|
||||||
this.backendSrv.delete(`/api/alerts/notification/${notificationId}`)
|
this.backendSrv.delete(`/api/alert-notifications/${notificationId}`)
|
||||||
.then(() => {
|
.then(() => {
|
||||||
this.notifications = this.notifications.filter(notification => {
|
this.notifications = this.notifications.filter(notification => {
|
||||||
return notification.id !== notificationId;
|
return notification.id !== notificationId;
|
||||||
|
|||||||
7
public/app/plugins/datasource/grafana-live/plugin.json
Normal file
7
public/app/plugins/datasource/grafana-live/plugin.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"type": "datasource",
|
||||||
|
"name": "Grafana Live",
|
||||||
|
"id": "grafana-live",
|
||||||
|
|
||||||
|
"metrics": true
|
||||||
|
}
|
||||||
@@ -123,14 +123,14 @@
|
|||||||
<h5 class="section-heading">Information</h5>
|
<h5 class="section-heading">Information</h5>
|
||||||
<div class="gf-form">
|
<div class="gf-form">
|
||||||
<span class="gf-form-label width-10">Alert name</span>
|
<span class="gf-form-label width-10">Alert name</span>
|
||||||
<input type="text" class="gf-form-input width-22" ng-model="ctrl.panel.alerting.name">
|
<input type="text" class="gf-form-input width-22" ng-model="ctrl.alert.name">
|
||||||
</div>
|
</div>
|
||||||
<div class="gf-form-inline">
|
<div class="gf-form-inline">
|
||||||
<div class="gf-form">
|
<div class="gf-form">
|
||||||
<span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
|
<span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="gf-form">
|
<div class="gf-form">
|
||||||
<textarea rows="5" ng-model="ctrl.panel.alerting.description" class="gf-form-input width-22"></textarea>
|
<textarea rows="5" ng-model="ctrl.alert.description" class="gf-form-input width-22"></textarea>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user