mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
feat(alerting): work on alerting
This commit is contained in:
parent
624cd6fc0a
commit
f13b869aa4
@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) {
|
||||
// metrics
|
||||
r.Get("/metrics", wrap(GetInternalMetrics))
|
||||
|
||||
r.Group("/alerting", func() {
|
||||
r.Group("/rules", func() {
|
||||
r.Get("/:alertId/states", wrap(GetAlertStates))
|
||||
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
|
||||
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
|
||||
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
|
||||
r.Get("/", wrap(GetAlerts))
|
||||
})
|
||||
|
||||
r.Get("/notifications", wrap(GetAlertNotifications))
|
||||
|
||||
r.Group("/notification", func() {
|
||||
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
|
||||
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
|
||||
r.Get("/:notificationId", wrap(GetAlertNotificationById))
|
||||
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
|
||||
}, reqOrgAdmin)
|
||||
r.Group("/alerts", func() {
|
||||
r.Get("/:alertId/states", wrap(GetAlertStates))
|
||||
//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
|
||||
r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
|
||||
//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
|
||||
r.Get("/", wrap(GetAlerts))
|
||||
})
|
||||
|
||||
r.Get("/alert-notifications", wrap(GetAlertNotifications))
|
||||
|
||||
r.Group("/alert-notifications", func() {
|
||||
r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
|
||||
r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
|
||||
r.Get("/:notificationId", wrap(GetAlertNotificationById))
|
||||
r.Delete("/:notificationId", wrap(DeleteAlertNotification))
|
||||
}, reqOrgAdmin)
|
||||
|
||||
// error test
|
||||
r.Get("/metrics/error", wrap(GenerateError))
|
||||
|
||||
|
@ -18,6 +18,9 @@ type Alert struct {
|
||||
Enabled bool
|
||||
Frequency int64
|
||||
|
||||
CreatedBy int64
|
||||
UpdatedBy int64
|
||||
|
||||
Created time.Time
|
||||
Updated time.Time
|
||||
|
||||
|
@ -5,14 +5,12 @@ var (
|
||||
Ok,
|
||||
Warn,
|
||||
Critical,
|
||||
Acknowledged,
|
||||
Maintenance,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
Ok = "OK"
|
||||
Warn = "WARN"
|
||||
Critical = "CRITICAL"
|
||||
Acknowledged = "ACKNOWLEDGED"
|
||||
Maintenance = "MAINTENANCE"
|
||||
Pending = "PENDING"
|
||||
Ok = "OK"
|
||||
Warn = "WARN"
|
||||
Critical = "CRITICAL"
|
||||
Pending = "PENDING"
|
||||
Unknown = "UNKNOWN"
|
||||
)
|
||||
|
@ -19,6 +19,7 @@ type Engine struct {
|
||||
ruleReader RuleReader
|
||||
log log.Logger
|
||||
responseHandler ResultHandler
|
||||
alertJobTimeout time.Duration
|
||||
}
|
||||
|
||||
func NewEngine() *Engine {
|
||||
@ -31,6 +32,7 @@ func NewEngine() *Engine {
|
||||
ruleReader: NewRuleReader(),
|
||||
log: log.New("alerting.engine"),
|
||||
responseHandler: NewResultHandler(),
|
||||
alertJobTimeout: time.Second * 5,
|
||||
}
|
||||
|
||||
return e
|
||||
@ -87,24 +89,25 @@ func (e *Engine) execDispatch() {
|
||||
}
|
||||
|
||||
func (e *Engine) executeJob(job *AlertJob) {
|
||||
now := time.Now()
|
||||
startTime := time.Now()
|
||||
|
||||
resultChan := make(chan *AlertResult, 1)
|
||||
go e.handler.Execute(job, resultChan)
|
||||
|
||||
select {
|
||||
case <-time.After(time.Second * 5):
|
||||
case <-time.After(e.alertJobTimeout):
|
||||
e.resultQueue <- &AlertResult{
|
||||
State: alertstates.Pending,
|
||||
Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
|
||||
Error: fmt.Errorf("Timeout"),
|
||||
AlertJob: job,
|
||||
ExeuctionTime: time.Now(),
|
||||
State: alertstates.Pending,
|
||||
Error: fmt.Errorf("Timeout"),
|
||||
AlertJob: job,
|
||||
StartTime: startTime,
|
||||
EndTime: time.Now(),
|
||||
}
|
||||
close(resultChan)
|
||||
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
|
||||
case result := <-resultChan:
|
||||
result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
|
||||
e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id)
|
||||
duration := float64(result.EndTime.Nanosecond()-result.StartTime.Nanosecond()) / float64(1000000)
|
||||
e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id)
|
||||
e.resultQueue <- result
|
||||
}
|
||||
}
|
||||
@ -117,7 +120,7 @@ func (e *Engine) resultHandler() {
|
||||
}()
|
||||
|
||||
for result := range e.resultQueue {
|
||||
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount)
|
||||
e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount)
|
||||
|
||||
result.AlertJob.Running = false
|
||||
|
||||
|
@ -26,18 +26,24 @@ func NewHandler() *HandlerImpl {
|
||||
}
|
||||
|
||||
func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
|
||||
startTime := time.Now()
|
||||
|
||||
timeSeries, err := e.executeQuery(job)
|
||||
if err != nil {
|
||||
resultQueue <- &AlertResult{
|
||||
Error: err,
|
||||
State: alertstates.Pending,
|
||||
AlertJob: job,
|
||||
ExeuctionTime: time.Now(),
|
||||
Error: err,
|
||||
State: alertstates.Pending,
|
||||
AlertJob: job,
|
||||
StartTime: time.Now(),
|
||||
EndTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
result := e.evaluateRule(job.Rule, timeSeries)
|
||||
result.AlertJob = job
|
||||
result.StartTime = startTime
|
||||
result.EndTime = time.Now()
|
||||
|
||||
resultQueue <- result
|
||||
}
|
||||
|
||||
@ -108,9 +114,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
||||
e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
|
||||
if critResult {
|
||||
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
||||
State: alertstates.Critical,
|
||||
ActualValue: transformedValue,
|
||||
Name: serie.Name,
|
||||
State: alertstates.Critical,
|
||||
Value: transformedValue,
|
||||
Metric: serie.Name,
|
||||
})
|
||||
continue
|
||||
}
|
||||
@ -120,9 +126,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
||||
e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
|
||||
if warnResult {
|
||||
triggeredAlert = append(triggeredAlert, &TriggeredAlert{
|
||||
State: alertstates.Warn,
|
||||
ActualValue: transformedValue,
|
||||
Name: serie.Name,
|
||||
State: alertstates.Warn,
|
||||
Value: transformedValue,
|
||||
Metric: serie.Name,
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -138,5 +144,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
|
||||
}
|
||||
}
|
||||
|
||||
return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()}
|
||||
return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert}
|
||||
}
|
||||
|
@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() {
|
||||
|
||||
type AlertResult struct {
|
||||
State string
|
||||
ActualValue float64
|
||||
Duration float64
|
||||
TriggeredAlerts []*TriggeredAlert
|
||||
Description string
|
||||
Error error
|
||||
AlertJob *AlertJob
|
||||
ExeuctionTime time.Time
|
||||
Description string
|
||||
StartTime time.Time
|
||||
EndTime time.Time
|
||||
|
||||
AlertJob *AlertJob
|
||||
}
|
||||
|
||||
// TriggeredAlert records a single series that matched an alert condition.
type TriggeredAlert struct {
	// Value is the transformed series value that was compared against the
	// condition.
	Value float64
	// Metric is the name of the series that triggered.
	Metric string
	// State is the alert state assigned to this series (e.g. Warn, Critical).
	State string
	Tags  map[string]string
}
|
||||
|
||||
type Level struct {
|
||||
|
@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool {
|
||||
}
|
||||
|
||||
lastExecution := query.Result.Created
|
||||
asdf := result.ExeuctionTime.Add(time.Minute * -15)
|
||||
asdf := result.StartTime.Add(time.Minute * -15)
|
||||
olderThen15Min := lastExecution.Before(asdf)
|
||||
changedState := query.Result.NewState != result.State
|
||||
|
||||
|
@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error {
|
||||
|
||||
func SaveAlerts(cmd *m.SaveAlertsCommand) error {
|
||||
return inTransaction(func(sess *xorm.Session) error {
|
||||
alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
|
||||
existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
upsertAlerts(alerts, cmd, sess)
|
||||
deleteMissingAlerts(alerts, cmd, sess)
|
||||
if err := upsertAlerts(existingAlerts, cmd, sess); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := deleteMissingAlerts(existingAlerts, cmd, sess); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
|
||||
func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
|
||||
for _, alert := range cmd.Alerts {
|
||||
update := false
|
||||
var alertToUpdate *m.Alert
|
||||
|
||||
for _, k := range alerts {
|
||||
for _, k := range existingAlerts {
|
||||
if alert.PanelId == k.PanelId {
|
||||
update = true
|
||||
alert.Id = k.Id
|
||||
@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio
|
||||
|
||||
sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
|
||||
}
|
||||
|
||||
} else {
|
||||
alert.Updated = time.Now()
|
||||
alert.Created = time.Now()
|
||||
alert.State = "OK"
|
||||
alert.State = "UNKNOWN"
|
||||
alert.CreatedBy = cmd.UserId
|
||||
alert.UpdatedBy = cmd.UserId
|
||||
|
||||
_, err := sess.Insert(alert)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -20,7 +20,7 @@ export class AlertLogCtrl {
|
||||
}
|
||||
|
||||
loadAlertLogs(alertId: number) {
|
||||
this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => {
|
||||
this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => {
|
||||
this.alertLogs = _.map(result, log => {
|
||||
log.iconCss = alertDef.getCssForState(log.newState);
|
||||
log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
|
||||
@ -28,7 +28,7 @@ export class AlertLogCtrl {
|
||||
});
|
||||
});
|
||||
|
||||
this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => {
|
||||
this.backendSrv.get(`/api/alerts/${alertId}`).then(result => {
|
||||
this.alert = result;
|
||||
});
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ export class AlertListCtrl {
|
||||
state: stats
|
||||
};
|
||||
|
||||
this.backendSrv.get('/api/alerts/rules', params).then(result => {
|
||||
this.backendSrv.get('/api/alerts', params).then(result => {
|
||||
this.alerts = _.map(result, alert => {
|
||||
alert.iconCss = alertDef.getCssForState(alert.state);
|
||||
return alert;
|
||||
|
@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl {
|
||||
}
|
||||
|
||||
loadNotification(notificationId) {
|
||||
this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => {
|
||||
this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => {
|
||||
console.log(result);
|
||||
this.notification = result;
|
||||
});
|
||||
@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl {
|
||||
save() {
|
||||
if (this.notification.id) {
|
||||
console.log('this.notification: ', this.notification);
|
||||
this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification)
|
||||
this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification)
|
||||
.then(result => {
|
||||
this.notification = result;
|
||||
this.$scope.appEvent('alert-success', ['Notification created!', '']);
|
||||
@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl {
|
||||
this.$scope.appEvent('alert-error', ['Unable to create notification.', '']);
|
||||
});
|
||||
} else {
|
||||
this.backendSrv.post(`/api/alerts/notification`, this.notification)
|
||||
this.backendSrv.post(`/api/alert-notifications`, this.notification)
|
||||
.then(result => {
|
||||
this.notification = result;
|
||||
this.$scope.appEvent('alert-success', ['Notification updated!', '']);
|
||||
|
@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl {
|
||||
}
|
||||
|
||||
// Fetches the alert notifications from the backend and stores them on the
// controller. Uses the `/api/alert-notifications` route only.
loadNotifications() {
  this.backendSrv.get(`/api/alert-notifications`).then(result => {
    this.notifications = result;
  });
}
|
||||
|
||||
deleteNotification(notificationId) {
|
||||
this.backendSrv.delete(`/api/alerts/notification/${notificationId}`)
|
||||
this.backendSrv.delete(`/api/alerts-notification/${notificationId}`)
|
||||
.then(() => {
|
||||
this.notifications = this.notifications.filter(notification => {
|
||||
return notification.id !== notificationId;
|
||||
|
7
public/app/plugins/datasource/grafana-live/plugin.json
Normal file
7
public/app/plugins/datasource/grafana-live/plugin.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"type": "datasource",
|
||||
"name": "Grafana Live",
|
||||
"id": "grafana-live",
|
||||
|
||||
"metrics": true
|
||||
}
|
@ -123,14 +123,14 @@
|
||||
<h5 class="section-heading">Information</h5>
|
||||
<div class="gf-form">
|
||||
<span class="gf-form-label width-10">Alert name</span>
|
||||
<input type="text" class="gf-form-input width-22" ng-model="ctrl.panel.alerting.name">
|
||||
<input type="text" class="gf-form-input width-22" ng-model="ctrl.alert.name">
|
||||
</div>
|
||||
<div class="gf-form-inline">
|
||||
<div class="gf-form">
|
||||
<span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
|
||||
</div>
|
||||
<div class="gf-form">
|
||||
<textarea rows="5" ng-model="ctrl.panel.alerting.description" class="gf-form-input width-22"></textarea>
|
||||
<textarea rows="5" ng-model="ctrl.alert.description" class="gf-form-input width-22"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
Loading…
Reference in New Issue
Block a user