mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Handle NoData and Error evaluation results (#33194)
* set processing time * merge labels and set on response * use state cache for adding alerts to rules * minor cleanup * add support for NoData and Error results * rename test * bring in changes from other PRs that have been merged * pr feedback * add integration test * close state tracker cleanup on context.Done * fixup test * not those annotations
This commit is contained in:
@@ -56,6 +56,10 @@ func NewStateTracker(logger log.Logger) *StateTracker {
|
||||
return tracker
|
||||
}
|
||||
|
||||
func (st *StateTracker) Close() {
|
||||
st.quit <- struct{}{}
|
||||
}
|
||||
|
||||
func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
|
||||
st.cache.mtxStates.Lock()
|
||||
defer st.cache.mtxStates.Unlock()
|
||||
@@ -76,13 +80,8 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
|
||||
annotations = alertRule.Annotations
|
||||
}
|
||||
|
||||
newResults := []StateEvaluation{
|
||||
{
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
},
|
||||
}
|
||||
|
||||
// If the first result we get is alerting, set StartsAt to EvaluatedAt because we
|
||||
// do not have data for determining StartsAt otherwise
|
||||
st.Log.Debug("adding new alert state cache entry", "cacheId", id, "state", result.State.String(), "evaluatedAt", result.EvaluatedAt.String())
|
||||
newState := AlertState{
|
||||
AlertRuleUID: alertRule.UID,
|
||||
@@ -90,7 +89,6 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
|
||||
CacheId: id,
|
||||
Labels: lbs,
|
||||
State: result.State,
|
||||
Results: newResults,
|
||||
Annotations: annotations,
|
||||
EvaluationDuration: evaluationDuration,
|
||||
}
|
||||
@@ -136,57 +134,111 @@ func (st *StateTracker) ProcessEvalResults(alertRule *ngModels.AlertRule, result
|
||||
//Set the current state based on evaluation results
|
||||
func (st *StateTracker) setNextState(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
|
||||
currentState := st.getOrCreate(alertRule, result, evaluationDuration)
|
||||
|
||||
currentState.LastEvaluationTime = result.EvaluatedAt
|
||||
currentState.EvaluationDuration = evaluationDuration
|
||||
currentState.Results = append(currentState.Results, StateEvaluation{
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
})
|
||||
|
||||
st.Log.Debug("setting alert state", "uid", alertRule.UID)
|
||||
switch {
|
||||
case currentState.State == result.State:
|
||||
st.Log.Debug("no state transition", "cacheId", currentState.CacheId, "state", currentState.State.String())
|
||||
currentState.LastEvaluationTime = result.EvaluatedAt
|
||||
currentState.EvaluationDuration = evaluationDuration
|
||||
currentState.Results = append(currentState.Results, StateEvaluation{
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
})
|
||||
if currentState.State == eval.Alerting {
|
||||
//TODO: Move me and unify me with the top level constant
|
||||
// 10 seconds is the base evaluation interval. We use 2 times that interval to make sure we send an alert
|
||||
// that would expire after at least 2 iterations and avoid flapping.
|
||||
resendDelay := 10 * 2 * time.Second
|
||||
if alertRule.For > resendDelay {
|
||||
resendDelay = alertRule.For * 2
|
||||
}
|
||||
currentState.EndsAt = result.EvaluatedAt.Add(resendDelay)
|
||||
}
|
||||
st.set(currentState)
|
||||
return currentState
|
||||
case currentState.State == eval.Normal && result.State == eval.Alerting:
|
||||
st.Log.Debug("state transition from normal to alerting", "cacheId", currentState.CacheId)
|
||||
currentState.State = eval.Alerting
|
||||
currentState.LastEvaluationTime = result.EvaluatedAt
|
||||
currentState.StartsAt = result.EvaluatedAt
|
||||
currentState.EndsAt = result.EvaluatedAt.Add(alertRule.For * time.Second)
|
||||
currentState.EvaluationDuration = evaluationDuration
|
||||
currentState.Results = append(currentState.Results, StateEvaluation{
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
})
|
||||
currentState.Annotations["alerting"] = result.EvaluatedAt.String()
|
||||
st.set(currentState)
|
||||
return currentState
|
||||
case currentState.State == eval.Alerting && result.State == eval.Normal:
|
||||
st.Log.Debug("state transition from alerting to normal", "cacheId", currentState.CacheId)
|
||||
currentState.State = eval.Normal
|
||||
currentState.LastEvaluationTime = result.EvaluatedAt
|
||||
currentState.EndsAt = result.EvaluatedAt
|
||||
currentState.EvaluationDuration = evaluationDuration
|
||||
currentState.Results = append(currentState.Results, StateEvaluation{
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
})
|
||||
st.set(currentState)
|
||||
return currentState
|
||||
default:
|
||||
return currentState
|
||||
switch result.State {
|
||||
case eval.Normal:
|
||||
currentState = resultNormal(currentState, result)
|
||||
case eval.Alerting:
|
||||
currentState = currentState.resultAlerting(alertRule, result)
|
||||
case eval.Error:
|
||||
currentState = currentState.resultError(alertRule, result)
|
||||
case eval.NoData:
|
||||
currentState = currentState.resultNoData(alertRule, result)
|
||||
case eval.Pending: // we do not emit results with this state
|
||||
}
|
||||
|
||||
st.set(currentState)
|
||||
return currentState
|
||||
}
|
||||
|
||||
func resultNormal(alertState AlertState, result eval.Result) AlertState {
|
||||
newState := alertState
|
||||
if alertState.State != eval.Normal {
|
||||
newState.EndsAt = result.EvaluatedAt
|
||||
}
|
||||
newState.State = eval.Normal
|
||||
return newState
|
||||
}
|
||||
|
||||
func (a AlertState) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
|
||||
switch a.State {
|
||||
case eval.Alerting:
|
||||
if !(alertRule.For > 0) {
|
||||
// If there is not For set, we will set EndsAt to be twice the evaluation interval
|
||||
// to avoid flapping with every evaluation
|
||||
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
|
||||
return a
|
||||
}
|
||||
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
|
||||
case eval.Pending:
|
||||
if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
|
||||
a.State = eval.Alerting
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
|
||||
}
|
||||
default:
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
if !(alertRule.For > 0) {
|
||||
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
|
||||
a.State = eval.Alerting
|
||||
} else {
|
||||
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
|
||||
if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
|
||||
a.State = eval.Alerting
|
||||
} else {
|
||||
a.State = eval.Pending
|
||||
}
|
||||
}
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func (a AlertState) resultError(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
|
||||
if a.StartsAt.IsZero() {
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
}
|
||||
if !(alertRule.For > 0) {
|
||||
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
|
||||
} else {
|
||||
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
|
||||
}
|
||||
|
||||
switch alertRule.ExecErrState {
|
||||
case ngModels.AlertingErrState:
|
||||
a.State = eval.Alerting
|
||||
case ngModels.KeepLastStateErrState:
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func (a AlertState) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
|
||||
if a.StartsAt.IsZero() {
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
}
|
||||
if !(alertRule.For > 0) {
|
||||
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
|
||||
} else {
|
||||
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
|
||||
}
|
||||
|
||||
switch alertRule.NoDataState {
|
||||
case ngModels.Alerting:
|
||||
a.State = eval.Alerting
|
||||
case ngModels.NoData:
|
||||
a.State = eval.NoData
|
||||
case ngModels.KeepLastState:
|
||||
case ngModels.OK:
|
||||
a.State = eval.Normal
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func (st *StateTracker) GetAll() []AlertState {
|
||||
|
Reference in New Issue
Block a user