Alerting: Handle NoData and Error evaluation results (#33194)

* set processing time

* merge labels and set on response

* use state cache for adding alerts to rules

* minor cleanup

* add support for NoData and Error results

* rename test

* bring in changes from other PRs tha have been merged

* pr feedback

* add integration test

* close state tracker cleanup on context.Done

* fixup test

* not those annotations
This commit is contained in:
David Parrott
2021-04-23 11:47:52 -07:00
committed by GitHub
parent 948cba199b
commit ca79206498
6 changed files with 1044 additions and 280 deletions

View File

@@ -56,6 +56,10 @@ func NewStateTracker(logger log.Logger) *StateTracker {
return tracker
}
func (st *StateTracker) Close() {
st.quit <- struct{}{}
}
func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
st.cache.mtxStates.Lock()
defer st.cache.mtxStates.Unlock()
@@ -76,13 +80,8 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
annotations = alertRule.Annotations
}
newResults := []StateEvaluation{
{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
},
}
// If the first result we get is alerting, set StartsAt to EvaluatedAt because we
// do not have data for determining StartsAt otherwise
st.Log.Debug("adding new alert state cache entry", "cacheId", id, "state", result.State.String(), "evaluatedAt", result.EvaluatedAt.String())
newState := AlertState{
AlertRuleUID: alertRule.UID,
@@ -90,7 +89,6 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
CacheId: id,
Labels: lbs,
State: result.State,
Results: newResults,
Annotations: annotations,
EvaluationDuration: evaluationDuration,
}
@@ -136,57 +134,111 @@ func (st *StateTracker) ProcessEvalResults(alertRule *ngModels.AlertRule, result
//Set the current state based on evaluation results
func (st *StateTracker) setNextState(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
currentState := st.getOrCreate(alertRule, result, evaluationDuration)
currentState.LastEvaluationTime = result.EvaluatedAt
currentState.EvaluationDuration = evaluationDuration
currentState.Results = append(currentState.Results, StateEvaluation{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
})
st.Log.Debug("setting alert state", "uid", alertRule.UID)
switch {
case currentState.State == result.State:
st.Log.Debug("no state transition", "cacheId", currentState.CacheId, "state", currentState.State.String())
currentState.LastEvaluationTime = result.EvaluatedAt
currentState.EvaluationDuration = evaluationDuration
currentState.Results = append(currentState.Results, StateEvaluation{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
})
if currentState.State == eval.Alerting {
//TODO: Move me and unify me with the top level constant
// 10 seconds is the base evaluation interval. We use 2 times that interval to make sure we send an alert
// that would expire after at least 2 iterations and avoid flapping.
resendDelay := 10 * 2 * time.Second
if alertRule.For > resendDelay {
resendDelay = alertRule.For * 2
}
currentState.EndsAt = result.EvaluatedAt.Add(resendDelay)
}
st.set(currentState)
return currentState
case currentState.State == eval.Normal && result.State == eval.Alerting:
st.Log.Debug("state transition from normal to alerting", "cacheId", currentState.CacheId)
currentState.State = eval.Alerting
currentState.LastEvaluationTime = result.EvaluatedAt
currentState.StartsAt = result.EvaluatedAt
currentState.EndsAt = result.EvaluatedAt.Add(alertRule.For * time.Second)
currentState.EvaluationDuration = evaluationDuration
currentState.Results = append(currentState.Results, StateEvaluation{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
})
currentState.Annotations["alerting"] = result.EvaluatedAt.String()
st.set(currentState)
return currentState
case currentState.State == eval.Alerting && result.State == eval.Normal:
st.Log.Debug("state transition from alerting to normal", "cacheId", currentState.CacheId)
currentState.State = eval.Normal
currentState.LastEvaluationTime = result.EvaluatedAt
currentState.EndsAt = result.EvaluatedAt
currentState.EvaluationDuration = evaluationDuration
currentState.Results = append(currentState.Results, StateEvaluation{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
})
st.set(currentState)
return currentState
default:
return currentState
switch result.State {
case eval.Normal:
currentState = resultNormal(currentState, result)
case eval.Alerting:
currentState = currentState.resultAlerting(alertRule, result)
case eval.Error:
currentState = currentState.resultError(alertRule, result)
case eval.NoData:
currentState = currentState.resultNoData(alertRule, result)
case eval.Pending: // we do not emit results with this state
}
st.set(currentState)
return currentState
}
func resultNormal(alertState AlertState, result eval.Result) AlertState {
newState := alertState
if alertState.State != eval.Normal {
newState.EndsAt = result.EvaluatedAt
}
newState.State = eval.Normal
return newState
}
func (a AlertState) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
switch a.State {
case eval.Alerting:
if !(alertRule.For > 0) {
// If there is not For set, we will set EndsAt to be twice the evaluation interval
// to avoid flapping with every evaluation
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
return a
}
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
case eval.Pending:
if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
a.State = eval.Alerting
a.StartsAt = result.EvaluatedAt
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
}
default:
a.StartsAt = result.EvaluatedAt
if !(alertRule.For > 0) {
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
a.State = eval.Alerting
} else {
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
a.State = eval.Alerting
} else {
a.State = eval.Pending
}
}
}
return a
}
func (a AlertState) resultError(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
if a.StartsAt.IsZero() {
a.StartsAt = result.EvaluatedAt
}
if !(alertRule.For > 0) {
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
} else {
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
}
switch alertRule.ExecErrState {
case ngModels.AlertingErrState:
a.State = eval.Alerting
case ngModels.KeepLastStateErrState:
}
return a
}
func (a AlertState) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
if a.StartsAt.IsZero() {
a.StartsAt = result.EvaluatedAt
}
if !(alertRule.For > 0) {
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
} else {
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
}
switch alertRule.NoDataState {
case ngModels.Alerting:
a.State = eval.Alerting
case ngModels.NoData:
a.State = eval.NoData
case ngModels.KeepLastState:
case ngModels.OK:
a.State = eval.Normal
}
return a
}
func (st *StateTracker) GetAll() []AlertState {