Alerting: Handle NoData and Error evaluation results (#33194)

* set processing time * merge labels and set on response * use state cache for adding alerts to rules * minor cleanup * add support for NoData and Error results * rename test * bring in changes from other PRs that have been merged * pr feedback * add integration test * close state tracker cleanup on context.Done * fixup test * not those annotations
2025-02-25 18:55:37 -06:00 · 2021-04-23 11:47:52 -07:00
parent 948cba199b
commit ca79206498
6 changed files with 1044 additions and 280 deletions
--- a/pkg/services/ngalert/state/state_tracker.go
+++ b/pkg/services/ngalert/state/state_tracker.go
@@ -56,6 +56,10 @@ func NewStateTracker(logger log.Logger) *StateTracker {
 	return tracker
 }

+func (st *StateTracker) Close() {
+	st.quit <- struct{}{}
+}
+
 func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
 	st.cache.mtxStates.Lock()
 	defer st.cache.mtxStates.Unlock()
@@ -76,13 +80,8 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
 		annotations = alertRule.Annotations
 	}

-	newResults := []StateEvaluation{
-		{
-			EvaluationTime:  result.EvaluatedAt,
-			EvaluationState: result.State,
-		},
-	}
-
+	// If the first result we get is alerting, set StartsAt to EvaluatedAt because we
+	// do not have data for determining StartsAt otherwise
 	st.Log.Debug("adding new alert state cache entry", "cacheId", id, "state", result.State.String(), "evaluatedAt", result.EvaluatedAt.String())
 	newState := AlertState{
 		AlertRuleUID:       alertRule.UID,
@@ -90,7 +89,6 @@ func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.R
 		CacheId:            id,
 		Labels:             lbs,
 		State:              result.State,
-		Results:            newResults,
 		Annotations:        annotations,
 		EvaluationDuration: evaluationDuration,
 	}
@@ -136,57 +134,111 @@ func (st *StateTracker) ProcessEvalResults(alertRule *ngModels.AlertRule, result
 //Set the current state based on evaluation results
 func (st *StateTracker) setNextState(alertRule *ngModels.AlertRule, result eval.Result, evaluationDuration time.Duration) AlertState {
 	currentState := st.getOrCreate(alertRule, result, evaluationDuration)
+
+	currentState.LastEvaluationTime = result.EvaluatedAt
+	currentState.EvaluationDuration = evaluationDuration
+	currentState.Results = append(currentState.Results, StateEvaluation{
+		EvaluationTime:  result.EvaluatedAt,
+		EvaluationState: result.State,
+	})
+
 	st.Log.Debug("setting alert state", "uid", alertRule.UID)
-	switch {
-	case currentState.State == result.State:
-		st.Log.Debug("no state transition", "cacheId", currentState.CacheId, "state", currentState.State.String())
-		currentState.LastEvaluationTime = result.EvaluatedAt
-		currentState.EvaluationDuration = evaluationDuration
-		currentState.Results = append(currentState.Results, StateEvaluation{
-			EvaluationTime:  result.EvaluatedAt,
-			EvaluationState: result.State,
-		})
-		if currentState.State == eval.Alerting {
-			//TODO: Move me and unify me with the top level constant
-			// 10 seconds is the base evaluation interval. We use 2 times that interval to make sure we send an alert
-			// that would expire after at least 2 iterations and avoid flapping.
-			resendDelay := 10 * 2 * time.Second
-			if alertRule.For > resendDelay {
-				resendDelay = alertRule.For * 2
-			}
-			currentState.EndsAt = result.EvaluatedAt.Add(resendDelay)
-		}
-		st.set(currentState)
-		return currentState
-	case currentState.State == eval.Normal && result.State == eval.Alerting:
-		st.Log.Debug("state transition from normal to alerting", "cacheId", currentState.CacheId)
-		currentState.State = eval.Alerting
-		currentState.LastEvaluationTime = result.EvaluatedAt
-		currentState.StartsAt = result.EvaluatedAt
-		currentState.EndsAt = result.EvaluatedAt.Add(alertRule.For * time.Second)
-		currentState.EvaluationDuration = evaluationDuration
-		currentState.Results = append(currentState.Results, StateEvaluation{
-			EvaluationTime:  result.EvaluatedAt,
-			EvaluationState: result.State,
-		})
-		currentState.Annotations["alerting"] = result.EvaluatedAt.String()
-		st.set(currentState)
-		return currentState
-	case currentState.State == eval.Alerting && result.State == eval.Normal:
-		st.Log.Debug("state transition from alerting to normal", "cacheId", currentState.CacheId)
-		currentState.State = eval.Normal
-		currentState.LastEvaluationTime = result.EvaluatedAt
-		currentState.EndsAt = result.EvaluatedAt
-		currentState.EvaluationDuration = evaluationDuration
-		currentState.Results = append(currentState.Results, StateEvaluation{
-			EvaluationTime:  result.EvaluatedAt,
-			EvaluationState: result.State,
-		})
-		st.set(currentState)
-		return currentState
-	default:
-		return currentState
+	switch result.State {
+	case eval.Normal:
+		currentState = resultNormal(currentState, result)
+	case eval.Alerting:
+		currentState = currentState.resultAlerting(alertRule, result)
+	case eval.Error:
+		currentState = currentState.resultError(alertRule, result)
+	case eval.NoData:
+		currentState = currentState.resultNoData(alertRule, result)
+	case eval.Pending: // we do not emit results with this state
 	}
+
+	st.set(currentState)
+	return currentState
+}
+
+func resultNormal(alertState AlertState, result eval.Result) AlertState {
+	newState := alertState
+	if alertState.State != eval.Normal {
+		newState.EndsAt = result.EvaluatedAt
+	}
+	newState.State = eval.Normal
+	return newState
+}
+
+func (a AlertState) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
+	switch a.State {
+	case eval.Alerting:
+		if !(alertRule.For > 0) {
+			// If there is not For set, we will set EndsAt to be twice the evaluation interval
+			// to avoid flapping with every evaluation
+			a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
+			return a
+		}
+		a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
+	case eval.Pending:
+		if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
+			a.State = eval.Alerting
+			a.StartsAt = result.EvaluatedAt
+			a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
+		}
+	default:
+		a.StartsAt = result.EvaluatedAt
+		if !(alertRule.For > 0) {
+			a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
+			a.State = eval.Alerting
+		} else {
+			a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
+			if result.EvaluatedAt.Sub(a.StartsAt) > alertRule.For {
+				a.State = eval.Alerting
+			} else {
+				a.State = eval.Pending
+			}
+		}
+	}
+	return a
+}
+
+func (a AlertState) resultError(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
+	if a.StartsAt.IsZero() {
+		a.StartsAt = result.EvaluatedAt
+	}
+	if !(alertRule.For > 0) {
+		a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
+	} else {
+		a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
+	}
+
+	switch alertRule.ExecErrState {
+	case ngModels.AlertingErrState:
+		a.State = eval.Alerting
+	case ngModels.KeepLastStateErrState:
+	}
+	return a
+}
+
+func (a AlertState) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
+	if a.StartsAt.IsZero() {
+		a.StartsAt = result.EvaluatedAt
+	}
+	if !(alertRule.For > 0) {
+		a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
+	} else {
+		a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
+	}
+
+	switch alertRule.NoDataState {
+	case ngModels.Alerting:
+		a.State = eval.Alerting
+	case ngModels.NoData:
+		a.State = eval.NoData
+	case ngModels.KeepLastState:
+	case ngModels.OK:
+		a.State = eval.Normal
+	}
+	return a
 }

 func (st *StateTracker) GetAll() []AlertState {