mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add debug logs for EndsAt timestamp (#70336)
This commit adds debug logs for previous_ends_at and next_ends_at to state.go to help us debug issues where alerts are resolved in Alertmanager due to expiration. This change is in response to a support escalation where this information was needed but unavailable.
This commit is contained in:
parent
fc39dadf62
commit
815e98ed95
@ -190,31 +190,73 @@ func resultNormal(state *State, _ *models.AlertRule, result eval.Result, logger
|
||||
if state.State == eval.Normal {
|
||||
logger.Debug("Keeping state", "state", state.State)
|
||||
} else {
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Normal)
|
||||
nextEndsAt := result.EvaluatedAt
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.Normal,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
// Normal states have the same start and end timestamps
|
||||
state.SetNormal("", result.EvaluatedAt, result.EvaluatedAt)
|
||||
state.SetNormal("", nextEndsAt, nextEndsAt)
|
||||
}
|
||||
}
|
||||
|
||||
func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
|
||||
switch state.State {
|
||||
case eval.Alerting:
|
||||
logger.Debug("Keeping state", "state", state.State)
|
||||
prevEndsAt := state.EndsAt
|
||||
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Keeping state",
|
||||
"state",
|
||||
state.State,
|
||||
"previous_ends_at",
|
||||
prevEndsAt,
|
||||
"next_ends_at",
|
||||
state.EndsAt)
|
||||
case eval.Pending:
|
||||
// If the previous state is Pending then check if the For duration has been observed
|
||||
if result.EvaluatedAt.Sub(state.StartsAt) >= rule.For {
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Alerting)
|
||||
state.SetAlerting("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.Alerting,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetAlerting("", result.EvaluatedAt, nextEndsAt)
|
||||
}
|
||||
default:
|
||||
nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
if rule.For > 0 {
|
||||
// If the alert rule has a For duration that should be observed then the state should be set to Pending
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Pending)
|
||||
state.SetPending("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.Pending,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetPending("", result.EvaluatedAt, nextEndsAt)
|
||||
} else {
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Alerting)
|
||||
state.SetAlerting("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.Alerting,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetAlerting("", result.EvaluatedAt, nextEndsAt)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -228,12 +270,28 @@ func resultError(state *State, rule *models.AlertRule, result eval.Result, logge
|
||||
state.StateReason = "error"
|
||||
case models.ErrorErrState:
|
||||
if state.State == eval.Error {
|
||||
logger.Debug("Keeping state", "state", state.State)
|
||||
prevEndsAt := state.EndsAt
|
||||
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Keeping state",
|
||||
"state",
|
||||
state.State,
|
||||
"previous_ends_at",
|
||||
prevEndsAt,
|
||||
"next_ends_at",
|
||||
state.EndsAt)
|
||||
} else {
|
||||
nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
// This is the first occurrence of an error
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Error)
|
||||
state.SetError(result.Error, result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.Error,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetError(result.Error, result.EvaluatedAt, nextEndsAt)
|
||||
|
||||
if result.Error != nil {
|
||||
state.Annotations["Error"] = result.Error.Error()
|
||||
@ -269,12 +327,28 @@ func resultNoData(state *State, rule *models.AlertRule, result eval.Result, logg
|
||||
state.StateReason = models.NoData.String()
|
||||
case models.NoData:
|
||||
if state.State == eval.NoData {
|
||||
logger.Debug("Keeping state", "state", state.State)
|
||||
prevEndsAt := state.EndsAt
|
||||
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Keeping state",
|
||||
"state",
|
||||
state.State,
|
||||
"previous_ends_at",
|
||||
prevEndsAt,
|
||||
"next_ends_at",
|
||||
state.EndsAt)
|
||||
} else {
|
||||
// This is the first occurrence of no data
|
||||
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.NoData)
|
||||
state.SetNoData("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Changing state",
|
||||
"previous_state",
|
||||
state.State,
|
||||
"next_state",
|
||||
eval.NoData,
|
||||
"previous_ends_at",
|
||||
state.EndsAt,
|
||||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetNoData("", result.EvaluatedAt, nextEndsAt)
|
||||
}
|
||||
case models.OK:
|
||||
logger.Debug("Execution no data state is Normal", "handler", "resultNormal", "previous_handler", "resultNoData")
|
||||
|
Loading…
Reference in New Issue
Block a user