mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add a "Reason" to Alert Instances to show underlying cause of state. (#49259)
This change adds a field to state.State and models.AlertInstance that indicates the "Reason" that an instance has its current state. This helps us account for cases where the state is "Normal" but the underlying evaluation returned "NoData" or "Error", for example. Fixes #42606. Signed-off-by: Joe Blubaugh <joe.blubaugh@grafana.com>
This commit is contained in:
parent
26a206cce2
commit
1cc034d960
@ -58,9 +58,16 @@ func (srv PrometheusSrv) RouteGetAlertStatuses(c *models.ReqContext) response.Re
|
||||
alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{
|
||||
Labels: alertState.GetLabels(labelOptions...),
|
||||
Annotations: alertState.Annotations,
|
||||
State: alertState.State.String(),
|
||||
ActiveAt: &startsAt,
|
||||
Value: valString,
|
||||
|
||||
// TODO: or should we make this two fields? Using one field lets the
|
||||
// frontend use the same logic for parsing text on annotations and this.
|
||||
State: state.InstanceStateAndReason{
|
||||
State: alertState.State,
|
||||
Reason: alertState.StateReason,
|
||||
}.String(),
|
||||
|
||||
ActiveAt: &startsAt,
|
||||
Value: valString,
|
||||
})
|
||||
}
|
||||
|
||||
@ -212,9 +219,16 @@ func (srv PrometheusSrv) toRuleGroup(groupName string, folder *models.Folder, ru
|
||||
alert := &apimodels.Alert{
|
||||
Labels: alertState.GetLabels(labelOptions...),
|
||||
Annotations: alertState.Annotations,
|
||||
State: alertState.State.String(),
|
||||
ActiveAt: &activeAt,
|
||||
Value: valString,
|
||||
|
||||
// TODO: or should we make this two fields? Using one field lets the
|
||||
// frontend use the same logic for parsing text on annotations and this.
|
||||
State: state.InstanceStateAndReason{
|
||||
State: alertState.State,
|
||||
Reason: alertState.StateReason,
|
||||
}.String(),
|
||||
|
||||
ActiveAt: &activeAt,
|
||||
Value: valString,
|
||||
}
|
||||
|
||||
if alertState.LastEvaluationTime.After(newRule.LastEvaluation) {
|
||||
|
@ -133,6 +133,10 @@ const (
|
||||
Error
|
||||
)
|
||||
|
||||
func (s State) IsValid() bool {
|
||||
return s <= Error
|
||||
}
|
||||
|
||||
func (s State) String() string {
|
||||
return [...]string{"Normal", "Alerting", "Pending", "NoData", "Error"}[s]
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ type AlertInstance struct {
|
||||
Labels InstanceLabels
|
||||
LabelsHash string
|
||||
CurrentState InstanceStateType
|
||||
CurrentReason string
|
||||
CurrentStateSince time.Time
|
||||
CurrentStateEnd time.Time
|
||||
LastEvalTime time.Time
|
||||
@ -49,6 +50,7 @@ type SaveAlertInstanceCommand struct {
|
||||
RuleUID string
|
||||
Labels InstanceLabels
|
||||
State InstanceStateType
|
||||
StateReason string
|
||||
LastEvalTime time.Time
|
||||
CurrentStateSince time.Time
|
||||
CurrentStateEnd time.Time
|
||||
@ -66,23 +68,12 @@ type GetAlertInstanceQuery struct {
|
||||
|
||||
// ListAlertInstancesQuery is the query to list alert instances.
|
||||
type ListAlertInstancesQuery struct {
|
||||
RuleOrgID int64 `json:"-"`
|
||||
RuleUID string
|
||||
State InstanceStateType
|
||||
RuleOrgID int64 `json:"-"`
|
||||
RuleUID string
|
||||
State InstanceStateType
|
||||
StateReason string
|
||||
|
||||
Result []*ListAlertInstancesQueryResult
|
||||
}
|
||||
|
||||
// ListAlertInstancesQueryResult represents the result of listAlertInstancesQuery.
|
||||
type ListAlertInstancesQueryResult struct {
|
||||
RuleOrgID int64 `xorm:"rule_org_id" json:"ruleOrgId"`
|
||||
RuleUID string `xorm:"rule_uid" json:"ruleUid"`
|
||||
Labels InstanceLabels `json:"labels"`
|
||||
LabelsHash string `json:"labeHash"`
|
||||
CurrentState InstanceStateType `json:"currentState"`
|
||||
CurrentStateSince time.Time `json:"currentStateSince"`
|
||||
CurrentStateEnd time.Time `json:"currentStateEnd"`
|
||||
LastEvalTime time.Time `json:"lastEvalTime"`
|
||||
Result []*AlertInstance
|
||||
}
|
||||
|
||||
// ValidateAlertInstance validates that the alert instance contains an alert rule id,
|
||||
|
@ -636,6 +636,7 @@ func (sch *schedule) saveAlertStates(ctx context.Context, states []*state.State)
|
||||
RuleUID: s.AlertRuleUID,
|
||||
Labels: models.InstanceLabels(s.Labels),
|
||||
State: models.InstanceStateType(s.State.String()),
|
||||
StateReason: s.StateReason,
|
||||
LastEvalTime: s.LastEvaluationTime,
|
||||
CurrentStateSince: s.StartsAt,
|
||||
CurrentStateEnd: s.EndsAt,
|
||||
|
@ -120,6 +120,7 @@ func (st *Manager) Warm(ctx context.Context) {
|
||||
CacheId: cacheId,
|
||||
Labels: lbs,
|
||||
State: translateInstanceState(entry.CurrentState),
|
||||
StateReason: entry.CurrentReason,
|
||||
LastEvaluationString: "",
|
||||
StartsAt: entry.CurrentStateSince,
|
||||
EndsAt: entry.CurrentStateEnd,
|
||||
@ -216,6 +217,7 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
|
||||
currentState.LastEvaluationString = result.EvaluationString
|
||||
currentState.TrimResults(alertRule)
|
||||
oldState := currentState.State
|
||||
oldReason := currentState.StateReason
|
||||
|
||||
st.log.Debug("setting alert state", "uid", alertRule.UID)
|
||||
switch result.State {
|
||||
@ -230,6 +232,15 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
|
||||
case eval.Pending: // we do not emit results with this state
|
||||
}
|
||||
|
||||
// Set reason iff the evaluation result differs from the resulting state and the result is neither Alerting nor Normal
|
||||
currentState.StateReason = ""
|
||||
|
||||
if currentState.State != result.State &&
|
||||
result.State != eval.Normal &&
|
||||
result.State != eval.Alerting {
|
||||
currentState.StateReason = result.State.String()
|
||||
}
|
||||
|
||||
// Set Resolved property so the scheduler knows to send a postable alert
|
||||
// to Alertmanager.
|
||||
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
|
||||
@ -243,8 +254,10 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
|
||||
}
|
||||
|
||||
st.set(currentState)
|
||||
if oldState != currentState.State {
|
||||
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, currentState.State, oldState)
|
||||
|
||||
shouldUpdateAnnotation := oldState != currentState.State || oldReason != currentState.StateReason
|
||||
if shouldUpdateAnnotation {
|
||||
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, InstanceStateAndReason{State: currentState.State, Reason: currentState.StateReason}, InstanceStateAndReason{State: oldState, Reason: oldReason})
|
||||
}
|
||||
return currentState
|
||||
}
|
||||
@ -281,6 +294,7 @@ func (st *Manager) Put(states []*State) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: why wouldn't you allow other types like NoData or Error?
|
||||
func translateInstanceState(state ngModels.InstanceStateType) eval.State {
|
||||
switch {
|
||||
case state == ngModels.InstanceStateFiring:
|
||||
@ -292,17 +306,31 @@ func translateInstanceState(state ngModels.InstanceStateType) eval.State {
|
||||
}
|
||||
}
|
||||
|
||||
func (st *Manager) annotateState(ctx context.Context, alertRule *ngModels.AlertRule, labels data.Labels, evaluatedAt time.Time, state eval.State, previousState eval.State) {
|
||||
st.log.Debug("alert state changed creating annotation", "alertRuleUID", alertRule.UID, "newState", state.String(), "oldState", previousState.String())
|
||||
// This struct provides grouping of state with reason, and string formatting.
|
||||
type InstanceStateAndReason struct {
|
||||
State eval.State
|
||||
Reason string
|
||||
}
|
||||
|
||||
func (i InstanceStateAndReason) String() string {
|
||||
s := fmt.Sprintf("%v", i.State)
|
||||
if len(i.Reason) > 0 {
|
||||
s += fmt.Sprintf(" (%v)", i.Reason)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (st *Manager) annotateState(ctx context.Context, alertRule *ngModels.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData InstanceStateAndReason) {
|
||||
st.log.Debug("alert state changed creating annotation", "alertRuleUID", alertRule.UID, "newState", currentData.String(), "oldState", previousData.String())
|
||||
|
||||
labels = removePrivateLabels(labels)
|
||||
annotationText := fmt.Sprintf("%s {%s} - %s", alertRule.Title, labels.String(), state.String())
|
||||
annotationText := fmt.Sprintf("%s {%s} - %s", alertRule.Title, labels.String(), currentData.String())
|
||||
|
||||
item := &annotations.Item{
|
||||
AlertId: alertRule.ID,
|
||||
OrgId: alertRule.OrgID,
|
||||
PrevState: previousState.String(),
|
||||
NewState: state.String(),
|
||||
PrevState: previousData.String(),
|
||||
NewState: currentData.String(),
|
||||
Text: annotationText,
|
||||
Epoch: evaluatedAt.UnixNano() / int64(time.Millisecond),
|
||||
}
|
||||
@ -357,7 +385,9 @@ func (st *Manager) staleResultsHandler(ctx context.Context, alertRule *ngModels.
|
||||
}
|
||||
|
||||
if s.State == eval.Alerting {
|
||||
st.annotateState(ctx, alertRule, s.Labels, time.Now(), eval.Normal, s.State)
|
||||
st.annotateState(ctx, alertRule, s.Labels, time.Now(),
|
||||
InstanceStateAndReason{State: eval.Normal, Reason: ""},
|
||||
InstanceStateAndReason{State: s.State, Reason: s.StateReason})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -757,7 +757,8 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Alerting,
|
||||
State: eval.Alerting,
|
||||
StateReason: eval.NoData.String(),
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1061,6 +1062,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedAnnotations: 1,
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
@ -1073,7 +1075,8 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Normal,
|
||||
State: eval.Normal,
|
||||
StateReason: eval.NoData.String(),
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1138,7 +1141,9 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Alerting,
|
||||
State: eval.Alerting,
|
||||
StateReason: eval.NoData.String(),
|
||||
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1203,7 +1208,8 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Pending,
|
||||
State: eval.Pending,
|
||||
StateReason: eval.Error.String(),
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1292,7 +1298,8 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Alerting,
|
||||
State: eval.Alerting,
|
||||
StateReason: eval.Error.String(),
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(20 * time.Second),
|
||||
@ -1436,7 +1443,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedAnnotations: 0,
|
||||
expectedAnnotations: 1,
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
@ -1449,8 +1456,9 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Normal,
|
||||
Error: nil,
|
||||
State: eval.Normal,
|
||||
StateReason: eval.Error.String(),
|
||||
Error: nil,
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1521,8 +1529,9 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Normal,
|
||||
Error: nil,
|
||||
State: eval.Normal,
|
||||
StateReason: eval.Error.String(),
|
||||
Error: nil,
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
@ -1590,7 +1599,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedAnnotations: 2,
|
||||
expectedAnnotations: 3,
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
@ -1677,7 +1686,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedAnnotations: 2,
|
||||
expectedAnnotations: 3,
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
@ -1790,11 +1799,21 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
|
||||
require.Eventuallyf(t, func() bool {
|
||||
return tc.expectedAnnotations == fakeAnnoRepo.Len()
|
||||
}, time.Second, 100*time.Millisecond, "only %d annotations are present", fakeAnnoRepo.Len())
|
||||
}, time.Second, 100*time.Millisecond, "%d annotations are present, expected %d. We have %+v", fakeAnnoRepo.Len(), tc.expectedAnnotations, printAllAnnotations(fakeAnnoRepo.Items))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func printAllAnnotations(annos []*annotations.Item) string {
|
||||
str := "["
|
||||
for _, anno := range annos {
|
||||
str += fmt.Sprintf("%+v, ", *anno)
|
||||
}
|
||||
str += "]"
|
||||
|
||||
return str
|
||||
}
|
||||
|
||||
func TestStaleResultsHandler(t *testing.T) {
|
||||
evaluationTime, err := time.Parse("2006-01-02", "2021-03-25")
|
||||
if err != nil {
|
||||
|
@ -11,23 +11,26 @@ import (
|
||||
|
||||
"github.com/grafana/grafana/pkg/expr"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||
)
|
||||
|
||||
type State struct {
|
||||
AlertRuleUID string
|
||||
OrgID int64
|
||||
CacheId string
|
||||
AlertRuleUID string
|
||||
OrgID int64
|
||||
CacheId string
|
||||
|
||||
StartsAt time.Time
|
||||
EndsAt time.Time
|
||||
LastSentAt time.Time
|
||||
|
||||
State eval.State
|
||||
Resolved bool
|
||||
Results []Evaluation
|
||||
StateReason string
|
||||
LastEvaluationString string
|
||||
StartsAt time.Time
|
||||
EndsAt time.Time
|
||||
LastEvaluationTime time.Time
|
||||
EvaluationDuration time.Duration
|
||||
LastSentAt time.Time
|
||||
Results []Evaluation
|
||||
Resolved bool
|
||||
Annotations map[string]string
|
||||
Labels data.Labels
|
||||
Image *store.Image
|
||||
@ -54,7 +57,7 @@ func NewEvaluationValues(m map[string]eval.NumberValueCapture) map[string]*float
|
||||
return result
|
||||
}
|
||||
|
||||
func (a *State) resultNormal(_ *ngModels.AlertRule, result eval.Result) {
|
||||
func (a *State) resultNormal(_ *models.AlertRule, result eval.Result) {
|
||||
a.Error = nil // should be nil since state is not error
|
||||
if a.State != eval.Normal {
|
||||
a.EndsAt = result.EvaluatedAt
|
||||
@ -63,7 +66,7 @@ func (a *State) resultNormal(_ *ngModels.AlertRule, result eval.Result) {
|
||||
a.State = eval.Normal
|
||||
}
|
||||
|
||||
func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
func (a *State) resultAlerting(alertRule *models.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error // should be nil since the state is not an error
|
||||
|
||||
switch a.State {
|
||||
@ -87,14 +90,14 @@ func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result
|
||||
}
|
||||
}
|
||||
|
||||
func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
func (a *State) resultError(alertRule *models.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error
|
||||
|
||||
execErrState := eval.Error
|
||||
switch alertRule.ExecErrState {
|
||||
case ngModels.AlertingErrState:
|
||||
case models.AlertingErrState:
|
||||
execErrState = eval.Alerting
|
||||
case ngModels.ErrorErrState:
|
||||
case models.ErrorErrState:
|
||||
// If the evaluation failed because a query returned an error then
|
||||
// update the state with the Datasource UID as a label and the error
|
||||
// message as an annotation so other code can use this metadata to
|
||||
@ -111,7 +114,7 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
a.Annotations["Error"] = queryError.Error()
|
||||
}
|
||||
execErrState = eval.Error
|
||||
case ngModels.OkErrState:
|
||||
case models.OkErrState:
|
||||
a.resultNormal(alertRule, result)
|
||||
return
|
||||
default:
|
||||
@ -140,7 +143,7 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
}
|
||||
}
|
||||
|
||||
func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
func (a *State) resultNoData(alertRule *models.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error
|
||||
|
||||
if a.StartsAt.IsZero() {
|
||||
@ -149,11 +152,11 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
|
||||
a.setEndsAt(alertRule, result)
|
||||
|
||||
switch alertRule.NoDataState {
|
||||
case ngModels.Alerting:
|
||||
case models.Alerting:
|
||||
a.State = eval.Alerting
|
||||
case ngModels.NoData:
|
||||
case models.NoData:
|
||||
a.State = eval.NoData
|
||||
case ngModels.OK:
|
||||
case models.OK:
|
||||
a.State = eval.Normal
|
||||
}
|
||||
}
|
||||
@ -179,7 +182,7 @@ func (a *State) Equals(b *State) bool {
|
||||
data.Labels(a.Annotations).String() == data.Labels(b.Annotations).String()
|
||||
}
|
||||
|
||||
func (a *State) TrimResults(alertRule *ngModels.AlertRule) {
|
||||
func (a *State) TrimResults(alertRule *models.AlertRule) {
|
||||
numBuckets := int64(alertRule.For.Seconds()) / alertRule.IntervalSeconds
|
||||
if numBuckets == 0 {
|
||||
numBuckets = 10 // keep at least 10 evaluations in the event For is set to 0
|
||||
@ -197,7 +200,7 @@ func (a *State) TrimResults(alertRule *ngModels.AlertRule) {
|
||||
// The internal Alertmanager will use this time to know when it should automatically resolve the alert
|
||||
// in case it hasn't received additional alerts. Under regular operations the scheduler will continue to send the
|
||||
// alert with an updated EndsAt, if the alert is resolved then a last alert is sent with EndsAt = last evaluation time.
|
||||
func (a *State) setEndsAt(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
func (a *State) setEndsAt(alertRule *models.AlertRule, result eval.Result) {
|
||||
ends := ResendDelay
|
||||
if alertRule.IntervalSeconds > int64(ResendDelay.Seconds()) {
|
||||
ends = time.Second * time.Duration(alertRule.IntervalSeconds)
|
||||
@ -206,7 +209,7 @@ func (a *State) setEndsAt(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
a.EndsAt = result.EvaluatedAt.Add(ends * 3)
|
||||
}
|
||||
|
||||
func (a *State) GetLabels(opts ...ngModels.LabelOption) map[string]string {
|
||||
func (a *State) GetLabels(opts ...models.LabelOption) map[string]string {
|
||||
labels := a.Labels.Copy()
|
||||
|
||||
for _, opt := range opts {
|
||||
|
@ -54,7 +54,7 @@ func (st DBstore) GetAlertInstance(ctx context.Context, cmd *models.GetAlertInst
|
||||
// based on various filters.
|
||||
func (st DBstore) ListAlertInstances(ctx context.Context, cmd *models.ListAlertInstancesQuery) error {
|
||||
return st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
|
||||
alertInstances := make([]*models.ListAlertInstancesQueryResult, 0)
|
||||
alertInstances := make([]*models.AlertInstance, 0)
|
||||
|
||||
s := strings.Builder{}
|
||||
params := make([]interface{}, 0)
|
||||
@ -74,6 +74,10 @@ func (st DBstore) ListAlertInstances(ctx context.Context, cmd *models.ListAlertI
|
||||
addToQuery(` AND current_state = ?`, cmd.State)
|
||||
}
|
||||
|
||||
if cmd.StateReason != "" {
|
||||
addToQuery(` AND current_reason = ?`, cmd.StateReason)
|
||||
}
|
||||
|
||||
if err := sess.SQL(s.String(), params...).Find(&alertInstances); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -97,6 +101,7 @@ func (st DBstore) SaveAlertInstance(ctx context.Context, cmd *models.SaveAlertIn
|
||||
Labels: cmd.Labels,
|
||||
LabelsHash: labelsHash,
|
||||
CurrentState: cmd.State,
|
||||
CurrentReason: cmd.StateReason,
|
||||
CurrentStateSince: cmd.CurrentStateSince,
|
||||
CurrentStateEnd: cmd.CurrentStateEnd,
|
||||
LastEvalTime: cmd.LastEvalTime,
|
||||
@ -106,12 +111,12 @@ func (st DBstore) SaveAlertInstance(ctx context.Context, cmd *models.SaveAlertIn
|
||||
return err
|
||||
}
|
||||
|
||||
params := append(make([]interface{}, 0), alertInstance.RuleOrgID, alertInstance.RuleUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
|
||||
params := append(make([]interface{}, 0), alertInstance.RuleOrgID, alertInstance.RuleUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentReason, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
|
||||
|
||||
upsertSQL := st.SQLStore.Dialect.UpsertSQL(
|
||||
"alert_instance",
|
||||
[]string{"rule_org_id", "rule_uid", "labels_hash"},
|
||||
[]string{"rule_org_id", "rule_uid", "labels", "labels_hash", "current_state", "current_state_since", "current_state_end", "last_eval_time"})
|
||||
[]string{"rule_org_id", "rule_uid", "labels", "labels_hash", "current_state", "current_reason", "current_state_since", "current_state_end", "last_eval_time"})
|
||||
_, err = sess.SQL(upsertSQL, params...).Query()
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -47,10 +47,11 @@ func TestAlertInstanceOperations(t *testing.T) {
|
||||
|
||||
t.Run("can save and read new alert instance", func(t *testing.T) {
|
||||
saveCmd := &models.SaveAlertInstanceCommand{
|
||||
RuleOrgID: alertRule1.OrgID,
|
||||
RuleUID: alertRule1.UID,
|
||||
State: models.InstanceStateFiring,
|
||||
Labels: models.InstanceLabels{"test": "testValue"},
|
||||
RuleOrgID: alertRule1.OrgID,
|
||||
RuleUID: alertRule1.UID,
|
||||
State: models.InstanceStateFiring,
|
||||
StateReason: string(models.InstanceStateError),
|
||||
Labels: models.InstanceLabels{"test": "testValue"},
|
||||
}
|
||||
err := dbstore.SaveAlertInstance(ctx, saveCmd)
|
||||
require.NoError(t, err)
|
||||
@ -67,6 +68,7 @@ func TestAlertInstanceOperations(t *testing.T) {
|
||||
require.Equal(t, saveCmd.Labels, getCmd.Result.Labels)
|
||||
require.Equal(t, alertRule1.OrgID, getCmd.Result.RuleOrgID)
|
||||
require.Equal(t, alertRule1.UID, getCmd.Result.RuleUID)
|
||||
require.Equal(t, saveCmd.StateReason, getCmd.Result.CurrentReason)
|
||||
})
|
||||
|
||||
t.Run("can save and read new alert instance with no labels", func(t *testing.T) {
|
||||
|
@ -155,6 +155,11 @@ func AlertInstanceMigration(mg *migrator.Migrator) {
|
||||
mg.AddMigration("add index rule_org_id, current_state on alert_instance", migrator.NewAddIndexMigration(alertInstance, &migrator.Index{
|
||||
Cols: []string{"rule_org_id", "current_state"}, Type: migrator.IndexType,
|
||||
}))
|
||||
|
||||
mg.AddMigration("add current_reason column related to current_state",
|
||||
migrator.NewAddColumnMigration(alertInstance, &migrator.Column{
|
||||
Name: "current_reason", Type: migrator.DB_NVarchar, Length: 190, Nullable: true,
|
||||
}))
|
||||
}
|
||||
|
||||
func AddAlertRuleMigrations(mg *migrator.Migrator, defaultIntervalSeconds int64) {
|
||||
|
@ -197,7 +197,6 @@ const getStyles = <T extends unknown>(
|
||||
`,
|
||||
bodyCell: css`
|
||||
overflow: hidden;
|
||||
word-break: break-all;
|
||||
${theme.breakpoints.down('sm')} {
|
||||
grid-column-end: right;
|
||||
grid-column-start: right;
|
||||
|
@ -1,14 +1,14 @@
|
||||
import React, { FC } from 'react';
|
||||
|
||||
import { AlertState } from '@grafana/data';
|
||||
import { GrafanaAlertState, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
import { GrafanaAlertStateWithReason, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
|
||||
import { alertStateToReadable, alertStateToState } from '../../utils/rules';
|
||||
import { StateTag } from '../StateTag';
|
||||
interface Props {
|
||||
state: PromAlertingRuleState | GrafanaAlertState | AlertState;
|
||||
state: PromAlertingRuleState | GrafanaAlertStateWithReason | AlertState;
|
||||
}
|
||||
|
||||
export const AlertStateTag: FC<Props> = ({ state }) => (
|
||||
<StateTag state={alertStateToState[state]}>{alertStateToReadable(state)}</StateTag>
|
||||
<StateTag state={alertStateToState(state)}>{alertStateToReadable(state)}</StateTag>
|
||||
);
|
||||
|
@ -9,7 +9,7 @@ import { labelsMatchMatchers, parseMatchers } from 'app/features/alerting/unifie
|
||||
import { sortAlerts } from 'app/features/alerting/unified/utils/misc';
|
||||
import { SortOrder } from 'app/plugins/panel/alertlist/types';
|
||||
import { Alert, Rule } from 'app/types/unified-alerting';
|
||||
import { GrafanaAlertState } from 'app/types/unified-alerting-dto';
|
||||
import { GrafanaAlertState, mapStateWithReasonToBaseState } from 'app/types/unified-alerting-dto';
|
||||
|
||||
import { isAlertingRule } from '../../utils/rules';
|
||||
import { DetailsField } from '../DetailsField';
|
||||
@ -79,7 +79,7 @@ function filterAlerts(
|
||||
}
|
||||
if (alertInstanceState) {
|
||||
filteredAlerts = filteredAlerts.filter((alert) => {
|
||||
return alert.state === alertInstanceState;
|
||||
return mapStateWithReasonToBaseState(alert.state) === alertInstanceState;
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@ import React, { FC } from 'react';
|
||||
import { AlertState, dateTimeFormat, GrafanaTheme } from '@grafana/data';
|
||||
import { Alert, LoadingPlaceholder, useStyles } from '@grafana/ui';
|
||||
import { StateHistoryItem, StateHistoryItemData } from 'app/types/unified-alerting';
|
||||
import { GrafanaAlertState, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
import { GrafanaAlertStateWithReason, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
|
||||
import { useManagedAlertStateHistory } from '../../hooks/useManagedAlertStateHistory';
|
||||
import { AlertLabel } from '../AlertLabel';
|
||||
@ -15,7 +15,7 @@ import { AlertStateTag } from './AlertStateTag';
|
||||
|
||||
type StateHistoryRowItem = {
|
||||
id: string;
|
||||
state: PromAlertingRuleState | GrafanaAlertState | AlertState;
|
||||
state: PromAlertingRuleState | GrafanaAlertStateWithReason | AlertState;
|
||||
text?: string;
|
||||
data?: StateHistoryItemData;
|
||||
timestamp?: number;
|
||||
|
@ -5,7 +5,11 @@ import { config } from '@grafana/runtime';
|
||||
import { alertInstanceKey } from 'app/features/alerting/unified/utils/rules';
|
||||
import { SortOrder } from 'app/plugins/panel/alertlist/types';
|
||||
import { Alert, CombinedRule, FilterState, RulesSource, SilenceFilterState } from 'app/types/unified-alerting';
|
||||
import { GrafanaAlertState, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
import {
|
||||
GrafanaAlertState,
|
||||
PromAlertingRuleState,
|
||||
mapStateWithReasonToBaseState,
|
||||
} from 'app/types/unified-alerting-dto';
|
||||
|
||||
import { ALERTMANAGER_NAME_QUERY_KEY } from './constants';
|
||||
import { getRulesSourceName } from './datasource';
|
||||
@ -126,7 +130,10 @@ const alertStateSortScore = {
|
||||
export function sortAlerts(sortOrder: SortOrder, alerts: Alert[]): Alert[] {
|
||||
// Make sure to handle tie-breaks because API returns alert instances in random order every time
|
||||
if (sortOrder === SortOrder.Importance) {
|
||||
return sortBy(alerts, (alert) => [alertStateSortScore[alert.state], alertInstanceKey(alert).toLocaleLowerCase()]);
|
||||
return sortBy(alerts, (alert) => [
|
||||
alertStateSortScore[mapStateWithReasonToBaseState(alert.state)],
|
||||
alertInstanceKey(alert).toLocaleLowerCase(),
|
||||
]);
|
||||
} else if (sortOrder === SortOrder.TimeAsc) {
|
||||
return sortBy(alerts, (alert) => [
|
||||
new Date(alert.activeAt) || new Date(),
|
||||
|
@ -16,6 +16,8 @@ import {
|
||||
} from 'app/types/unified-alerting';
|
||||
import {
|
||||
GrafanaAlertState,
|
||||
GrafanaAlertStateWithReason,
|
||||
mapStateWithReasonToBaseState,
|
||||
PromAlertingRuleState,
|
||||
PromRuleType,
|
||||
RulerAlertingRuleDTO,
|
||||
@ -69,7 +71,7 @@ export function isPrometheusRuleIdentifier(identifier: RuleIdentifier): identifi
|
||||
return 'ruleHash' in identifier;
|
||||
}
|
||||
|
||||
export function alertStateToReadable(state: PromAlertingRuleState | GrafanaAlertState | AlertState): string {
|
||||
export function alertStateToReadable(state: PromAlertingRuleState | GrafanaAlertStateWithReason | AlertState): string {
|
||||
if (state === PromAlertingRuleState.Inactive) {
|
||||
return 'Normal';
|
||||
}
|
||||
@ -89,7 +91,18 @@ export const flattenRules = (rules: RuleNamespace[]) => {
|
||||
}, []);
|
||||
};
|
||||
|
||||
export const alertStateToState: Record<PromAlertingRuleState | GrafanaAlertState | AlertState, State> = {
|
||||
/**
 * Looks up the `State` associated with an alert state in `alertStateToStateMap`.
 *
 * Values that are members of the `AlertState` enum are used as the lookup key
 * unchanged. Any other value is first passed through
 * `mapStateWithReasonToBaseState` so that a state carrying a reason resolves
 * to the same entry as its base state.
 */
export function alertStateToState(state: PromAlertingRuleState | GrafanaAlertStateWithReason | AlertState): State {
  const isPlainAlertState = Object.values(AlertState).includes(state as AlertState);

  const key: PromAlertingRuleState | GrafanaAlertState | AlertState = isPlainAlertState
    ? (state as AlertState)
    : mapStateWithReasonToBaseState(state as GrafanaAlertStateWithReason | PromAlertingRuleState);

  return alertStateToStateMap[key];
}
|
||||
|
||||
const alertStateToStateMap: Record<PromAlertingRuleState | GrafanaAlertState | AlertState, State> = {
|
||||
[PromAlertingRuleState.Inactive]: 'good',
|
||||
[PromAlertingRuleState.Firing]: 'bad',
|
||||
[PromAlertingRuleState.Pending]: 'warning',
|
||||
@ -111,7 +124,9 @@ export function getFirstActiveAt(promRule: AlertingRule) {
|
||||
return null;
|
||||
}
|
||||
return promRule.alerts.reduce((prev, alert) => {
|
||||
if (alert.activeAt && alert.state !== GrafanaAlertState.Normal) {
|
||||
const isNotNormal =
|
||||
mapStateWithReasonToBaseState(alert.state as GrafanaAlertStateWithReason) !== GrafanaAlertState.Normal;
|
||||
if (alert.activeAt && isNotNormal) {
|
||||
const activeAt = new Date(alert.activeAt);
|
||||
if (prev === null || prev.getTime() > activeAt.getTime()) {
|
||||
return activeAt;
|
||||
|
@ -34,7 +34,7 @@ const UngroupedModeView: FC<UngroupedModeProps> = ({ rules, options }) => {
|
||||
<div className={stateStyle.icon}>
|
||||
<Icon
|
||||
name={alertDef.getStateDisplayModel(rule.state).iconClass as IconName}
|
||||
className={stateStyle[alertStateToState[rule.state]]}
|
||||
className={stateStyle[alertStateToState(rule.state)]}
|
||||
size={'lg'}
|
||||
/>
|
||||
</div>
|
||||
@ -44,7 +44,7 @@ const UngroupedModeView: FC<UngroupedModeProps> = ({ rules, options }) => {
|
||||
{rule.name}
|
||||
</div>
|
||||
<div className={styles.alertDuration}>
|
||||
<span className={stateStyle[alertStateToState[rule.state]]}>{rule.state.toUpperCase()}</span>{' '}
|
||||
<span className={stateStyle[alertStateToState(rule.state)]}>{rule.state.toUpperCase()}</span>{' '}
|
||||
{firstActiveAt && rule.state !== PromAlertingRuleState.Inactive && (
|
||||
<>
|
||||
for{' '}
|
||||
|
@ -1,7 +1,7 @@
|
||||
import { isEmpty } from 'lodash';
|
||||
|
||||
import { PanelProps } from '@grafana/data';
|
||||
import { Alert } from 'app/types/unified-alerting';
|
||||
import { Alert, hasAlertState } from 'app/types/unified-alerting';
|
||||
import { GrafanaAlertState, PromAlertingRuleState } from 'app/types/unified-alerting-dto';
|
||||
|
||||
import { UnifiedAlertListOptions } from './types';
|
||||
@ -16,13 +16,13 @@ export function filterAlerts(options: PanelProps<UnifiedAlertListOptions>['optio
|
||||
return alerts.filter((alert) => {
|
||||
return (
|
||||
(stateFilter.firing &&
|
||||
(alert.state === GrafanaAlertState.Alerting || alert.state === PromAlertingRuleState.Firing)) ||
|
||||
(hasAlertState(alert, GrafanaAlertState.Alerting) || hasAlertState(alert, PromAlertingRuleState.Firing))) ||
|
||||
(stateFilter.pending &&
|
||||
(alert.state === GrafanaAlertState.Pending || alert.state === PromAlertingRuleState.Pending)) ||
|
||||
(stateFilter.noData && alert.state === GrafanaAlertState.NoData) ||
|
||||
(stateFilter.normal && alert.state === GrafanaAlertState.Normal) ||
|
||||
(stateFilter.error && alert.state === GrafanaAlertState.Error) ||
|
||||
(stateFilter.inactive && alert.state === PromAlertingRuleState.Inactive)
|
||||
(hasAlertState(alert, GrafanaAlertState.Pending) || hasAlertState(alert, PromAlertingRuleState.Pending))) ||
|
||||
(stateFilter.noData && hasAlertState(alert, GrafanaAlertState.NoData)) ||
|
||||
(stateFilter.normal && hasAlertState(alert, GrafanaAlertState.Normal)) ||
|
||||
(stateFilter.error && hasAlertState(alert, GrafanaAlertState.Error)) ||
|
||||
(stateFilter.inactive && hasAlertState(alert, PromAlertingRuleState.Inactive))
|
||||
);
|
||||
});
|
||||
}
|
||||
|
28
public/app/types/unified-alerting-dto.test.ts
Normal file
28
public/app/types/unified-alerting-dto.test.ts
Normal file
@ -0,0 +1,28 @@
|
||||
import {
|
||||
GrafanaAlertState,
|
||||
PromAlertingRuleState,
|
||||
mapStateWithReasonToBaseState,
|
||||
} from 'app/types/unified-alerting-dto';
|
||||
|
||||
// Table-driven checks that a state string, with or without a trailing
// " (Reason)" suffix, is reduced to its base alert state.
describe('Unified Alerting DTO', () => {
  describe('mapStateWithReasonToBaseState', () => {
    // Each row: the raw state as received, and the base state it must map to.
    // Prometheus rule states carry no reason and must pass through unchanged.
    it.each`
      arg                               | expected
      ${GrafanaAlertState.Normal}       | ${GrafanaAlertState.Normal}
      ${'Normal (NoData)'}              | ${GrafanaAlertState.Normal}
      ${'Normal (Error)'}               | ${GrafanaAlertState.Normal}
      ${GrafanaAlertState.Alerting}     | ${GrafanaAlertState.Alerting}
      ${'Alerting (NoData)'}            | ${GrafanaAlertState.Alerting}
      ${'Alerting (Error)'}             | ${GrafanaAlertState.Alerting}
      ${'Pending '}                     | ${GrafanaAlertState.Pending}
      ${'NoData'}                       | ${GrafanaAlertState.NoData}
      ${'Error'}                        | ${GrafanaAlertState.Error}
      ${PromAlertingRuleState.Firing}   | ${PromAlertingRuleState.Firing}
      ${PromAlertingRuleState.Pending}  | ${PromAlertingRuleState.Pending}
      ${PromAlertingRuleState.Inactive} | ${PromAlertingRuleState.Inactive}
    `('should map correctly', ({ arg, expected }) => {
      expect(mapStateWithReasonToBaseState(arg)).toEqual(expected);
    });
  });
});
|
@ -19,6 +19,33 @@ export enum GrafanaAlertState {
|
||||
Error = 'Error',
|
||||
}
|
||||
|
||||
/**
 * Optional reason suffix appended to a Grafana alert state: either the empty
 * string, or a single space followed by arbitrary text in parentheses.
 */
type GrafanaAlertStateReason = ` (${string})` | '';
|
||||
|
||||
/**
 * A GrafanaAlertState value immediately followed by an optional reason
 * suffix, e.g. a bare state or a state followed by " (<reason>)".
 */
export type GrafanaAlertStateWithReason = `${GrafanaAlertState}${GrafanaAlertStateReason}`;
|
||||
|
||||
/**
 * Type guard used to disambiguate the union
 * PromAlertingRuleState | GrafanaAlertStateWithReason.
 *
 * @param state - the state value to classify
 * @returns true when `state` is neither null nor undefined and is not one of
 *          the PromAlertingRuleState enum values; false otherwise
 */
export function isAlertStateWithReason(
  state: PromAlertingRuleState | GrafanaAlertStateWithReason
): state is GrafanaAlertStateWithReason {
  // Missing values are never treated as a Grafana state.
  if (state === null || typeof state === 'undefined') {
    return false;
  }

  const isPromState = Object.values(PromAlertingRuleState).includes(state as PromAlertingRuleState);
  return !isPromState;
}
|
||||
|
||||
/**
 * Reduces a state that may carry a reason suffix to its base state.
 *
 * @param state - a Grafana state (optionally followed by " (<reason>)") or a
 *                Prometheus alerting rule state
 * @returns the Prometheus state unchanged, or, for a Grafana state, the text
 *          before the first space
 */
export function mapStateWithReasonToBaseState(
  state: GrafanaAlertStateWithReason | PromAlertingRuleState
): GrafanaAlertState | PromAlertingRuleState {
  // Prometheus rule states never carry a reason; pass them through as-is.
  if (!isAlertStateWithReason(state)) {
    return state;
  }

  // Everything after the first space is the reason suffix; keep only the head.
  const [baseState] = state.split(' ');
  return baseState as GrafanaAlertState;
}
|
||||
|
||||
export enum PromRuleType {
|
||||
Alerting = 'alerting',
|
||||
Recording = 'recording',
|
||||
@ -64,7 +91,7 @@ export interface PromAlertingRuleDTO extends PromRuleDTOBase {
|
||||
alerts: Array<{
|
||||
labels: Labels;
|
||||
annotations: Annotations;
|
||||
state: Exclude<PromAlertingRuleState | GrafanaAlertState, PromAlertingRuleState.Inactive>;
|
||||
state: Exclude<PromAlertingRuleState | GrafanaAlertStateWithReason, PromAlertingRuleState.Inactive>;
|
||||
activeAt: string;
|
||||
value: string;
|
||||
}>;
|
||||
|
@ -10,15 +10,22 @@ import {
|
||||
Annotations,
|
||||
RulerRuleGroupDTO,
|
||||
GrafanaAlertState,
|
||||
GrafanaAlertStateWithReason,
|
||||
mapStateWithReasonToBaseState,
|
||||
} from './unified-alerting-dto';
|
||||
|
||||
export type Alert = {
|
||||
activeAt: string;
|
||||
annotations: { [key: string]: string };
|
||||
labels: { [key: string]: string };
|
||||
state: PromAlertingRuleState | GrafanaAlertState;
|
||||
state: PromAlertingRuleState | GrafanaAlertStateWithReason;
|
||||
value: string;
|
||||
};
|
||||
|
||||
/**
 * Tests whether an alert is in the given state, ignoring any reason suffix
 * attached to the alert's own state.
 *
 * @param alert - the alert whose `state` field is inspected
 * @param state - the base state to compare against
 * @returns true when the alert's base state strictly equals `state`
 */
export function hasAlertState(alert: Alert, state: PromAlertingRuleState | GrafanaAlertState): boolean {
  const baseState = mapStateWithReasonToBaseState(alert.state as GrafanaAlertStateWithReason);
  return baseState === state;
}
|
||||
|
||||
interface RuleBase {
|
||||
health: string;
|
||||
name: string;
|
||||
|
Loading…
Reference in New Issue
Block a user