Files
grafana/pkg/services/ngalert/models/instance.go
Alex Moreno 531b439cf1 Alerting: Add alert pausing feature (#60734)
* Add field in alert_rule model, add state to alert_instance model, and state to eval

* Remove paused state from eval package

* Skip paused alert rules in scheduler

* Add migration to add is_paused field to alert_rule table

* Convert to postable alerts only if not normal, pending, or paused

* Handle paused eval results in state manager

* Add Paused state to eval package

* Add paused alerts logic in scheduler

* Skip alert on scheduler

* Remove paused status from eval package

* Apply suggestions from code review

Co-authored-by: George Robinson <george.robinson@grafana.com>

* Remove state

* Rethink schedule and manager for paused alerts

* Change return to continue

* Remove unused var

* Rethink alert pausing

* Paused alerts storing annotations

* Only add one state transition

* Revert boolean method renaming refactor

* Revert take image refactor

* Make registry errors public

* Revert method extraction for getting a folder title

* Revert variable renaming refactor

* Undo unnecessary changes

* Revert changes in test

* Remove IsPause check in PatchPartialAlertRule function

* Use SetNormal to set state

* Fix text by returning to old behaviour on alert rule deletion

* Add test in schedule_unit_test.go to test ticks with paused alerts

* Add comment to clarify usage of context.Background()

* Add comment to clarify resetStateByRuleUID method usage

* Move rule get to a more limited scope

* Update pkg/services/ngalert/schedule/schedule.go

Co-authored-by: George Robinson <george.robinson@grafana.com>

* Run gofmt on pkg/services/ngalert/schedule/schedule.go

* Remove defer cancel for context

* Update pkg/services/ngalert/models/instance_test.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* Update pkg/services/ngalert/models/testing.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* Update pkg/services/ngalert/schedule/schedule_unit_test.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* Update pkg/services/ngalert/schedule/schedule_unit_test.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* Update pkg/services/ngalert/models/instance_test.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* skip scheduler rule state clean up on paused alert rule

* Update pkg/services/ngalert/schedule/schedule.go

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>

* Fix mock in test

* Add (hopefully) final suggestions

* Use error channel from recordAnnotationsSync to cancel context

* Run make gen-cue

* Place pause alert check in channel update after version check

* Reduce branching in update channel select

* Add if for error and move code inside if in state manager ResetStateByRuleUID

* Add reason to logs

* Update pkg/services/ngalert/schedule/schedule.go

Co-authored-by: George Robinson <george.robinson@grafana.com>

* Do not delete alert rule routine, just exit on eval if is paused

* Reduce branching and create-close a channel to avoid deadlocks

* Separate state deletion and state reset (includes history saving)

* Add current pause state in rule route in scheduler

* Split clearState and bring errCh closer to RecordStatesAsync call

* Change rule to ruleMeta in RecordStatesAsync

* copy state to be able to modify it

* Add timeout to context creation

* Shorten the timeout

* Use resetState if rule is paused and deleteState if rule is not paused

* Remove Empty state reason

* Save every rule change in historian

* Add tests for DeleteStateByRuleUID and ResetStateByRuleUID

* Remove useless line

* Remove outdated comment

Co-authored-by: George Robinson <george.robinson@grafana.com>
Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>
Co-authored-by: Armand Grillet <2117580+armandgrillet@users.noreply.github.com>
2023-01-26 18:29:10 +01:00

76 lines
2.1 KiB
Go

package models
import (
"fmt"
"time"
)
// AlertInstance represents a single alert instance.
// It combines the identifying AlertInstanceKey with the instance's labels,
// its current state, and timestamps related to that state.
type AlertInstance struct {
// Embedded key: rule organisation ID, rule UID and labels hash.
// NOTE(review): the xorm "extends" tag presumably maps the embedded key's
// fields as columns of this struct's table — confirm against the xorm docs.
AlertInstanceKey `xorm:"extends"`
// Labels attached to this instance (InstanceLabels is declared outside this file section).
Labels InstanceLabels
// CurrentState is the instance's state; ValidateAlertInstance rejects values
// for which InstanceStateType.IsValid returns false.
CurrentState InstanceStateType
// CurrentReason is presumably a free-form explanation accompanying CurrentState — TODO confirm.
CurrentReason string
// CurrentStateSince is presumably when the instance entered CurrentState — TODO confirm.
CurrentStateSince time.Time
// CurrentStateEnd is presumably when CurrentState ends or expires — TODO confirm.
CurrentStateEnd time.Time
// LastEvalTime is presumably the time of the most recent evaluation — TODO confirm.
LastEvalTime time.Time
}
// AlertInstanceKey holds the fields that identify an alert instance:
// the organisation and UID of its rule plus a hash of its labels.
// It is embedded in AlertInstance.
type AlertInstanceKey struct {
// RuleOrgID is the organisation ID of the rule; the xorm tag names the column "rule_org_id".
// ValidateAlertInstance treats a zero value as missing.
RuleOrgID int64 `xorm:"rule_org_id"`
// RuleUID is the UID of the rule; the xorm tag names the column "rule_uid".
// ValidateAlertInstance treats an empty value as missing.
RuleUID string `xorm:"rule_uid"`
// LabelsHash is presumably a hash computed from the instance's labels — TODO confirm how it is derived.
LabelsHash string
}
// InstanceStateType is a string-backed enumeration of alert instance states.
type InstanceStateType string

// The set of states recognised by InstanceStateType.IsValid.
const (
	// InstanceStateFiring marks an alert that is firing.
	InstanceStateFiring InstanceStateType = "Alerting"
	// InstanceStateNormal marks an alert in the normal state.
	InstanceStateNormal InstanceStateType = "Normal"
	// InstanceStatePending marks an alert that is firing but has not yet met the duration.
	InstanceStatePending InstanceStateType = "Pending"
	// InstanceStateNoData marks an alert that has no data.
	InstanceStateNoData InstanceStateType = "NoData"
	// InstanceStateError marks an alert that is erroring.
	InstanceStateError InstanceStateType = "Error"
)

// IsValid reports whether i equals one of the declared InstanceStateType
// constants. The comparison is an exact, case-sensitive string match.
func (i InstanceStateType) IsValid() bool {
	switch i {
	case InstanceStateFiring,
		InstanceStateNormal,
		InstanceStateNoData,
		InstanceStatePending,
		InstanceStateError:
		return true
	default:
		return false
	}
}
// ListAlertInstancesQuery is the query for listing alert instances.
type ListAlertInstancesQuery struct {
// RuleUID is presumably used to restrict the listing to one rule — TODO confirm against the query handler.
RuleUID string
// RuleOrgID is the organisation ID for the query; the `json:"-"` tag
// excludes it from JSON encoding and decoding.
RuleOrgID int64 `json:"-"`
// Result presumably receives the matching instances once the query has run — TODO confirm.
Result []*AlertInstance
}
// ValidateAlertInstance checks that an alert instance names its rule
// (non-zero organisation ID and non-empty rule UID) and carries a valid state.
// It returns nil when every check passes; otherwise it returns an error
// describing the first check that failed, in the order listed above.
func ValidateAlertInstance(alertInstance AlertInstance) error {
	switch {
	case alertInstance.RuleOrgID == 0:
		return fmt.Errorf("alert instance is invalid due to missing alert rule organisation")
	case alertInstance.RuleUID == "":
		return fmt.Errorf("alert instance is invalid due to missing alert rule uid")
	case !alertInstance.CurrentState.IsValid():
		return fmt.Errorf("alert instance is invalid because the state '%v' is invalid", alertInstance.CurrentState)
	}

	return nil
}