mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Scheduler and registry handle rules by an interface (#84044)
* export Evaluation * Export Evaluation * Export RuleVersionAndPauseStatus * export Eval, create interface * Export update and add to interface * Export Stop and Run and add to interface * Registry and scheduler use rule by interface and not concrete type * Update factory to use interface, update tests to work over public API rather than writing to channels directly * Rename map in registry * Rename getOrCreateInfo to not reference a specific implementation * Genericize alertRuleInfoRegistry into ruleRegistry * Rename alertRuleInfo to alertRule * Comments on interface * Update pkg/services/ngalert/schedule/schedule.go Co-authored-by: Jean-Philippe Quéméner <JohnnyQQQQ@users.noreply.github.com> --------- Co-authored-by: Jean-Philippe Quéméner <JohnnyQQQQ@users.noreply.github.com>
This commit is contained in:
@@ -23,9 +23,22 @@ import (
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
type ruleFactoryFunc func(context.Context) *alertRuleInfo
|
||||
// Rule represents a single piece of work that is executed periodically by the ruler.
|
||||
type Rule interface {
|
||||
// Run creates the resources that will perform the rule's work, and starts it. It blocks indefinitely, until Stop is called or another signal is sent.
|
||||
Run(key ngmodels.AlertRuleKey) error
|
||||
// Stop shuts down the rule's execution with an optional reason. It has no effect if the rule has not yet been Run.
|
||||
Stop(reason error)
|
||||
// Eval sends a signal to execute the work represented by the rule, exactly one time.
|
||||
// It has no effect if the rule has not yet been Run, or if the rule is Stopped.
|
||||
Eval(eval *Evaluation) (bool, *Evaluation)
|
||||
// Update sends a singal to change the definition of the rule.
|
||||
Update(lastVersion RuleVersionAndPauseStatus) bool
|
||||
}
|
||||
|
||||
func (f ruleFactoryFunc) new(ctx context.Context) *alertRuleInfo {
|
||||
type ruleFactoryFunc func(context.Context) Rule
|
||||
|
||||
func (f ruleFactoryFunc) new(ctx context.Context) Rule {
|
||||
return f(ctx)
|
||||
}
|
||||
|
||||
@@ -44,8 +57,8 @@ func newRuleFactory(
|
||||
evalAppliedHook evalAppliedFunc,
|
||||
stopAppliedHook stopAppliedFunc,
|
||||
) ruleFactoryFunc {
|
||||
return func(ctx context.Context) *alertRuleInfo {
|
||||
return newAlertRuleInfo(
|
||||
return func(ctx context.Context) Rule {
|
||||
return newAlertRule(
|
||||
ctx,
|
||||
appURL,
|
||||
disableGrafanaFolder,
|
||||
@@ -71,9 +84,9 @@ type ruleProvider interface {
|
||||
get(ngmodels.AlertRuleKey) *ngmodels.AlertRule
|
||||
}
|
||||
|
||||
type alertRuleInfo struct {
|
||||
evalCh chan *evaluation
|
||||
updateCh chan ruleVersionAndPauseStatus
|
||||
type alertRule struct {
|
||||
evalCh chan *Evaluation
|
||||
updateCh chan RuleVersionAndPauseStatus
|
||||
ctx context.Context
|
||||
stopFn util.CancelCauseFunc
|
||||
|
||||
@@ -96,7 +109,7 @@ type alertRuleInfo struct {
|
||||
tracer tracing.Tracer
|
||||
}
|
||||
|
||||
func newAlertRuleInfo(
|
||||
func newAlertRule(
|
||||
parent context.Context,
|
||||
appURL *url.URL,
|
||||
disableGrafanaFolder bool,
|
||||
@@ -111,11 +124,11 @@ func newAlertRuleInfo(
|
||||
tracer tracing.Tracer,
|
||||
evalAppliedHook func(ngmodels.AlertRuleKey, time.Time),
|
||||
stopAppliedHook func(ngmodels.AlertRuleKey),
|
||||
) *alertRuleInfo {
|
||||
) *alertRule {
|
||||
ctx, stop := util.WithCancelCause(parent)
|
||||
return &alertRuleInfo{
|
||||
evalCh: make(chan *evaluation),
|
||||
updateCh: make(chan ruleVersionAndPauseStatus),
|
||||
return &alertRule{
|
||||
evalCh: make(chan *Evaluation),
|
||||
updateCh: make(chan RuleVersionAndPauseStatus),
|
||||
ctx: ctx,
|
||||
stopFn: stop,
|
||||
appURL: appURL,
|
||||
@@ -141,9 +154,9 @@ func newAlertRuleInfo(
|
||||
// - false when the send operation is stopped
|
||||
//
|
||||
// the second element contains a dropped message that was sent by a concurrent sender.
|
||||
func (a *alertRuleInfo) eval(eval *evaluation) (bool, *evaluation) {
|
||||
func (a *alertRule) Eval(eval *Evaluation) (bool, *Evaluation) {
|
||||
// read the channel in unblocking manner to make sure that there is no concurrent send operation.
|
||||
var droppedMsg *evaluation
|
||||
var droppedMsg *Evaluation
|
||||
select {
|
||||
case droppedMsg = <-a.evalCh:
|
||||
default:
|
||||
@@ -158,7 +171,7 @@ func (a *alertRuleInfo) eval(eval *evaluation) (bool, *evaluation) {
|
||||
}
|
||||
|
||||
// update sends an instruction to the rule evaluation routine to update the scheduled rule to the specified version. The specified version must be later than the current version, otherwise no update will happen.
|
||||
func (a *alertRuleInfo) update(lastVersion ruleVersionAndPauseStatus) bool {
|
||||
func (a *alertRule) Update(lastVersion RuleVersionAndPauseStatus) bool {
|
||||
// check if the channel is not empty.
|
||||
select {
|
||||
case <-a.updateCh:
|
||||
@@ -176,11 +189,13 @@ func (a *alertRuleInfo) update(lastVersion ruleVersionAndPauseStatus) bool {
|
||||
}
|
||||
|
||||
// stop sends an instruction to the rule evaluation routine to shut down. an optional shutdown reason can be given.
|
||||
func (a *alertRuleInfo) stop(reason error) {
|
||||
a.stopFn(reason)
|
||||
func (a *alertRule) Stop(reason error) {
|
||||
if a.stopFn != nil {
|
||||
a.stopFn(reason)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *alertRuleInfo) run(key ngmodels.AlertRuleKey) error {
|
||||
func (a *alertRule) Run(key ngmodels.AlertRuleKey) error {
|
||||
grafanaCtx := ngmodels.WithRuleKey(a.ctx, key)
|
||||
logger := a.logger.FromContext(grafanaCtx)
|
||||
logger.Debug("Alert rule routine started")
|
||||
@@ -295,7 +310,7 @@ func (a *alertRuleInfo) run(key ngmodels.AlertRuleKey) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (a *alertRuleInfo) evaluate(ctx context.Context, key ngmodels.AlertRuleKey, f fingerprint, attempt int64, e *evaluation, span trace.Span, retry bool) error {
|
||||
func (a *alertRule) evaluate(ctx context.Context, key ngmodels.AlertRuleKey, f fingerprint, attempt int64, e *Evaluation, span trace.Span, retry bool) error {
|
||||
orgID := fmt.Sprint(key.OrgID)
|
||||
evalTotal := a.metrics.EvalTotal.WithLabelValues(orgID)
|
||||
evalDuration := a.metrics.EvalDuration.WithLabelValues(orgID)
|
||||
@@ -393,14 +408,14 @@ func (a *alertRuleInfo) evaluate(ctx context.Context, key ngmodels.AlertRuleKey,
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *alertRuleInfo) notify(ctx context.Context, key ngmodels.AlertRuleKey, states []state.StateTransition) {
|
||||
func (a *alertRule) notify(ctx context.Context, key ngmodels.AlertRuleKey, states []state.StateTransition) {
|
||||
expiredAlerts := state.FromAlertsStateToStoppedAlert(states, a.appURL, a.clock)
|
||||
if len(expiredAlerts.PostableAlerts) > 0 {
|
||||
a.sender.Send(ctx, key, expiredAlerts)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *alertRuleInfo) resetState(ctx context.Context, key ngmodels.AlertRuleKey, isPaused bool) {
|
||||
func (a *alertRule) resetState(ctx context.Context, key ngmodels.AlertRuleKey, isPaused bool) {
|
||||
rule := a.ruleProvider.get(key)
|
||||
reason := ngmodels.StateReasonUpdated
|
||||
if isPaused {
|
||||
@@ -411,7 +426,7 @@ func (a *alertRuleInfo) resetState(ctx context.Context, key ngmodels.AlertRuleKe
|
||||
}
|
||||
|
||||
// evalApplied is only used on tests.
|
||||
func (a *alertRuleInfo) evalApplied(alertDefKey ngmodels.AlertRuleKey, now time.Time) {
|
||||
func (a *alertRule) evalApplied(alertDefKey ngmodels.AlertRuleKey, now time.Time) {
|
||||
if a.evalAppliedHook == nil {
|
||||
return
|
||||
}
|
||||
@@ -420,7 +435,7 @@ func (a *alertRuleInfo) evalApplied(alertDefKey ngmodels.AlertRuleKey, now time.
|
||||
}
|
||||
|
||||
// stopApplied is only used on tests.
|
||||
func (a *alertRuleInfo) stopApplied(alertDefKey ngmodels.AlertRuleKey) {
|
||||
func (a *alertRule) stopApplied(alertDefKey ngmodels.AlertRuleKey) {
|
||||
if a.stopAppliedHook == nil {
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user