mirror of
https://github.com/grafana/grafana.git
synced 2024-11-25 18:30:41 -06:00
Alerting: Log scheduler maxAttempts, guard against invalid retry counts, log retry errors (#80234)
* Log maxAttempts, add guard, log retry errors
* fix whitespace
* Initialize evaluator in TestProcessTicks
This commit is contained in:
parent
1caaa56de0
commit
542741f748
@ -114,6 +114,12 @@ type SchedulerCfg struct {
|
||||
|
||||
// NewScheduler returns a new schedule.
|
||||
func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
|
||||
const minMaxAttempts = int64(1)
|
||||
if cfg.MaxAttempts < minMaxAttempts {
|
||||
cfg.Log.Warn("Invalid scheduler maxAttempts, using a safe minimum", "configured", cfg.MaxAttempts, "actual", minMaxAttempts)
|
||||
cfg.MaxAttempts = minMaxAttempts
|
||||
}
|
||||
|
||||
sch := schedule{
|
||||
registry: alertRuleInfoRegistry{alertRuleInfo: make(map[ngmodels.AlertRuleKey]*alertRuleInfo)},
|
||||
maxAttempts: cfg.MaxAttempts,
|
||||
@ -136,7 +142,7 @@ func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
|
||||
}
|
||||
|
||||
func (sch *schedule) Run(ctx context.Context) error {
|
||||
sch.log.Info("Starting scheduler", "tickInterval", sch.baseInterval)
|
||||
sch.log.Info("Starting scheduler", "tickInterval", sch.baseInterval, "maxAttempts", sch.maxAttempts)
|
||||
t := ticker.New(sch.clock, sch.baseInterval, sch.metrics.Ticker)
|
||||
defer t.Stop()
|
||||
|
||||
@ -383,6 +389,9 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
start := sch.clock.Now()
|
||||
|
||||
evalCtx := eval.NewContextWithPreviousResults(ctx, SchedulerUserFor(e.rule.OrgID), sch.newLoadedMetricsReader(e.rule))
|
||||
if sch.evaluatorFactory == nil {
|
||||
panic("evalfactory nil")
|
||||
}
|
||||
ruleEval, err := sch.evaluatorFactory.Create(evalCtx, e.rule.GetEvalCondition())
|
||||
var results eval.Results
|
||||
var dur time.Duration
|
||||
@ -551,7 +560,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
return
|
||||
}
|
||||
|
||||
logger.Error("Failed to evaluate rule", "version", ctx.rule.Version, "fingerprint", f, "attempt", attempt, "now", ctx.scheduledAt)
|
||||
logger.Error("Failed to evaluate rule", "version", ctx.rule.Version, "fingerprint", f, "attempt", attempt, "now", ctx.scheduledAt, "error", err)
|
||||
select {
|
||||
case <-tracingCtx.Done():
|
||||
logger.Error("Context has been cancelled while backing off", "version", ctx.rule.Version, "fingerprint", f, "attempt", attempt, "now", ctx.scheduledAt)
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/expr"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||
datasources "github.com/grafana/grafana/pkg/services/datasources/fakes"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
@ -66,15 +67,19 @@ func TestProcessTicks(t *testing.T) {
|
||||
Host: "localhost",
|
||||
}
|
||||
|
||||
cacheServ := &datasources.FakeCacheService{}
|
||||
evaluator := eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, cacheServ, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, &featuremgmt.FeatureManager{}, nil, tracing.InitializeTracerForTest()), &pluginstore.FakePluginStore{})
|
||||
|
||||
schedCfg := SchedulerCfg{
|
||||
BaseInterval: cfg.BaseInterval,
|
||||
C: mockedClock,
|
||||
AppURL: appUrl,
|
||||
RuleStore: ruleStore,
|
||||
Metrics: testMetrics.GetSchedulerMetrics(),
|
||||
AlertSender: notifier,
|
||||
Tracer: testTracer,
|
||||
Log: log.New("ngalert.scheduler"),
|
||||
BaseInterval: cfg.BaseInterval,
|
||||
C: mockedClock,
|
||||
AppURL: appUrl,
|
||||
EvaluatorFactory: evaluator,
|
||||
RuleStore: ruleStore,
|
||||
Metrics: testMetrics.GetSchedulerMetrics(),
|
||||
AlertSender: notifier,
|
||||
Tracer: testTracer,
|
||||
Log: log.New("ngalert.scheduler"),
|
||||
}
|
||||
managerCfg := state.ManagerCfg{
|
||||
Metrics: testMetrics.GetStateMetrics(),
|
||||
|
Loading…
Reference in New Issue
Block a user