Alerting: Contextual log provider for rule key (#57476)

* create contextual log context provider
* use contextual provider in scheduler
* init logger in the package
* use context for log context
* use context in state manager
This commit is contained in:
Yuriy Tseretyan 2022-10-26 19:16:02 -04:00 committed by GitHub
parent 59ffd9571e
commit 0a4121cef8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 33 additions and 15 deletions

View File

@ -202,6 +202,7 @@ func (s *Service) buildDSNode(dp *simple.DirectedGraph, rn *rawNode, req *Reques
// other nodes they must have already been executed and their results must
// already be in vars.
func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s *Service) (mathexp.Results, error) {
logger := logger.FromContext(ctx).New("datasourceType", dn.datasource.Type)
dsInstanceSettings, err := adapters.ModelToInstanceSettings(dn.datasource, s.decryptSecureJsonDataFn(ctx))
if err != nil {
return mathexp.Results{}, fmt.Errorf("%v: %w", "failed to convert datasource instance settings", err)

View File

@ -92,7 +92,7 @@ func (api *API) RegisterAPIEndpoints(m *metrics.API) {
ac: api.AccessControl,
}
evaluator := eval.NewEvaluator(api.Cfg, log.New("ngalert.eval"), api.DatasourceCache, api.ExpressionService)
evaluator := eval.NewEvaluator(api.Cfg, api.DatasourceCache, api.ExpressionService)
// Register endpoints for proxying to Alertmanager-compatible backends.
api.RegisterAlertmanagerApiEndpoints(NewForkingAM(

View File

@ -22,6 +22,8 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/data"
)
// logger is the package-level logger, created under the name "ngalert.eval".
// Code in this package derives a request-scoped logger from it via
// logger.FromContext instead of receiving a logger as a parameter.
var logger = log.New("ngalert.eval")
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
type Evaluator interface {
// ConditionEval executes conditions and evaluates the result.
@ -34,19 +36,16 @@ type Evaluator interface {
// evaluatorImpl is the concrete type returned by NewEvaluator as an Evaluator.
// It holds the dependencies handed to the constructor: the settings, the
// datasource cache, and the expression service.
type evaluatorImpl struct {
cfg *setting.Cfg
// NOTE(review): the hunk header (-34,19 +36,16) indicates three lines are
// dropped here; log appears to be the removed field, superseded by the
// package-level logger — confirm against the applied file.
log log.Logger
dataSourceCache datasources.CacheService
expressionService *expr.Service
}
func NewEvaluator(
cfg *setting.Cfg,
log log.Logger,
datasourceCache datasources.CacheService,
expressionService *expr.Service) Evaluator {
return &evaluatorImpl{
cfg: cfg,
log: log,
dataSourceCache: datasourceCache,
expressionService: expressionService,
}
@ -313,10 +312,10 @@ func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.Q
return result
}
func executeQueriesAndExpressions(ctx EvaluationContext, data []models.AlertQuery, exprService *expr.Service, dsCacheService datasources.CacheService, log log.Logger) (resp *backend.QueryDataResponse, err error) {
func executeQueriesAndExpressions(ctx EvaluationContext, data []models.AlertQuery, exprService *expr.Service, dsCacheService datasources.CacheService) (resp *backend.QueryDataResponse, err error) {
defer func() {
if e := recover(); e != nil {
log.Error("alert rule panic", "error", e, "stack", string(debug.Stack()))
logger.FromContext(ctx.Ctx).Error("alert rule panic", "error", e, "stack", string(debug.Stack()))
panicErr := fmt.Errorf("alert rule panic; please check the logs for the full stack")
if err != nil {
err = fmt.Errorf("queries and expressions execution failed: %w; %v", err, panicErr.Error())
@ -578,7 +577,7 @@ func (e *evaluatorImpl) QueriesAndExpressionsEval(ctx EvaluationContext, data []
timeoutCtx, cancelFn := ctx.WithTimeout(e.cfg.UnifiedAlerting.EvaluationTimeout)
defer cancelFn()
execResult, err := executeQueriesAndExpressions(timeoutCtx, data, e.expressionService, e.dataSourceCache, e.log)
execResult, err := executeQueriesAndExpressions(timeoutCtx, data, e.expressionService, e.dataSourceCache)
if err != nil {
return nil, fmt.Errorf("failed to execute conditions: %w", err)
}

View File

@ -12,7 +12,6 @@ import (
ptr "github.com/xorcare/pointer"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/datasources"
fakes "github.com/grafana/grafana/pkg/services/datasources/fakes"
"github.com/grafana/grafana/pkg/services/ngalert/models"
@ -444,7 +443,7 @@ func TestValidate(t *testing.T) {
cacheService := &fakes.FakeCacheService{}
condition := testCase.condition(cacheService)
evaluator := NewEvaluator(&setting.Cfg{ExpressionsEnabled: true}, log.New("test"), cacheService, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil))
evaluator := NewEvaluator(&setting.Cfg{ExpressionsEnabled: true}, cacheService, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil))
evalCtx := Context(context.Background(), u)
err := evaluator.Validate(evalCtx, condition)

View File

@ -1,6 +1,7 @@
package models
import (
"context"
"encoding/json"
"errors"
"fmt"
@ -457,3 +458,14 @@ func (g RulesGroup) SortByGroupIndex() {
return g[i].RuleGroupIndex < g[j].RuleGroupIndex
})
}
// ruleKeyContextKey is the unexported, zero-size type whose value is used as
// the context key by WithRuleKey and RuleKeyFromContext.
type ruleKeyContextKey struct{}
// WithRuleKey returns a context derived from ctx that carries ruleKey.
// The key can be read back with RuleKeyFromContext.
func WithRuleKey(ctx context.Context, ruleKey AlertRuleKey) context.Context {
	// The zero value of the key type is the only value ever used as the key.
	var ctxKey ruleKeyContextKey
	return context.WithValue(ctx, ctxKey, ruleKey)
}
// RuleKeyFromContext returns the AlertRuleKey stored in ctx by WithRuleKey.
// The boolean result is false, and the returned key is the zero value, when
// ctx holds no value under the rule-key context key or the stored value is
// not an AlertRuleKey.
func RuleKeyFromContext(ctx context.Context) (AlertRuleKey, bool) {
	stored := ctx.Value(ruleKeyContextKey{})
	ruleKey, found := stored.(AlertRuleKey)
	return ruleKey, found
}

View File

@ -183,7 +183,7 @@ func (ng *AlertNG) init() error {
schedCfg := schedule.SchedulerCfg{
Cfg: ng.Cfg.UnifiedAlerting,
C: clk,
Evaluator: eval.NewEvaluator(ng.Cfg, ng.Log, ng.DataSourceCache, ng.ExpressionService),
Evaluator: eval.NewEvaluator(ng.Cfg, ng.DataSourceCache, ng.ExpressionService),
RuleStore: store,
Metrics: ng.Metrics.GetSchedulerMetrics(),
AlertSender: alertsRouter,
@ -236,6 +236,14 @@ func (ng *AlertNG) init() error {
}
api.RegisterAPIEndpoints(ng.Metrics.GetAPIMetrics())
log.RegisterContextualLogProvider(func(ctx context.Context) ([]interface{}, bool) {
key, ok := models.RuleKeyFromContext(ctx)
if !ok {
return nil, false
}
return key.LogContext(), true
})
return DeclareFixedRoles(ng.accesscontrolService)
}

View File

@ -306,7 +306,8 @@ func (sch *schedule) schedulePeriodic(ctx context.Context, t *ticker.T) error {
}
func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertRuleKey, evalCh <-chan *evaluation, updateCh <-chan ruleVersion) error {
logger := sch.log.New(key.LogContext()...)
grafanaCtx = ngmodels.WithRuleKey(grafanaCtx, key)
logger := sch.log.FromContext(grafanaCtx)
logger.Debug("Alert rule routine started")
orgID := fmt.Sprint(key.OrgID)

View File

@ -20,7 +20,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
@ -482,7 +481,6 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
t.Helper()
mockedClock := clock.NewMock()
logger := log.New("ngalert schedule test")
if rs == nil {
rs = newFakeRulesStore()
@ -494,7 +492,7 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
var evaluator eval.Evaluator = evalMock
if evalMock == nil {
evaluator = eval.NewEvaluator(&setting.Cfg{ExpressionsEnabled: true}, logger, nil, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil))
evaluator = eval.NewEvaluator(&setting.Cfg{ExpressionsEnabled: true}, nil, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil))
}
if registry == nil {

View File

@ -162,7 +162,7 @@ func (st *Manager) ResetStateByRuleUID(ctx context.Context, ruleKey ngModels.Ale
// ProcessEvalResults updates the current states that belong to a rule with the evaluation results.
// if extraLabels is not empty, those labels will be added to every state. The extraLabels take precedence over rule labels and result labels
func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time, alertRule *ngModels.AlertRule, results eval.Results, extraLabels data.Labels) []*State {
logger := st.log.New(alertRule.GetKey().LogContext()...)
logger := st.log.FromContext(ctx)
logger.Debug("State manager processing evaluation results", "resultCount", len(results))
var states []*State
processedResults := make(map[string]*State, len(results))