mirror of
https://github.com/grafana/grafana.git
synced 2025-01-08 15:13:30 -06:00
Alerting: More graceful handling of NoData in recording rules (#90312)
* Handle NoData as its own case
* Debug
* Scalars parseable by CollectionReader
* Fix linter
* Or, not and
This commit is contained in:
parent
c3b9c9b239
commit
88ed77e7e8
@ -439,6 +439,24 @@ type NumberValueCapture struct {
|
||||
Value *float64
|
||||
}
|
||||
|
||||
func IsNoData(res backend.DataResponse) bool {
|
||||
// There are two possible frame formats for No Data:
|
||||
//
|
||||
// 1. A response with no frames
|
||||
// 2. A response with 1 frame but no fields
|
||||
//
|
||||
// The first format is not documented in the data plane contract but needs to be
|
||||
// supported for older datasource plugins. The second format is documented in
|
||||
// https://github.com/grafana/grafana-plugin-sdk-go/blob/main/data/contract_docs/contract.md
|
||||
// and is what datasource plugins should use going forward.
|
||||
if len(res.Frames) <= 1 {
|
||||
hasNoFrames := len(res.Frames) == 0
|
||||
hasNoFields := len(res.Frames) == 1 && len(res.Frames[0].Fields) == 0
|
||||
return hasNoFrames || hasNoFields
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
|
||||
// captures contains the values of all instant queries and expressions for each dimension
|
||||
captures := make(map[string]map[data.Fingerprint]NumberValueCapture)
|
||||
@ -468,27 +486,14 @@ func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.Q
|
||||
result.Error = FindConditionError(execResp, c.Condition)
|
||||
|
||||
for refID, res := range execResp.Responses {
|
||||
// There are two possible frame formats for No Data:
|
||||
//
|
||||
// 1. A response with no frames
|
||||
// 2. A response with 1 frame but no fields
|
||||
//
|
||||
// The first format is not documented in the data plane contract but needs to be
|
||||
// supported for older datasource plugins. The second format is documented in
|
||||
// https://github.com/grafana/grafana-plugin-sdk-go/blob/main/data/contract_docs/contract.md
|
||||
// and is what datasource plugins should use going forward.
|
||||
if len(res.Frames) <= 1 {
|
||||
if IsNoData(res) {
|
||||
// To make sure NoData is nil when Results are also nil we wait to initialize
|
||||
// NoData until there is at least one query or expression that returned no data
|
||||
if result.NoData == nil {
|
||||
result.NoData = make(map[string]string)
|
||||
}
|
||||
hasNoFrames := len(res.Frames) == 0
|
||||
hasNoFields := len(res.Frames) == 1 && len(res.Frames[0].Fields) == 0
|
||||
if hasNoFrames || hasNoFields {
|
||||
if s, ok := datasourceUIDsForRefIDs[refID]; ok && expr.NodeTypeFromDatasourceUID(s) == expr.TypeDatasourceNode { // TODO perhaps extract datasource UID from ML expression too.
|
||||
result.NoData[refID] = s
|
||||
}
|
||||
if s, ok := datasourceUIDsForRefIDs[refID]; ok && expr.NodeTypeFromDatasourceUID(s) == expr.TypeDatasourceNode { // TODO perhaps extract datasource UID from ML expression too.
|
||||
result.NoData[refID] = s
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -217,11 +217,12 @@ func (r *recordingRule) doEvaluate(ctx context.Context, ev *Evaluation) {
|
||||
if r.maxAttempts > 0 {
|
||||
logger.Error("Recording rule evaluation failed after all attempts", "lastError", latestError)
|
||||
}
|
||||
} else {
|
||||
logger.Debug("Recording rule evaluation succeeded")
|
||||
r.lastError.Store(nil)
|
||||
r.health.Store("ok")
|
||||
return
|
||||
}
|
||||
logger.Debug("Recording rule evaluation succeeded")
|
||||
span.AddEvent("rule evaluated")
|
||||
r.lastError.Store(nil)
|
||||
r.health.Store("ok")
|
||||
}
|
||||
|
||||
func (r *recordingRule) tryEvaluation(ctx context.Context, ev *Evaluation, logger log.Logger) error {
|
||||
@ -239,17 +240,20 @@ func (r *recordingRule) tryEvaluation(ctx context.Context, ev *Evaluation, logge
|
||||
}
|
||||
// TODO: This is missing dedicated logic for NoData. If NoData we can skip the write.
|
||||
|
||||
logger.Info("Recording rule evaluated", "results", result, "duration", evalDur)
|
||||
logger.Debug("Recording rule query completed", "resultCount", len(result.Responses), "duration", evalDur)
|
||||
span := trace.SpanFromContext(ctx)
|
||||
span.AddEvent("rule evaluated", trace.WithAttributes(
|
||||
span.AddEvent("query succeeded", trace.WithAttributes(
|
||||
attribute.Int64("results", int64(len(result.Responses))),
|
||||
))
|
||||
|
||||
frames, err := r.frameRef(ev.rule.Record.From, result)
|
||||
if err != nil {
|
||||
span.SetStatus(codes.Error, "failed to extract frames from rule evaluation")
|
||||
span.RecordError(err)
|
||||
return fmt.Errorf("failed to extract frames from rule evaluation: %w", err)
|
||||
span.AddEvent("query returned no data, nothing to write", trace.WithAttributes(
|
||||
attribute.String("reason", err.Error()),
|
||||
))
|
||||
logger.Debug("Query returned no data", "reason", err)
|
||||
r.health.Store("nodata")
|
||||
return nil
|
||||
}
|
||||
|
||||
writeStart := r.clock.Now()
|
||||
@ -292,16 +296,20 @@ func (r *recordingRule) evaluationDoneTestHook(ev *Evaluation) {
|
||||
r.evalAppliedHook(r.key, ev.scheduledAt)
|
||||
}
|
||||
|
||||
// frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data.
|
||||
func (r *recordingRule) frameRef(refID string, resp *backend.QueryDataResponse) (data.Frames, error) {
|
||||
if len(resp.Responses) == 0 {
|
||||
return nil, fmt.Errorf("no responses returned from rule evaluation")
|
||||
}
|
||||
|
||||
for ref, resp := range resp.Responses {
|
||||
if ref == refID {
|
||||
return resp.Frames, nil
|
||||
}
|
||||
targetNode, ok := resp.Responses[refID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no response with refID %s found in rule evaluation", refID)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no response with refID %s found in rule evaluation", refID)
|
||||
if eval.IsNoData(targetNode) {
|
||||
return nil, fmt.Errorf("response with refID %s has no data", refID)
|
||||
}
|
||||
|
||||
return targetNode.Frames, nil
|
||||
}
|
||||
|
@ -271,6 +271,47 @@ func TestRecordingRule_Integration(t *testing.T) {
|
||||
require.Zero(t, writeTarget.RequestsCount)
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("nodata rule", func(t *testing.T) {
|
||||
rule := gen.With(withQueryForHealth("nodata")).GenerateRef()
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
folderTitle := ruleStore.getNamespaceTitle(rule.NamespaceUID)
|
||||
ruleFactory := ruleFactoryFromScheduler(sch)
|
||||
|
||||
process := ruleFactory.new(context.Background(), rule)
|
||||
evalDoneChan := make(chan time.Time)
|
||||
process.(*recordingRule).evalAppliedHook = func(_ models.AlertRuleKey, t time.Time) {
|
||||
evalDoneChan <- t
|
||||
}
|
||||
now := time.Now()
|
||||
|
||||
go func() {
|
||||
_ = process.Run()
|
||||
}()
|
||||
|
||||
t.Run("status shows no evaluations", func(t *testing.T) {
|
||||
status := process.(*recordingRule).Status()
|
||||
|
||||
require.Equal(t, "unknown", status.Health)
|
||||
require.Nil(t, status.LastError)
|
||||
require.Zero(t, status.EvaluationTimestamp)
|
||||
require.Zero(t, status.EvaluationDuration)
|
||||
})
|
||||
|
||||
process.Eval(&Evaluation{
|
||||
scheduledAt: now,
|
||||
rule: rule,
|
||||
folderTitle: folderTitle,
|
||||
})
|
||||
_ = waitForTimeChannel(t, evalDoneChan)
|
||||
|
||||
t.Run("status shows evaluation", func(t *testing.T) {
|
||||
status := process.(*recordingRule).Status()
|
||||
|
||||
// TODO: OK expected for nil result but having a point. Probably should change.
|
||||
require.Equal(t, "ok", status.Health)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func withQueryForHealth(health string) models.AlertRuleMutator {
|
||||
@ -288,6 +329,12 @@ func withQueryForHealth(health string) models.AlertRuleMutator {
|
||||
"type":"math",
|
||||
"expression":"$NOTEXIST"
|
||||
}`
|
||||
case "nodata":
|
||||
expression = `{
|
||||
"datasourceUid": "__expr__",
|
||||
"type":"math",
|
||||
"expression":"null()"
|
||||
}`
|
||||
default:
|
||||
panic(fmt.Sprintf("Query generation for health %s is not supported yet", health))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user