Alerting: More graceful handling of NoData in recording rules (#90312)

* Handle NoData as its own case

* Debug

* Scalars parseable by CollectionReader

* fix linter

* Or not and
This commit is contained in:
Alexander Weaver 2024-07-17 15:24:03 -05:00 committed by GitHub
parent c3b9c9b239
commit 88ed77e7e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 90 additions and 30 deletions

View File

@ -439,6 +439,24 @@ type NumberValueCapture struct {
Value *float64
}
// IsNoData reports whether res uses one of the two frame formats that
// represent No Data:
//
//  1. A response with no frames.
//  2. A response with exactly one frame that has no fields.
//
// The first format is not documented in the data plane contract but needs to
// be supported for older datasource plugins. The second format is documented in
// https://github.com/grafana/grafana-plugin-sdk-go/blob/main/data/contract_docs/contract.md
// and is what datasource plugins should use going forward.
//
// Any response carrying more than one frame is never treated as No Data.
func IsNoData(res backend.DataResponse) bool {
	switch len(res.Frames) {
	case 0:
		// Legacy format: nothing was returned at all.
		return true
	case 1:
		// Contract format: a single frame without any fields.
		return len(res.Frames[0].Fields) == 0
	default:
		return false
	}
}
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
// captures contains the values of all instant queries and expressions for each dimension
captures := make(map[string]map[data.Fingerprint]NumberValueCapture)
@ -468,27 +486,14 @@ func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.Q
result.Error = FindConditionError(execResp, c.Condition)
for refID, res := range execResp.Responses {
// There are two possible frame formats for No Data:
//
// 1. A response with no frames
// 2. A response with 1 frame but no fields
//
// The first format is not documented in the data plane contract but needs to be
// supported for older datasource plugins. The second format is documented in
// https://github.com/grafana/grafana-plugin-sdk-go/blob/main/data/contract_docs/contract.md
// and is what datasource plugins should use going forward.
if len(res.Frames) <= 1 {
if IsNoData(res) {
// To make sure NoData is nil when Results are also nil we wait to initialize
// NoData until there is at least one query or expression that returned no data
if result.NoData == nil {
result.NoData = make(map[string]string)
}
hasNoFrames := len(res.Frames) == 0
hasNoFields := len(res.Frames) == 1 && len(res.Frames[0].Fields) == 0
if hasNoFrames || hasNoFields {
if s, ok := datasourceUIDsForRefIDs[refID]; ok && expr.NodeTypeFromDatasourceUID(s) == expr.TypeDatasourceNode { // TODO perhaps extract datasource UID from ML expression too.
result.NoData[refID] = s
}
if s, ok := datasourceUIDsForRefIDs[refID]; ok && expr.NodeTypeFromDatasourceUID(s) == expr.TypeDatasourceNode { // TODO perhaps extract datasource UID from ML expression too.
result.NoData[refID] = s
}
}

View File

@ -217,11 +217,12 @@ func (r *recordingRule) doEvaluate(ctx context.Context, ev *Evaluation) {
if r.maxAttempts > 0 {
logger.Error("Recording rule evaluation failed after all attempts", "lastError", latestError)
}
} else {
logger.Debug("Recording rule evaluation succeeded")
r.lastError.Store(nil)
r.health.Store("ok")
return
}
logger.Debug("Recording rule evaluation succeeded")
span.AddEvent("rule evaluated")
r.lastError.Store(nil)
r.health.Store("ok")
}
func (r *recordingRule) tryEvaluation(ctx context.Context, ev *Evaluation, logger log.Logger) error {
@ -239,17 +240,20 @@ func (r *recordingRule) tryEvaluation(ctx context.Context, ev *Evaluation, logge
}
// TODO: This is missing dedicated logic for NoData. If NoData we can skip the write.
logger.Info("Recording rule evaluated", "results", result, "duration", evalDur)
logger.Debug("Recording rule query completed", "resultCount", len(result.Responses), "duration", evalDur)
span := trace.SpanFromContext(ctx)
span.AddEvent("rule evaluated", trace.WithAttributes(
span.AddEvent("query succeeded", trace.WithAttributes(
attribute.Int64("results", int64(len(result.Responses))),
))
frames, err := r.frameRef(ev.rule.Record.From, result)
if err != nil {
span.SetStatus(codes.Error, "failed to extract frames from rule evaluation")
span.RecordError(err)
return fmt.Errorf("failed to extract frames from rule evaluation: %w", err)
span.AddEvent("query returned no data, nothing to write", trace.WithAttributes(
attribute.String("reason", err.Error()),
))
logger.Debug("Query returned no data", "reason", err)
r.health.Store("nodata")
return nil
}
writeStart := r.clock.Now()
@ -292,16 +296,20 @@ func (r *recordingRule) evaluationDoneTestHook(ev *Evaluation) {
r.evalAppliedHook(r.key, ev.scheduledAt)
}
// frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data.
func (r *recordingRule) frameRef(refID string, resp *backend.QueryDataResponse) (data.Frames, error) {
if len(resp.Responses) == 0 {
return nil, fmt.Errorf("no responses returned from rule evaluation")
}
for ref, resp := range resp.Responses {
if ref == refID {
return resp.Frames, nil
}
targetNode, ok := resp.Responses[refID]
if !ok {
return nil, fmt.Errorf("no response with refID %s found in rule evaluation", refID)
}
return nil, fmt.Errorf("no response with refID %s found in rule evaluation", refID)
if eval.IsNoData(targetNode) {
return nil, fmt.Errorf("response with refID %s has no data", refID)
}
return targetNode.Frames, nil
}

View File

@ -271,6 +271,47 @@ func TestRecordingRule_Integration(t *testing.T) {
require.Zero(t, writeTarget.RequestsCount)
})
})
t.Run("nodata rule", func(t *testing.T) {
rule := gen.With(withQueryForHealth("nodata")).GenerateRef()
ruleStore.PutRule(context.Background(), rule)
folderTitle := ruleStore.getNamespaceTitle(rule.NamespaceUID)
ruleFactory := ruleFactoryFromScheduler(sch)
process := ruleFactory.new(context.Background(), rule)
evalDoneChan := make(chan time.Time)
process.(*recordingRule).evalAppliedHook = func(_ models.AlertRuleKey, t time.Time) {
evalDoneChan <- t
}
now := time.Now()
go func() {
_ = process.Run()
}()
t.Run("status shows no evaluations", func(t *testing.T) {
status := process.(*recordingRule).Status()
require.Equal(t, "unknown", status.Health)
require.Nil(t, status.LastError)
require.Zero(t, status.EvaluationTimestamp)
require.Zero(t, status.EvaluationDuration)
})
process.Eval(&Evaluation{
scheduledAt: now,
rule: rule,
folderTitle: folderTitle,
})
_ = waitForTimeChannel(t, evalDoneChan)
t.Run("status shows evaluation", func(t *testing.T) {
status := process.(*recordingRule).Status()
// TODO: "ok" is expected here because the result is nil but still has a point. This should probably change.
require.Equal(t, "ok", status.Health)
})
})
}
func withQueryForHealth(health string) models.AlertRuleMutator {
@ -288,6 +329,12 @@ func withQueryForHealth(health string) models.AlertRuleMutator {
"type":"math",
"expression":"$NOTEXIST"
}`
case "nodata":
expression = `{
"datasourceUid": "__expr__",
"type":"math",
"expression":"null()"
}`
default:
panic(fmt.Sprintf("Query generation for health %s is not supported yet", health))
}