mirror of
https://github.com/grafana/grafana.git
synced 2025-01-27 00:37:04 -06:00
48b5ac779b
* Move scope type vars to testutil package * Expose parts of state historian for use in annotation backend * Implement Loki ASH Annotation store This store will only implement the `Get` method of a RepositoryImpl since alert state history writes to Loki elsewhere. * Use interface for Loki HTTP Client * Add tests for Loki ASH Annotation store * Add missing test * Fix lint * Organize tests * Add filter tests * Improve tests * Move filter logic into outer function * Fix lint * Add comment * Fix tests * Fix lint * Rename historian store + refactor * Cleanup historian store * Fix tests * Minor cleanup * Use new `ShouldRecordAnnotation` filter * Fix logic and add tests for this check * Fix typos, remove unused variables, `< 1` -> `== 0` * More closely mimic RBAC filter from xorm to ensure correct logic * Move off weaveworks client * Address PR comments
831 lines
26 KiB
Go
831 lines
26 KiB
Go
// Package eval executes the condition for an alert definition, evaluates the condition results, and
|
|
// returns the alert instance states.
|
|
package eval
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"runtime/debug"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
|
"github.com/grafana/grafana-plugin-sdk-go/data"
|
|
|
|
"github.com/grafana/grafana/pkg/expr"
|
|
"github.com/grafana/grafana/pkg/expr/classic"
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
|
"github.com/grafana/grafana/pkg/plugins"
|
|
"github.com/grafana/grafana/pkg/services/datasources"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
|
"github.com/grafana/grafana/pkg/services/pluginsintegration/pluginstore"
|
|
"github.com/grafana/grafana/pkg/setting"
|
|
"github.com/grafana/grafana/pkg/util/errutil"
|
|
)
|
|
|
|
// logger is the package-level logger, created under the name "ngalert.eval".
var logger = log.New("ngalert.eval")
|
|
|
|
type EvaluatorFactory interface {
|
|
// Validate validates that the condition is correct. Returns nil if the condition is correct. Otherwise, error that describes the failure
|
|
Validate(ctx EvaluationContext, condition models.Condition) error
|
|
// Create builds an evaluator pipeline ready to evaluate a rule's query
|
|
Create(ctx EvaluationContext, condition models.Condition) (ConditionEvaluator, error)
|
|
}
|
|
|
|
//go:generate mockery --name ConditionEvaluator --structname ConditionEvaluatorMock --with-expecter --output eval_mocks --outpkg eval_mocks
|
|
type ConditionEvaluator interface {
|
|
// EvaluateRaw evaluates the condition and returns raw backend response backend.QueryDataResponse
|
|
EvaluateRaw(ctx context.Context, now time.Time) (resp *backend.QueryDataResponse, err error)
|
|
// Evaluate evaluates the condition and converts the response to Results
|
|
Evaluate(ctx context.Context, now time.Time) (Results, error)
|
|
}
|
|
|
|
type expressionService interface {
|
|
ExecutePipeline(ctx context.Context, now time.Time, pipeline expr.DataPipeline) (*backend.QueryDataResponse, error)
|
|
}
|
|
|
|
type conditionEvaluator struct {
|
|
pipeline expr.DataPipeline
|
|
expressionService expressionService
|
|
condition models.Condition
|
|
evalTimeout time.Duration
|
|
}
|
|
|
|
func (r *conditionEvaluator) EvaluateRaw(ctx context.Context, now time.Time) (resp *backend.QueryDataResponse, err error) {
|
|
defer func() {
|
|
if e := recover(); e != nil {
|
|
logger.FromContext(ctx).Error("Alert rule panic", "error", e, "stack", string(debug.Stack()))
|
|
panicErr := fmt.Errorf("alert rule panic; please check the logs for the full stack")
|
|
if err != nil {
|
|
err = fmt.Errorf("queries and expressions execution failed: %w; %v", err, panicErr.Error())
|
|
} else {
|
|
err = panicErr
|
|
}
|
|
}
|
|
}()
|
|
|
|
execCtx := ctx
|
|
if r.evalTimeout >= 0 {
|
|
timeoutCtx, cancel := context.WithTimeout(ctx, r.evalTimeout)
|
|
defer cancel()
|
|
execCtx = timeoutCtx
|
|
}
|
|
return r.expressionService.ExecutePipeline(execCtx, now, r.pipeline)
|
|
}
|
|
|
|
// Evaluate evaluates the condition and converts the response to Results
|
|
func (r *conditionEvaluator) Evaluate(ctx context.Context, now time.Time) (Results, error) {
|
|
response, err := r.EvaluateRaw(ctx, now)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
execResults := queryDataResponseToExecutionResults(r.condition, response)
|
|
return evaluateExecutionResult(execResults, now), nil
|
|
}
|
|
|
|
type evaluatorImpl struct {
|
|
evaluationTimeout time.Duration
|
|
dataSourceCache datasources.CacheService
|
|
expressionService *expr.Service
|
|
pluginsStore pluginstore.Store
|
|
}
|
|
|
|
func NewEvaluatorFactory(
|
|
cfg setting.UnifiedAlertingSettings,
|
|
datasourceCache datasources.CacheService,
|
|
expressionService *expr.Service,
|
|
pluginsStore pluginstore.Store,
|
|
) EvaluatorFactory {
|
|
return &evaluatorImpl{
|
|
evaluationTimeout: cfg.EvaluationTimeout,
|
|
dataSourceCache: datasourceCache,
|
|
expressionService: expressionService,
|
|
pluginsStore: pluginsStore,
|
|
}
|
|
}
|
|
|
|
// invalidEvalResultFormatError reports that the evaluation results of an alert
// definition do not have the expected format.
type invalidEvalResultFormatError struct {
	// refID identifies the alert definition the results belong to.
	refID string
	// reason explains what is wrong with the format.
	reason string
	// err is an optional underlying error.
	err error
}

// Error renders the refID and the reason, followed by the underlying error's
// message when there is one.
func (e *invalidEvalResultFormatError) Error() string {
	msg := fmt.Sprintf("invalid format of evaluation results for the alert definition %s: %s", e.refID, e.reason)
	if e.err == nil {
		return msg
	}
	return msg + ": " + e.err.Error()
}

// Unwrap exposes the underlying error (possibly nil) to errors.Is and errors.As.
func (e *invalidEvalResultFormatError) Unwrap() error {
	return e.err
}
|
|
|
|
// ExecutionResults contains the unevaluated results from executing
|
|
// a condition.
|
|
type ExecutionResults struct {
|
|
// Condition contains the results of the condition
|
|
Condition data.Frames
|
|
|
|
// Results contains the results of all queries, reduce and math expressions
|
|
Results map[string]data.Frames
|
|
|
|
// Errors contains a map of RefIDs that returned an error
|
|
Errors map[string]error
|
|
|
|
// NoData contains the DatasourceUID for RefIDs that returned no data.
|
|
NoData map[string]string
|
|
|
|
Error error
|
|
}
|
|
|
|
// Results is a slice of evaluated alert instances states.
|
|
type Results []Result
|
|
|
|
// HasErrors returns true when Results contains at least one element with error
|
|
func (evalResults Results) HasErrors() bool {
|
|
for _, r := range evalResults {
|
|
if r.State == Error {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// HasNonRetryableErrors returns true if we have at least 1 result with:
|
|
// 1. A `State` of `Error`
|
|
// 2. The `Error` attribute is not nil
|
|
// 3. The `Error` type is of `&invalidEvalResultFormatError`
|
|
// Our thinking with this approach, is that we don't want to retry errors that have relation with invalid alert definition format.
|
|
func (evalResults Results) HasNonRetryableErrors() bool {
|
|
for _, r := range evalResults {
|
|
if r.State == Error && r.Error != nil {
|
|
var nonRetryableError *invalidEvalResultFormatError
|
|
if errors.As(r.Error, &nonRetryableError) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// HasErrors returns true when Results contains at least one element and all elements are errors
|
|
func (evalResults Results) IsError() bool {
|
|
for _, r := range evalResults {
|
|
if r.State != Error {
|
|
return false
|
|
}
|
|
}
|
|
return len(evalResults) > 0
|
|
}
|
|
|
|
// IsNoData returns true when all items are NoData or Results is empty
|
|
func (evalResults Results) IsNoData() bool {
|
|
for _, result := range evalResults {
|
|
if result.State != NoData {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Error returns the aggregated `error` of all results of which state is `Error`.
|
|
func (evalResults Results) Error() error {
|
|
var errs []error
|
|
for _, result := range evalResults {
|
|
if result.State == Error && result.Error != nil {
|
|
errs = append(errs, result.Error)
|
|
}
|
|
}
|
|
|
|
return errors.Join(errs...)
|
|
}
|
|
|
|
// Result contains the evaluated State of an alert instance
|
|
// identified by its labels.
|
|
type Result struct {
|
|
Instance data.Labels
|
|
State State // Enum
|
|
|
|
// Error message for Error state. should be nil if State != Error.
|
|
Error error
|
|
|
|
// Results contains the results of all queries, reduce and math expressions
|
|
Results map[string]data.Frames
|
|
|
|
// Values contains the labels and values for all Threshold, Reduce and Math expressions,
|
|
// and all conditions of a Classic Condition that are firing. Threshold, Reduce and Math
|
|
// expressions are indexed by their Ref ID, while conditions in a Classic Condition are
|
|
// indexed by their Ref ID and the index of the condition. For example, B0, B1, etc.
|
|
Values map[string]NumberValueCapture
|
|
|
|
EvaluatedAt time.Time
|
|
EvaluationDuration time.Duration
|
|
// EvaluationString is a string representation of evaluation data such
|
|
// as EvalMatches (from "classic condition"), and in the future from operations
|
|
// like SSE "math".
|
|
EvaluationString string
|
|
}
|
|
|
|
func NewResultFromError(err error, evaluatedAt time.Time, duration time.Duration) Result {
|
|
return Result{
|
|
State: Error,
|
|
Error: err,
|
|
EvaluatedAt: evaluatedAt,
|
|
EvaluationDuration: duration,
|
|
}
|
|
}
|
|
|
|
// State is an enum of the evaluation State for an alert instance.
type State int

const (
	// Normal is the eval state for an alert instance condition
	// that evaluated to false.
	Normal State = iota

	// Alerting is the eval state for an alert instance condition
	// that evaluated to true (Alerting).
	Alerting

	// Pending is the eval state for an alert instance condition
	// that evaluated to true (Alerting) but has not yet met
	// the For duration defined in AlertRule.
	Pending

	// NoData is the eval state for an alert rule condition
	// that evaluated to NoData.
	NoData

	// Error is the eval state for an alert rule condition
	// that evaluated to Error.
	Error
)

// IsValid reports whether s is one of the declared states (Normal through
// Error). Negative values, including the -1 that ParseStateString returns on
// failure, are not valid.
func (s State) IsValid() bool {
	return s >= Normal && s <= Error
}

// String returns the name of the state. A value outside the declared range is
// rendered as "State(<n>)" rather than causing an index-out-of-range panic.
func (s State) String() string {
	names := [...]string{"Normal", "Alerting", "Pending", "NoData", "Error"}
	if s < 0 || int(s) >= len(names) {
		return "State(" + strconv.Itoa(int(s)) + ")"
	}
	return names[s]
}

// ParseStateString converts the case-insensitive name of a state into its
// State value. For an unknown name it returns -1 and an error.
func ParseStateString(repr string) (State, error) {
	switch strings.ToLower(repr) {
	case "normal":
		return Normal, nil
	case "alerting":
		return Alerting, nil
	case "pending":
		return Pending, nil
	case "nodata":
		return NoData, nil
	case "error":
		return Error, nil
	default:
		return -1, fmt.Errorf("invalid state: %s", repr)
	}
}
|
|
|
|
func buildDatasourceHeaders(ctx context.Context) map[string]string {
|
|
headers := map[string]string{
|
|
// Many data sources check this in query method as sometimes alerting needs special considerations.
|
|
// Several existing systems also compare against the value of this header. Altering this constitutes a breaking change.
|
|
//
|
|
// Note: The spelling of this headers is intentionally degenerate from the others for compatibility reasons.
|
|
// When sent over a network, the key of this header is canonicalized to "Fromalert".
|
|
// However, some datasources still compare against the string "FromAlert".
|
|
models.FromAlertHeaderName: "true",
|
|
|
|
models.CacheSkipHeaderName: "true",
|
|
}
|
|
|
|
key, ok := models.RuleKeyFromContext(ctx)
|
|
if ok {
|
|
headers["X-Rule-Uid"] = key.UID
|
|
headers["X-Grafana-Org-Id"] = strconv.FormatInt(key.OrgID, 10)
|
|
}
|
|
|
|
return headers
|
|
}
|
|
|
|
// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
|
|
func getExprRequest(ctx EvaluationContext, condition models.Condition, dsCacheService datasources.CacheService, reader AlertingResultsReader) (*expr.Request, error) {
|
|
req := &expr.Request{
|
|
OrgId: ctx.User.GetOrgID(),
|
|
Headers: buildDatasourceHeaders(ctx.Ctx),
|
|
User: ctx.User,
|
|
}
|
|
datasources := make(map[string]*datasources.DataSource, len(condition.Data))
|
|
|
|
for _, q := range condition.Data {
|
|
var err error
|
|
ds, ok := datasources[q.DatasourceUID]
|
|
if !ok {
|
|
switch nodeType := expr.NodeTypeFromDatasourceUID(q.DatasourceUID); nodeType {
|
|
case expr.TypeDatasourceNode:
|
|
ds, err = dsCacheService.GetDatasourceByUID(ctx.Ctx, q.DatasourceUID, ctx.User, false /*skipCache*/)
|
|
default:
|
|
ds, err = expr.DataSourceModelFromNodeType(nodeType)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to build query '%s': %w", q.RefID, err)
|
|
}
|
|
datasources[q.DatasourceUID] = ds
|
|
}
|
|
|
|
// TODO rewrite the code below and remove the mutable component from AlertQuery
|
|
|
|
// if the query is command expression and it's a hysteresis, patch it with the current state
|
|
// it's important to do this before GetModel
|
|
if ds.Type == expr.DatasourceType {
|
|
isHysteresis, err := q.IsHysteresisExpression()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to build query '%s': %w", q.RefID, err)
|
|
}
|
|
if isHysteresis {
|
|
// make sure we allow hysteresis expressions to be specified only as the alert condition.
|
|
// This guarantees us that the AlertResultsReader can be correctly applied to the expression tree.
|
|
if q.RefID != condition.Condition {
|
|
return nil, fmt.Errorf("recovery threshold '%s' is only allowed to be the alert condition", q.RefID)
|
|
}
|
|
if reader != nil {
|
|
logger.FromContext(ctx.Ctx).Debug("Detected hysteresis threshold command. Populating with the results")
|
|
err = q.PatchHysteresisExpression(reader.Read())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to amend hysteresis command '%s': %w", q.RefID, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
model, err := q.GetModel()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get query model from '%s': %w", q.RefID, err)
|
|
}
|
|
interval, err := q.GetIntervalDuration()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to retrieve intervalMs from '%s': %w", q.RefID, err)
|
|
}
|
|
|
|
maxDatapoints, err := q.GetMaxDatapoints()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to retrieve maxDatapoints from '%s': %w", q.RefID, err)
|
|
}
|
|
|
|
req.Queries = append(req.Queries, expr.Query{
|
|
TimeRange: q.RelativeTimeRange.ToTimeRange(),
|
|
DataSource: ds,
|
|
JSON: model,
|
|
Interval: interval,
|
|
RefID: q.RefID,
|
|
MaxDataPoints: maxDatapoints,
|
|
QueryType: q.QueryType,
|
|
})
|
|
}
|
|
return req, nil
|
|
}
|
|
|
|
type NumberValueCapture struct {
|
|
Var string // RefID
|
|
Labels data.Labels
|
|
|
|
Value *float64
|
|
}
|
|
|
|
//nolint:gocyclo
|
|
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
|
|
// captures contains the values of all instant queries and expressions for each dimension
|
|
captures := make(map[string]map[data.Fingerprint]NumberValueCapture)
|
|
captureFn := func(refID string, labels data.Labels, value *float64) {
|
|
m := captures[refID]
|
|
if m == nil {
|
|
m = make(map[data.Fingerprint]NumberValueCapture)
|
|
}
|
|
fp := labels.Fingerprint()
|
|
m[fp] = NumberValueCapture{
|
|
Var: refID,
|
|
Value: value,
|
|
Labels: labels.Copy(),
|
|
}
|
|
captures[refID] = m
|
|
}
|
|
|
|
// datasourceUIDsForRefIDs is a short-lived lookup table of RefID to DatasourceUID
|
|
// for efficient lookups of the DatasourceUID when a RefID returns no data
|
|
datasourceUIDsForRefIDs := make(map[string]string)
|
|
for _, next := range c.Data {
|
|
datasourceUIDsForRefIDs[next.RefID] = next.DatasourceUID
|
|
}
|
|
|
|
result := ExecutionResults{Results: make(map[string]data.Frames)}
|
|
for refID, res := range execResp.Responses {
|
|
if res.Error != nil {
|
|
if result.Errors == nil {
|
|
result.Errors = make(map[string]error)
|
|
}
|
|
result.Errors[refID] = res.Error
|
|
if refID == c.Condition {
|
|
result.Error = res.Error
|
|
}
|
|
}
|
|
|
|
// There are two possible frame formats for No Data:
|
|
//
|
|
// 1. A response with no frames
|
|
// 2. A response with 1 frame but no fields
|
|
//
|
|
// The first format is not documented in the data plane contract but needs to be
|
|
// supported for older datasource plugins. The second format is documented in
|
|
// https://github.com/grafana/grafana-plugin-sdk-go/blob/main/data/contract_docs/contract.md
|
|
// and is what datasource plugins should use going forward.
|
|
if len(res.Frames) <= 1 {
|
|
// To make sure NoData is nil when Results are also nil we wait to initialize
|
|
// NoData until there is at least one query or expression that returned no data
|
|
if result.NoData == nil {
|
|
result.NoData = make(map[string]string)
|
|
}
|
|
hasNoFrames := len(res.Frames) == 0
|
|
hasNoFields := len(res.Frames) == 1 && len(res.Frames[0].Fields) == 0
|
|
if hasNoFrames || hasNoFields {
|
|
if s, ok := datasourceUIDsForRefIDs[refID]; ok && expr.NodeTypeFromDatasourceUID(s) == expr.TypeDatasourceNode { // TODO perhaps extract datasource UID from ML expression too.
|
|
result.NoData[refID] = s
|
|
}
|
|
}
|
|
}
|
|
|
|
// for each frame within each response, the response can contain several data types including time-series data.
|
|
// For now, we favour simplicity and only care about single scalar values.
|
|
for _, frame := range res.Frames {
|
|
if len(frame.Fields) != 1 || frame.Fields[0].Type() != data.FieldTypeNullableFloat64 {
|
|
continue
|
|
}
|
|
var v *float64
|
|
if frame.Fields[0].Len() == 1 {
|
|
v = frame.At(0, 0).(*float64) // type checked above
|
|
}
|
|
captureFn(frame.RefID, frame.Fields[0].Labels, v)
|
|
}
|
|
|
|
if refID == c.Condition {
|
|
result.Condition = res.Frames
|
|
}
|
|
result.Results[refID] = res.Frames
|
|
}
|
|
|
|
// add capture values as data frame metadata to each result (frame) that has matching labels.
|
|
for _, frame := range result.Condition {
|
|
// classic conditions already have metadata set and only have one value, there's no need to add anything in this case.
|
|
if frame.Meta != nil && frame.Meta.Custom != nil {
|
|
if _, ok := frame.Meta.Custom.([]classic.EvalMatch); ok {
|
|
continue // do not overwrite EvalMatch from classic condition.
|
|
}
|
|
}
|
|
|
|
frame.SetMeta(&data.FrameMeta{}) // overwrite metadata
|
|
|
|
if len(frame.Fields) == 1 {
|
|
theseLabels := frame.Fields[0].Labels
|
|
fp := theseLabels.Fingerprint()
|
|
|
|
for _, fps := range captures {
|
|
// First look for a capture whose labels are an exact match
|
|
if v, ok := fps[fp]; ok {
|
|
if frame.Meta.Custom == nil {
|
|
frame.Meta.Custom = []NumberValueCapture{}
|
|
}
|
|
frame.Meta.Custom = append(frame.Meta.Custom.([]NumberValueCapture), v)
|
|
} else {
|
|
// If no exact match was found, look for captures whose labels are either subsets
|
|
// or supersets
|
|
for _, v := range fps {
|
|
// matching labels are equal labels, or when one set of labels includes the labels of the other.
|
|
if theseLabels.Equals(v.Labels) || theseLabels.Contains(v.Labels) || v.Labels.Contains(theseLabels) {
|
|
if frame.Meta.Custom == nil {
|
|
frame.Meta.Custom = []NumberValueCapture{}
|
|
}
|
|
frame.Meta.Custom = append(frame.Meta.Custom.([]NumberValueCapture), v)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// If the error of the condition is an Error that indicates the condition failed
|
|
// because one of its dependent query or expressions failed, then we follow
|
|
// the dependency chain to an error that is not a dependency error.
|
|
if len(result.Errors) > 0 && result.Error != nil {
|
|
if errors.Is(result.Error, expr.DependencyError) {
|
|
var utilError errutil.Error
|
|
e := result.Error
|
|
for {
|
|
errors.As(e, &utilError)
|
|
depRefID := utilError.PublicPayload["depRefId"].(string)
|
|
depError, ok := result.Errors[depRefID]
|
|
if !ok {
|
|
return result
|
|
}
|
|
if !errors.Is(depError, expr.DependencyError) {
|
|
result.Error = depError
|
|
return result
|
|
}
|
|
e = depError
|
|
}
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// datasourceUIDsToRefIDs inverts a RefID -> Datasource UID mapping: the result
// maps every Datasource UID to the sorted slice of Ref IDs that use it.
//
// A nil input yields nil, and an empty input yields an empty (non-nil) map.
//
// For example, given the following:
//
//	map[string]string{
//		"ref1": "datasource1",
//		"ref2": "datasource1",
//		"ref3": "datasource2",
//	}
//
// we would expect:
//
//	map[string][]string{
//		"datasource1": []string{"ref1", "ref2"},
//		"datasource2": []string{"ref3"},
//	}
func datasourceUIDsToRefIDs(refIDsToDatasourceUIDs map[string]string) map[string][]string {
	if refIDsToDatasourceUIDs == nil {
		return nil
	}

	// Sorting all Ref IDs once up front means every per-datasource slice built
	// below comes out sorted without having to sort each of them separately.
	ordered := make([]string, 0, len(refIDsToDatasourceUIDs))
	for id := range refIDsToDatasourceUIDs {
		ordered = append(ordered, id)
	}
	sort.Strings(ordered)

	byDatasource := make(map[string][]string)
	for _, id := range ordered {
		uid := refIDsToDatasourceUIDs[id]
		byDatasource[uid] = append(byDatasource[uid], id)
	}
	return byDatasource
}
|
|
|
|
// evaluateExecutionResult takes the ExecutionResult which includes data.Frames returned
|
|
// from SSE (Server Side Expressions). It will create Results (slice of Result) with a State
|
|
// extracted from each Frame.
|
|
//
|
|
// If the ExecutionResults error property is not nil, a single Error result will be returned.
|
|
// If there is no error and no results then a single NoData state Result will be returned.
|
|
//
|
|
// Each non-empty Frame must be a single Field of type []*float64 and of length 1.
|
|
// Also, each Frame must be uniquely identified by its Field.Labels or a single Error result will be returned.
|
|
//
|
|
// Per Frame, data becomes a State based on the following rules:
|
|
//
|
|
// If no value is set:
|
|
// - Empty or zero length Frames result in NoData.
|
|
//
|
|
// If a value is set:
|
|
// - 0 results in Normal.
|
|
// - Nonzero (e.g 1.2, NaN) results in Alerting.
|
|
// - nil results in noData.
|
|
// - unsupported Frame schemas results in Error.
|
|
func evaluateExecutionResult(execResults ExecutionResults, ts time.Time) Results {
|
|
evalResults := make([]Result, 0)
|
|
|
|
appendErrRes := func(e error) {
|
|
evalResults = append(evalResults, NewResultFromError(e, ts, time.Since(ts)))
|
|
}
|
|
|
|
appendNoData := func(labels data.Labels) {
|
|
evalResults = append(evalResults, Result{
|
|
State: NoData,
|
|
Instance: labels,
|
|
EvaluatedAt: ts,
|
|
EvaluationDuration: time.Since(ts),
|
|
})
|
|
}
|
|
|
|
if execResults.Error != nil {
|
|
appendErrRes(execResults.Error)
|
|
return evalResults
|
|
}
|
|
|
|
if len(execResults.NoData) > 0 {
|
|
noData := datasourceUIDsToRefIDs(execResults.NoData)
|
|
for datasourceUID, refIDs := range noData {
|
|
appendNoData(data.Labels{
|
|
"datasource_uid": datasourceUID,
|
|
"ref_id": strings.Join(refIDs, ","),
|
|
})
|
|
}
|
|
return evalResults
|
|
}
|
|
|
|
if len(execResults.Condition) == 0 {
|
|
appendNoData(nil)
|
|
return evalResults
|
|
}
|
|
|
|
for _, f := range execResults.Condition {
|
|
rowLen, err := f.RowLen()
|
|
if err != nil {
|
|
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: "unable to get frame row length", err: err})
|
|
continue
|
|
}
|
|
|
|
if len(f.TypeIndices(data.FieldTypeTime, data.FieldTypeNullableTime)) > 0 {
|
|
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: "looks like time series data, only reduced data can be alerted on."})
|
|
continue
|
|
}
|
|
|
|
if rowLen == 0 {
|
|
if len(f.Fields) == 0 {
|
|
appendNoData(nil)
|
|
continue
|
|
}
|
|
if len(f.Fields) == 1 {
|
|
appendNoData(f.Fields[0].Labels)
|
|
continue
|
|
}
|
|
}
|
|
|
|
if rowLen > 1 {
|
|
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected row length: %d instead of 0 or 1", rowLen)})
|
|
continue
|
|
}
|
|
|
|
if len(f.Fields) > 1 {
|
|
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected field length: %d instead of 1", len(f.Fields))})
|
|
continue
|
|
}
|
|
|
|
if f.Fields[0].Type() != data.FieldTypeNullableFloat64 {
|
|
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("invalid field type: %s", f.Fields[0].Type())})
|
|
continue
|
|
}
|
|
|
|
val := f.Fields[0].At(0).(*float64) // type checked by data.FieldTypeNullableFloat64 above
|
|
|
|
r := Result{
|
|
Instance: f.Fields[0].Labels,
|
|
EvaluatedAt: ts,
|
|
EvaluationDuration: time.Since(ts),
|
|
EvaluationString: extractEvalString(f),
|
|
Values: extractValues(f),
|
|
}
|
|
|
|
switch {
|
|
case val == nil:
|
|
r.State = NoData
|
|
case *val == 0:
|
|
r.State = Normal
|
|
default:
|
|
r.State = Alerting
|
|
}
|
|
|
|
evalResults = append(evalResults, r)
|
|
}
|
|
|
|
seenLabels := make(map[string]bool)
|
|
for _, res := range evalResults {
|
|
labelsStr := res.Instance.String()
|
|
_, ok := seenLabels[labelsStr]
|
|
if ok {
|
|
return Results{
|
|
Result{
|
|
State: Error,
|
|
Instance: res.Instance,
|
|
EvaluatedAt: ts,
|
|
EvaluationDuration: time.Since(ts),
|
|
Error: &invalidEvalResultFormatError{reason: fmt.Sprintf("frame cannot uniquely be identified by its labels: has duplicate results with labels {%s}", labelsStr)},
|
|
},
|
|
}
|
|
}
|
|
seenLabels[labelsStr] = true
|
|
}
|
|
|
|
return evalResults
|
|
}
|
|
|
|
// AsDataFrame forms the EvalResults in Frame suitable for displaying in the table panel of the front end.
|
|
// It displays one row per alert instance, with a column for each label and one for the alerting state.
|
|
func (evalResults Results) AsDataFrame() data.Frame {
|
|
fieldLen := len(evalResults)
|
|
|
|
uniqueLabelKeys := make(map[string]struct{})
|
|
|
|
for _, evalResult := range evalResults {
|
|
for k := range evalResult.Instance {
|
|
uniqueLabelKeys[k] = struct{}{}
|
|
}
|
|
}
|
|
|
|
labelColumns := make([]string, 0, len(uniqueLabelKeys))
|
|
for k := range uniqueLabelKeys {
|
|
labelColumns = append(labelColumns, k)
|
|
}
|
|
|
|
frame := data.NewFrame("evaluation results")
|
|
for _, lKey := range labelColumns {
|
|
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
|
|
}
|
|
frame.Fields = append(frame.Fields, data.NewField("State", nil, make([]string, fieldLen)))
|
|
frame.Fields = append(frame.Fields, data.NewField("Info", nil, make([]string, fieldLen)))
|
|
|
|
for evalIdx, evalResult := range evalResults {
|
|
for lIdx, v := range labelColumns {
|
|
frame.Set(lIdx, evalIdx, evalResult.Instance[v])
|
|
}
|
|
|
|
frame.Set(len(labelColumns), evalIdx, evalResult.State.String())
|
|
|
|
switch {
|
|
case evalResult.Error != nil:
|
|
frame.Set(len(labelColumns)+1, evalIdx, evalResult.Error.Error())
|
|
case evalResult.EvaluationString != "":
|
|
frame.Set(len(labelColumns)+1, evalIdx, evalResult.EvaluationString)
|
|
}
|
|
}
|
|
return *frame
|
|
}
|
|
|
|
func (e *evaluatorImpl) Validate(ctx EvaluationContext, condition models.Condition) error {
|
|
req, err := getExprRequest(ctx, condition, e.dataSourceCache, ctx.AlertingResultsReader)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, query := range req.Queries {
|
|
if query.DataSource == nil {
|
|
continue
|
|
}
|
|
switch expr.NodeTypeFromDatasourceUID(query.DataSource.UID) {
|
|
case expr.TypeDatasourceNode:
|
|
p, found := e.pluginsStore.Plugin(ctx.Ctx, query.DataSource.Type)
|
|
if !found { // technically this should fail earlier during datasource resolution phase.
|
|
return fmt.Errorf("datasource refID %s could not be found: %w", query.RefID, plugins.ErrPluginUnavailable)
|
|
}
|
|
if !p.Backend {
|
|
return fmt.Errorf("datasource refID %s is not a backend datasource", query.RefID)
|
|
}
|
|
case expr.TypeMLNode:
|
|
_, found := e.pluginsStore.Plugin(ctx.Ctx, query.DataSource.Type)
|
|
if !found {
|
|
return fmt.Errorf("datasource refID %s could not be found: %w", query.RefID, plugins.ErrPluginUnavailable)
|
|
}
|
|
case expr.TypeCMDNode:
|
|
}
|
|
}
|
|
_, err = e.create(condition, req)
|
|
return err
|
|
}
|
|
|
|
func (e *evaluatorImpl) Create(ctx EvaluationContext, condition models.Condition) (ConditionEvaluator, error) {
|
|
if len(condition.Data) == 0 {
|
|
return nil, errors.New("expression list is empty. must be at least 1 expression")
|
|
}
|
|
if len(condition.Condition) == 0 {
|
|
return nil, errors.New("condition must not be empty")
|
|
}
|
|
req, err := getExprRequest(ctx, condition, e.dataSourceCache, ctx.AlertingResultsReader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return e.create(condition, req)
|
|
}
|
|
|
|
func (e *evaluatorImpl) create(condition models.Condition, req *expr.Request) (ConditionEvaluator, error) {
|
|
pipeline, err := e.expressionService.BuildPipeline(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
conditions := make([]string, 0, len(pipeline))
|
|
for _, node := range pipeline {
|
|
if node.RefID() == condition.Condition {
|
|
return &conditionEvaluator{
|
|
pipeline: pipeline,
|
|
expressionService: e.expressionService,
|
|
condition: condition,
|
|
evalTimeout: e.evaluationTimeout,
|
|
}, nil
|
|
}
|
|
conditions = append(conditions, node.RefID())
|
|
}
|
|
return nil, fmt.Errorf("condition %s does not exist, must be one of %v", condition.Condition, conditions)
|
|
}
|