Alerting: Display query from grafana-managed alert rules on /api/v1/rules (#45969)

* Alerting: Extract query from alerting rule model for api/v1/rules

* more changes and fixtures

* appease the linter
gotjosh 2022-03-14 10:39:20 +00:00 committed by GitHub
parent ecf84be65a
commit a75d4fcbd8
7 changed files with 533 additions and 28 deletions

View File

@ -131,10 +131,8 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res
groupId, namespaceUID, namespace := r[0], r[1], r[2]
newGroup := &apimodels.RuleGroup{
Name: groupId,
// This doesn't make sense in our architecture
// so we use this field for passing to the frontend the namespace
File: namespace,
Name: groupId,
File: namespace, // file is what Prometheus uses for provisioning, we replace it with namespace.
LastEvaluation: time.Time{},
EvaluationTime: 0, // TODO: see if we are able to pass this along with evaluation results
}
@ -149,17 +147,10 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res
if !ok {
continue
}
var queryStr string
encodedQuery, err := json.Marshal(rule.Data)
if err != nil {
queryStr = err.Error()
} else {
queryStr = string(encodedQuery)
}
alertingRule := apimodels.AlertingRule{
State: "inactive",
Name: rule.Title,
Query: queryStr,
Query: ruleToQuery(srv.log, rule),
Duration: rule.For.Seconds(),
Annotations: rule.Annotations,
}
@ -211,6 +202,7 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res
newRule.LastError = alertState.Error.Error()
newRule.Health = "error"
}
alertingRule.Alerts = append(alertingRule.Alerts, alert)
}
@ -220,3 +212,45 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res
}
return response.JSON(http.StatusOK, ruleResponse)
}
// ruleToQuery attempts to extract the datasource queries from the alert query model.
// Returns the whole JSON model as a string if it fails to extract at least one query.
func ruleToQuery(logger log.Logger, rule *ngmodels.AlertRule) string {
var queryErr error
var queries []string
for _, q := range rule.Data {
q, err := q.GetQuery()
if err != nil {
// If we can't find the query, simply omit it and try the rest.
// Even single-query alerts have two `AlertQuery` entries: one for the query and one for the condition.
if errors.Is(err, ngmodels.ErrNoQuery) {
continue
}
// Any other type of error is unexpected; abort and return the whole JSON model.
logger.Debug("failed to parse a query", "err", err)
queryErr = err
break
}
queries = append(queries, q)
}
// If we were able to extract at least one query without failure, use it.
if queryErr == nil && len(queries) > 0 {
return strings.Join(queries, " | ")
}
return encodedQueriesOrError(rule.Data)
}
// encodedQueriesOrError tries to encode the rule query data into JSON; if that fails, it returns the encoding error as a string.
func encodedQueriesOrError(rules []ngmodels.AlertQuery) string {
encodedQueries, err := json.Marshal(rules)
if err == nil {
return string(encodedQueries)
}
return err.Error()
}
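
A minimal, self-contained sketch of the join-or-fallback behaviour documented above; it uses a simplified local type instead of the real ngmodels.AlertQuery, and the values mirror the fixtures used in the tests below:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// illustrativeQuery is a stand-in for ngmodels.AlertQuery, used only for this sketch.
type illustrativeQuery struct {
	RefID string          `json:"refId"`
	Model json.RawMessage `json:"model"`
}

func main() {
	// Happy path: every datasource query exposed an `expr`, so we join them.
	exprs := []string{"vector(1)", "vector(1)"}
	fmt.Println(strings.Join(exprs, " | ")) // vector(1) | vector(1)

	// Fallback path: encode the whole query model as JSON, mirroring encodedQueriesOrError.
	queries := []illustrativeQuery{{RefID: "A", Model: json.RawMessage(`{"expr":"vector(1)"}`)}}
	if encoded, err := json.Marshal(queries); err == nil {
		fmt.Println(string(encoded)) // [{"refId":"A","model":{"expr":"vector(1)"}}]
	}
}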

View File

@ -1,13 +1,22 @@
package api
import (
"context"
"encoding/json"
"fmt"
"net/http"
"testing"
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
"github.com/grafana/grafana/pkg/web"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/stretchr/testify/require"
)
@ -16,7 +25,7 @@ func TestRouteGetAlertStatuses(t *testing.T) {
fakeAlertInstanceManager := NewFakeAlertInstanceManager(t)
orgID := int64(1)
server := PrometheusSrv{
api := PrometheusSrv{
log: log.NewNopLogger(),
manager: fakeAlertInstanceManager,
store: fakeStore,
@ -25,7 +34,7 @@ func TestRouteGetAlertStatuses(t *testing.T) {
c := &models.ReqContext{SignedInUser: &models.SignedInUser{OrgId: orgID}}
t.Run("with no alerts", func(t *testing.T) {
r := server.RouteGetAlertStatuses(c)
r := api.RouteGetAlertStatuses(c)
require.Equal(t, http.StatusOK, r.Status())
require.JSONEq(t, `
{
@ -38,8 +47,8 @@ func TestRouteGetAlertStatuses(t *testing.T) {
})
t.Run("with two alerts", func(t *testing.T) {
fakeAlertInstanceManager.GenerateAlertInstances(1, 2)
r := server.RouteGetAlertStatuses(c)
fakeAlertInstanceManager.GenerateAlertInstances(1, util.GenerateShortUID(), 2)
r := api.RouteGetAlertStatuses(c)
require.Equal(t, http.StatusOK, r.Status())
require.JSONEq(t, `
{
@ -78,3 +87,224 @@ func TestRouteGetAlertStatuses(t *testing.T) {
}`, string(r.Body()))
})
}
func TestRouteGetRuleStatuses(t *testing.T) {
timeNow = func() time.Time { return time.Date(2022, 3, 10, 14, 0, 0, 0, time.UTC) }
orgID := int64(1)
req, err := http.NewRequest("GET", "/api/v1/rules", nil)
require.NoError(t, err)
c := &models.ReqContext{Context: &web.Context{Req: req}, SignedInUser: &models.SignedInUser{OrgId: orgID}}
t.Run("with no rules", func(t *testing.T) {
_, _, api := setupAPI(t)
r := api.RouteGetRuleStatuses(c)
require.JSONEq(t, `
{
"status": "success",
"data": {
"groups": []
}
}
`, string(r.Body()))
})
t.Run("with a rule that only has one query", func(t *testing.T) {
fakeStore, fakeAIM, api := setupAPI(t)
generateRuleAndInstanceWithQuery(t, orgID, fakeAIM, fakeStore, withClassicConditionSingleQuery())
r := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, r.Status())
require.JSONEq(t, `
{
"status": "success",
"data": {
"groups": [{
"name": "rule-group",
"file": "namespaceUID",
"rules": [{
"state": "inactive",
"name": "AlwaysFiring",
"query": "vector(1)",
"alerts": [{
"labels": {
"job": "prometheus"
},
"annotations": {
"severity": "critical"
},
"state": "Normal",
"activeAt": "0001-01-01T00:00:00Z",
"value": ""
}],
"labels": null,
"health": "ok",
"lastError": "",
"type": "alerting",
"lastEvaluation": "2022-03-10T14:01:00Z",
"duration": 180,
"evaluationTime": 60
}],
"interval": 60,
"lastEvaluation": "2022-03-10T14:01:00Z",
"evaluationTime": 0
}]
}
}
`, string(r.Body()))
})
t.Run("with a rule that has multiple queries", func(t *testing.T) {
fakeStore, fakeAIM, api := setupAPI(t)
generateRuleAndInstanceWithQuery(t, orgID, fakeAIM, fakeStore, withExpressionsMultiQuery())
r := api.RouteGetRuleStatuses(c)
require.Equal(t, http.StatusOK, r.Status())
require.JSONEq(t, `
{
"status": "success",
"data": {
"groups": [{
"name": "rule-group",
"file": "namespaceUID",
"rules": [{
"state": "inactive",
"name": "AlwaysFiring",
"query": "vector(1) | vector(1)",
"alerts": [{
"labels": {
"job": "prometheus"
},
"annotations": {
"severity": "critical"
},
"state": "Normal",
"activeAt": "0001-01-01T00:00:00Z",
"value": ""
}],
"labels": null,
"health": "ok",
"lastError": "",
"type": "alerting",
"lastEvaluation": "2022-03-10T14:01:00Z",
"duration": 180,
"evaluationTime": 60
}],
"interval": 60,
"lastEvaluation": "2022-03-10T14:01:00Z",
"evaluationTime": 0
}]
}
}
`, string(r.Body()))
})
}
func setupAPI(t *testing.T) (*store.FakeRuleStore, *fakeAlertInstanceManager, PrometheusSrv) {
fakeStore := store.NewFakeRuleStore(t)
fakeAIM := NewFakeAlertInstanceManager(t)
api := PrometheusSrv{
log: log.NewNopLogger(),
manager: fakeAIM,
store: fakeStore,
}
return fakeStore, fakeAIM, api
}
func generateRuleAndInstanceWithQuery(t *testing.T, orgID int64, fakeAIM *fakeAlertInstanceManager, fakeStore *store.FakeRuleStore, query func(r *ngmodels.AlertRule)) {
t.Helper()
rules := ngmodels.GenerateAlertRules(1, ngmodels.AlertRuleGen(withOrgID(orgID), asFixture(), query))
fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, func(s *state.State) *state.State {
s.Labels = data.Labels{"job": "prometheus"}
s.Annotations = data.Labels{"severity": "critical"}
return s
})
for _, r := range rules {
fakeStore.PutRule(context.Background(), r)
}
}
// asFixture removes the variable values of the alert rule.
// We're not too interested in the variability of the rule in this scenario.
func asFixture() func(r *ngmodels.AlertRule) {
return func(r *ngmodels.AlertRule) {
r.Title = "AlwaysFiring"
r.NamespaceUID = "namespaceUID"
r.RuleGroup = "rule-group"
r.UID = "RuleUID"
r.Labels = nil
r.Annotations = nil
r.IntervalSeconds = 60
r.For = 180 * time.Second
}
}
func withClassicConditionSingleQuery() func(r *ngmodels.AlertRule) {
return func(r *ngmodels.AlertRule) {
queries := []ngmodels.AlertQuery{
{
RefID: "A",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "AUID",
Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "A")),
},
{
RefID: "B",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "-100",
Model: json.RawMessage(fmt.Sprintf(classicConditionsModel, "A", "B")),
},
}
r.Data = queries
}
}
func withExpressionsMultiQuery() func(r *ngmodels.AlertRule) {
return func(r *ngmodels.AlertRule) {
queries := []ngmodels.AlertQuery{
{
RefID: "A",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "AUID",
Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "A")),
},
{
RefID: "B",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "BUID",
Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "B")),
},
{
RefID: "C",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "-100",
Model: json.RawMessage(fmt.Sprintf(reduceLastExpressionModel, "A", "C")),
},
{
RefID: "D",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "-100",
Model: json.RawMessage(fmt.Sprintf(reduceLastExpressionModel, "B", "D")),
},
{
RefID: "E",
QueryType: "",
RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)},
DatasourceUID: "-100",
Model: json.RawMessage(fmt.Sprintf(mathExpressionModel, "A", "B", "E")),
},
}
r.Data = queries
}
}

View File

@ -0,0 +1,118 @@
package api
// prometheusQueryModel represents the way we express a prometheus query as part of an alert query.
// It supports formatting for the refID of the query.
const prometheusQueryModel = `
{
"exemplar": false,
"expr": "vector(1)",
"hide": false,
"interval": "",
"intervalMs": 1000,
"legendFormat": "",
"maxDataPoints": 43200,
"refId": "%s"
}
`
// classicConditionsModel represents the way we express a classic condition as part of an alert query.
// It supports formatting for 1) the refID of the query and 2) the refID of the condition.
const classicConditionsModel = `
{
"conditions": [{
"evaluator": {
"params": [0.5],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": ["%s"]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}],
"datasource": {
"type": "__expr__",
"uid": "-100"
},
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "%s",
"type": "classic_conditions"
}
`
// reduceLastExpressionModel represents the way we express a reduce-to-last-data-point operation as part of an alert query.
// It supports formatting for 1) the refID of the query to target and 2) the refID of the condition.
const reduceLastExpressionModel = `
{
"conditions": [{
"evaluator": {
"params": [0,0],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "%s",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "%s",
"type": "reduce"
}
`
// mathExpressionModel represents the way we express a math operation (sum of two refIDs greater than 1) as part of an alert query.
// It supports formatting for 1) refID of the first operand, 2) refID of the second operand and 3) the refID of the math operation.
const mathExpressionModel = `
{
"conditions": [{
"evaluator": {
"params": [0, 0],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$%s + $%s \u003e 1",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "%s",
"type": "math"
}
`
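
Since the placeholder ordering is easy to misread with several %s verbs, here is a short sketch of how the tests above fill them. This is a fragment in the same package as the constants, with encoding/json and fmt assumed imported:

// fmt.Sprintf fills the %s verbs in declaration order:
//   prometheusQueryModel      -> 1) refID of the query itself
//   classicConditionsModel    -> 1) refID of the query it evaluates, 2) its own refID
//   reduceLastExpressionModel -> 1) refID of the query it reduces,   2) its own refID
//   mathExpressionModel       -> 1) first operand, 2) second operand, 3) its own refID
model := json.RawMessage(fmt.Sprintf(classicConditionsModel, "A", "B")) // condition B over query A
_ = model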

View File

@ -11,7 +11,6 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
"github.com/grafana/grafana-plugin-sdk-go/data"
)
@ -73,13 +72,15 @@ func (f *fakeAlertInstanceManager) GetStatesForRuleUID(orgID int64, alertRuleUID
return f.states[orgID][alertRuleUID]
}
func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int) {
// forEachState is the callback used when generating alert instances; it allows us to modify the generated result.
type forEachState func(s *state.State) *state.State
func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, alertRuleUID string, count int, callbacks ...forEachState) {
f.mtx.Lock()
defer f.mtx.Unlock()
evaluationTime := time.Now()
evaluationTime := timeNow()
evaluationDuration := 1 * time.Minute
alertRuleUID := util.GenerateShortUID()
for i := 0; i < count; i++ {
_, ok := f.states[orgID]
@ -91,8 +92,8 @@ func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int
f.states[orgID][alertRuleUID] = []*state.State{}
}
f.states[orgID][alertRuleUID] = append(f.states[orgID][alertRuleUID], &state.State{
AlertRuleUID: fmt.Sprintf("alert_rule_%v", i),
newState := &state.State{
AlertRuleUID: alertRuleUID,
OrgID: 1,
Labels: data.Labels{
"__alert_rule_namespace_uid__": "test_namespace_uid",
@ -117,6 +118,14 @@ func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
})
}
if len(callbacks) != 0 {
for _, cb := range callbacks {
newState = cb(newState)
}
}
f.states[orgID][alertRuleUID] = append(f.states[orgID][alertRuleUID], newState)
}
}
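
For illustration, a short sketch of how a test can use the new variadic callback to shape the generated state, mirroring generateRuleAndInstanceWithQuery in the API tests above (fakeAIM is an assumed *fakeAlertInstanceManager):

fakeAIM.GenerateAlertInstances(1, "RuleUID", 2, func(s *state.State) *state.State {
	// Override the generated labels so assertions can match on them.
	s.Labels = data.Labels{"job": "prometheus"}
	return s
})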

View File

@ -2,6 +2,7 @@ package models
import (
"encoding/json"
"errors"
"fmt"
"time"
@ -12,6 +13,8 @@ import (
const defaultMaxDataPoints float64 = 43200 // 12 hours at 1sec interval
const defaultIntervalMS float64 = 1000
var ErrNoQuery = errors.New("no `expr` property in the query model")
// Duration is a type used for marshalling durations.
type Duration time.Duration
@ -174,6 +177,28 @@ func (aq *AlertQuery) GetDatasource() (string, error) {
return aq.DatasourceUID, nil
}
// GetQuery returns the query defined by `expr` within the model.
// Returns an ErrNoQuery if it is unable to find the query.
// Returns an error if it is not able to cast the query to a string.
func (aq *AlertQuery) GetQuery() (string, error) {
if aq.modelProps == nil {
err := aq.setModelProps()
if err != nil {
return "", err
}
}
query, ok := aq.modelProps["expr"]
if !ok {
return "", ErrNoQuery
}
q, ok := query.(string)
if !ok {
return "", fmt.Errorf("failed to cast query to string: %v", aq.modelProps["expr"])
}
return q, nil
}
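
// Illustrative only, not part of this change: the behaviour documented above,
// matching the cases exercised in TestAlertQuery_GetQuery later in this commit.
//
//   (&AlertQuery{Model: json.RawMessage(`{"expr": "sum by (job) (up)"}`)}).GetQuery()
//     -> "sum by (job) (up)", nil
//   (&AlertQuery{Model: json.RawMessage(`{"exprisnot": "..."}`)}).GetQuery()
//     -> "", ErrNoQuery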
func (aq *AlertQuery) GetModel() ([]byte, error) {
err := aq.setMaxDatapoints()
if err != nil {

View File

@ -2,6 +2,7 @@ package models
import (
"encoding/json"
"errors"
"fmt"
"testing"
"time"
@ -110,7 +111,7 @@ func TestAlertQuery(t *testing.T) {
Model: json.RawMessage(`{
"queryType": "metricQuery",
"intervalMs": "invalid",
"extraParam": "some text"
"extraParam": "some text"
}`),
},
expectedIsExpression: false,
@ -245,3 +246,38 @@ func TestAlertQueryMarshalling(t *testing.T) {
}
}
}
func TestAlertQuery_GetQuery(t *testing.T) {
tc := []struct {
name string
alertQuery AlertQuery
expected string
err error
}{
{
name: "when a query is present",
alertQuery: AlertQuery{Model: json.RawMessage(`{"expr": "sum by (job) (up)"}`)},
expected: "sum by (job) (up)",
},
{
name: "when no query is found",
alertQuery: AlertQuery{Model: json.RawMessage(`{"exprisnot": "sum by (job) (up)"}`)},
err: ErrNoQuery,
},
{
name: "when we're unable to cast the query to a string",
alertQuery: AlertQuery{Model: json.RawMessage(`{"expr": {"key": 1}}`)},
err: errors.New("failed to cast query to string: map[key:1]"),
},
}
for _, tt := range tc {
t.Run(tt.name, func(t *testing.T) {
expected, err := tt.alertQuery.GetQuery()
if err != nil {
require.Equal(t, tt.err, err)
}
require.Equal(t, tt.expected, expected)
})
}
}

View File

@ -34,8 +34,9 @@ func NewFakeRuleStore(t *testing.T) *FakeRuleStore {
// FakeRuleStore mocks the RuleStore of the scheduler.
type FakeRuleStore struct {
t *testing.T
mtx sync.Mutex
t *testing.T
mtx sync.Mutex
// OrgID -> RuleGroup -> Namespace -> Rules
Rules map[int64]map[string]map[string][]*models.AlertRule
Hook func(cmd interface{}) error // use Hook if you need to intercept some query and return an error
RecordedOps []interface{}
@ -141,10 +142,25 @@ func (f *FakeRuleStore) GetAlertRulesForScheduling(_ context.Context, q *models.
return nil
}
func (f *FakeRuleStore) GetOrgAlertRules(_ context.Context, q *models.ListAlertRulesQuery) error {
f.mtx.Lock()
defer f.mtx.Unlock()
f.RecordedOps = append(f.RecordedOps, *q)
if _, ok := f.Rules[q.OrgID]; !ok {
return nil
}
var rules []*models.AlertRule
for ruleGroup := range f.Rules[q.OrgID] {
for _, storedRules := range f.Rules[q.OrgID][ruleGroup] {
rules = append(rules, storedRules...)
}
}
q.Result = rules
return nil
}
func (f *FakeRuleStore) GetNamespaceAlertRules(_ context.Context, q *models.ListNamespaceAlertRulesQuery) error {
@ -185,8 +201,24 @@ func (f *FakeRuleStore) GetRuleGroupAlertRules(_ context.Context, q *models.List
return nil
}
func (f *FakeRuleStore) GetNamespaces(_ context.Context, _ int64, _ *models2.SignedInUser) (map[string]*models2.Folder, error) {
return nil, nil
func (f *FakeRuleStore) GetNamespaces(_ context.Context, orgID int64, _ *models2.SignedInUser) (map[string]*models2.Folder, error) {
f.mtx.Lock()
defer f.mtx.Unlock()
namespacesMap := map[string]*models2.Folder{}
_, ok := f.Rules[orgID]
if !ok {
return namespacesMap, nil
}
for rg := range f.Rules[orgID] {
for namespace := range f.Rules[orgID][rg] {
namespacesMap[namespace] = &models2.Folder{}
}
}
return namespacesMap, nil
}
func (f *FakeRuleStore) GetNamespaceByTitle(_ context.Context, _ string, _ int64, _ *models2.SignedInUser, _ bool) (*models2.Folder, error) {
return nil, nil
@ -198,6 +230,27 @@ func (f *FakeRuleStore) GetOrgRuleGroups(_ context.Context, q *models.ListOrgRul
if err := f.Hook(*q); err != nil {
return err
}
// If we have namespaces, try to retrieve the rule groups stored for them.
if len(q.NamespaceUIDs) != 0 {
_, ok := f.Rules[q.OrgID]
if !ok {
return nil
}
var ruleGroups [][]string
for rg := range f.Rules[q.OrgID] {
for storedNamespace := range f.Rules[q.OrgID][rg] {
for _, namespace := range q.NamespaceUIDs {
if storedNamespace == namespace { // if they match, they should go in.
ruleGroups = append(ruleGroups, []string{rg, storedNamespace, storedNamespace})
}
}
}
}
q.Result = ruleGroups
}
return nil
}
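
For reference, a hedged sketch of the shape this fake puts into q.Result; the query type's full name is truncated in the hunk header above, so it is not restated here. RouteGetRuleStatuses (first hunk of this commit) unpacks each entry as groupId, namespaceUID, namespace:

// With OrgID 1 and NamespaceUIDs ["namespaceUID"], and the fixture rule stored
// under rule group "rule-group", q.Result should come back as:
want := [][]string{{"rule-group", "namespaceUID", "namespaceUID"}}
// Note the fake reuses the namespace UID for both of the last two positions.
_ = want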