Alerting: In migration, fallback to '1s' for malformed min interval (#78614)

* Alerting: In migration, fallback to '1s' for malformed min interval

During legacy migration, when we encounter an alert datasource query
whose min interval (the `interval` field in the query model) is not
parseable, we fall back to a min interval of 1s and continue instead of
failing the migration.

The reason for this is a bug in legacy alerting (present for several major
versions) that allows arbitrary dashboard variables to be used as the
min interval, even though those variables do not work and cause the
legacy alert to fail with `interval calculation failed: time: invalid
duration`.
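
For illustration, here is a minimal, self-contained sketch of that fallback using only the standard library. The `normalizeMinInterval` helper is hypothetical and not part of this change; the real migration calls Grafana's interval helper against the full query model, as the diff below shows.

```go
package main

import (
	"fmt"
	"time"
)

// normalizeMinInterval is an illustrative helper (not part of this commit):
// if the query model's "interval" field is not a parseable duration, e.g. a
// dashboard variable such as "$min_interval", fall back to "1s" so the
// migration can continue instead of failing.
func normalizeMinInterval(raw string) string {
	if raw == "" {
		// No min interval set; leave it to downstream defaults.
		return raw
	}
	if _, err := time.ParseDuration(raw); err != nil {
		// Same failure mode the legacy alert hits: "time: invalid duration ...".
		fmt.Printf("warn: failed to parse min interval %q, using '1s' instead: %v\n", raw, err)
		return "1s"
	}
	return raw
}

func main() {
	fmt.Println(normalizeMinInterval("30s"))           // valid: kept as-is
	fmt.Println(normalizeMinInterval("$min_interval")) // malformed: falls back to "1s"
}
```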
Matthew Jacobson 2023-11-24 11:27:44 -05:00 committed by GitHub
parent 6c7beb1ec3
commit 4b439b7f52
4 changed files with 85 additions and 19 deletions

@@ -44,7 +44,7 @@ func addMigrationInfo(da *migrationStore.DashAlert, dashboardUID string) (map[st
// MigrateAlert migrates a single dashboard alert from legacy alerting to unified alerting.
func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, da *migrationStore.DashAlert, info migmodels.DashboardUpgradeInfo) (*ngmodels.AlertRule, error) {
l.Debug("Migrating alert rule to Unified Alerting")
cond, err := transConditions(ctx, da, om.migrationStore)
cond, err := transConditions(ctx, l, da, om.migrationStore)
if err != nil {
return nil, fmt.Errorf("transform conditions: %w", err)
}

@@ -10,6 +10,7 @@ import (
"time"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/datasources"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
@@ -22,7 +23,7 @@ import (
const expressionDatasourceUID = "__expr__"
//nolint:gocyclo
func transConditions(ctx context.Context, alert *migrationStore.DashAlert, store migrationStore.Store) (*condition, error) {
func transConditions(ctx context.Context, l log.Logger, alert *migrationStore.DashAlert, store migrationStore.Store) (*condition, error) {
// TODO: needs a significant refactor to reduce complexity.
usr := getMigrationUser(alert.OrgID)
set := alert.ParsedSettings
@@ -142,14 +143,10 @@ func transConditions(ctx context.Context, alert *migrationStore.DashAlert, store
}
// Could have an alert saved but datasource deleted, so can not require match.
dsUid := ""
if ds, err := store.GetDatasource(ctx, set.Conditions[condIdx].Query.DatasourceID, usr); err == nil {
dsUid = ds.UID
} else {
if !errors.Is(err, datasources.ErrDataSourceNotFound) {
ds, err := store.GetDatasource(ctx, set.Conditions[condIdx].Query.DatasourceID, usr)
if err != nil && !errors.Is(err, datasources.ErrDataSourceNotFound) {
return nil, err
}
}
queryObj["refId"] = refID
@@ -163,7 +160,17 @@ func transConditions(ctx context.Context, alert *migrationStore.DashAlert, store
rawFrom := newRefIDsToTimeRanges[refID][0]
rawTo := newRefIDsToTimeRanges[refID][1]
calculatedInterval, err := calculateInterval(legacydata.NewDataTimeRange(rawFrom, rawTo), simpleJson, nil)
// We check if the minInterval stored in the model is parseable. If it's not, we use "1s" instead.
// The reason for this is because of a bug in legacy alerting which allows arbitrary variables to be used
// as the min interval, even though those variables do not work and will cause the legacy alert
// to fail with `interval calculation failed: time: invalid duration`.
if _, err := interval.GetIntervalFrom(ds, simpleJson, time.Millisecond*1); err != nil {
l.Warn("failed to parse min interval from query model, using '1s' instead", "interval", simpleJson.Get("interval").MustString(), "err", err)
simpleJson.Set("interval", "1s")
}
calculatedInterval, err := calculateInterval(legacydata.NewDataTimeRange(rawFrom, rawTo), simpleJson, ds)
if err != nil {
return nil, err
}
@@ -183,9 +190,13 @@ func transConditions(ctx context.Context, alert *migrationStore.DashAlert, store
RefID: refID,
Model: encodedObj,
RelativeTimeRange: *rTR,
DatasourceUID: dsUid,
QueryType: queryType,
}
if ds != nil {
alertQuery.DatasourceUID = ds.UID
}
newCond.Data = append(newCond.Data, alertQuery)
}
}

@@ -9,6 +9,7 @@ import (
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/log/logtest"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
"github.com/grafana/grafana/pkg/services/ngalert/migration/store"
"github.com/grafana/grafana/pkg/services/ngalert/models"
@@ -74,7 +75,7 @@ func TestCondTransMultiCondOnSingleQuery(t *testing.T) {
}
migrationStore := store.NewTestMigrationStore(t, db.InitTestDB(t), &setting.Cfg{})
c, err := transConditions(context.Background(), &dashAlert, migrationStore)
c, err := transConditions(context.Background(), &logtest.Fake{}, &dashAlert, migrationStore)
require.NoError(t, err)
require.Equal(t, expected, c)
@@ -194,7 +195,7 @@ func TestCondTransExtended(t *testing.T) {
}
migrationStore := store.NewTestMigrationStore(t, db.InitTestDB(t), &setting.Cfg{})
c, err := transConditions(context.Background(), &dashAlert, migrationStore)
c, err := transConditions(context.Background(), &logtest.Fake{}, &dashAlert, migrationStore)
require.NoError(t, err)
require.Equal(t, expected, c)

@@ -14,6 +14,7 @@ import (
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"xorm.io/xorm"
"github.com/grafana/grafana/pkg/components/simplejson"
@@ -635,18 +636,22 @@ func TestDashAlertQueryMigration(t *testing.T) {
x := sqlStore.GetEngine()
service := NewTestMigrationService(t, sqlStore, &setting.Cfg{})
createAlertQuery := func(refId string, ds string, from string, to string) ngModels.AlertQuery {
dur, _ := calculateInterval(legacydata.NewDataTimeRange(from, to), simplejson.New(), nil)
intervalMs := strconv.FormatInt(dur.Milliseconds(), 10)
newQueryModel := `{"datasource":{"type":"prometheus","uid":"gdev-prometheus"},"expr":"up{job=\"fake-data-gen\"}","instant":false,"interval":"%s","intervalMs":%d,"maxDataPoints":1500,"refId":"%s"}`
createAlertQueryWithModel := func(refId string, ds string, from string, to string, model string) ngModels.AlertQuery {
rel, _ := getRelativeDuration(from, to)
return ngModels.AlertQuery{
RefID: refId,
RelativeTimeRange: ngModels.RelativeTimeRange{From: rel.From, To: rel.To},
DatasourceUID: ds,
Model: []byte(fmt.Sprintf(`{"datasource":{"type":"prometheus","uid":"gdev-prometheus"},"expr":"up{job=\"fake-data-gen\"}","instant":false,"intervalMs":%s,"maxDataPoints":1500,"refId":"%s"}`, intervalMs, refId)),
Model: []byte(model),
}
}
createAlertQuery := func(refId string, ds string, from string, to string) ngModels.AlertQuery {
dur, _ := calculateInterval(legacydata.NewDataTimeRange(from, to), simplejson.New(), nil)
return createAlertQueryWithModel(refId, ds, from, to, fmt.Sprintf(newQueryModel, "", dur.Milliseconds(), refId))
}
createClassicConditionQuery := func(refId string, conditions []classicConditionJSON) ngModels.AlertQuery {
exprModel := struct {
Type string `json:"type"`
@@ -1000,6 +1005,50 @@ func TestDashAlertQueryMigration(t *testing.T) {
},
},
},
{
name: "simple query with interval, calculates intervalMs using it as min interval",
alerts: []*models.Alert{
createAlertWithCond(t, 1, 1, 1, "alert1", nil,
[]migrationStore.DashAlertCondition{
withQueryModel(
createCondition("A", "max", "gt", 42, 1, "5m", "now"),
fmt.Sprintf(queryModel, "A", "1s"),
),
}),
},
expected: map[int64][]*ngModels.AlertRule{
int64(1): {
genAlert(func(rule *ngModels.AlertRule) {
rule.Data = append(rule.Data, createAlertQueryWithModel("A", "ds1-1", "5m", "now", fmt.Sprintf(newQueryModel, "1s", 1000, "A")))
rule.Data = append(rule.Data, createClassicConditionQuery("B", []classicConditionJSON{
cond("A", "max", "gt", 42),
}))
}),
},
},
},
{
name: "simple query with interval as variable, calculates intervalMs using default as min interval",
alerts: []*models.Alert{
createAlertWithCond(t, 1, 1, 1, "alert1", nil,
[]migrationStore.DashAlertCondition{
withQueryModel(
createCondition("A", "max", "gt", 42, 1, "5m", "now"),
fmt.Sprintf(queryModel, "A", "$min_interval"),
),
}),
},
expected: map[int64][]*ngModels.AlertRule{
int64(1): {
genAlert(func(rule *ngModels.AlertRule) {
rule.Data = append(rule.Data, createAlertQueryWithModel("A", "ds1-1", "5m", "now", fmt.Sprintf(newQueryModel, "$min_interval", 1000, "A")))
rule.Data = append(rule.Data, createClassicConditionQuery("B", []classicConditionJSON{
cond("A", "max", "gt", 42),
}))
}),
},
},
},
}
for _, tt := range tc {
t.Run(tt.name, func(t *testing.T) {
@@ -1107,7 +1156,12 @@ func createAlertNotification(t *testing.T, orgId int64, uid string, channelType
return createAlertNotificationWithReminder(t, orgId, uid, channelType, settings, defaultChannel, false, time.Duration(0))
}
var queryModel = `{"datasource":{"type":"prometheus","uid":"gdev-prometheus"},"expr":"up{job=\"fake-data-gen\"}","instant":false,"refId":"%s"}`
func withQueryModel(base migrationStore.DashAlertCondition, model string) migrationStore.DashAlertCondition {
base.Query.Model = []byte(model)
return base
}
var queryModel = `{"datasource":{"type":"prometheus","uid":"gdev-prometheus"},"expr":"up{job=\"fake-data-gen\"}","instant":false,"refId":"%s","interval":"%s"}`
func createCondition(refId string, reducer string, evalType string, thresh float64, datasourceId int64, from string, to string) migrationStore.DashAlertCondition {
return migrationStore.DashAlertCondition{
@@ -1127,7 +1181,7 @@ func createCondition(refId string, reducer string, evalType string, thresh float
}{
Params: []string{refId, from, to},
DatasourceID: datasourceId,
Model: []byte(fmt.Sprintf(queryModel, refId)),
Model: []byte(fmt.Sprintf(queryModel, refId, "")),
},
Reducer: struct {
Type string `json:"type"`