Alerting: Migrate unknown NoData/Error settings to the default (#68403)

* use default execution if legacy is not known

* update docs

* Update docs/sources/alerting/migrating-alerts/migrating-legacy-alerts.md

Co-authored-by: brendamuir <100768211+brendamuir@users.noreply.github.com>

* Update docs/sources/alerting/migrating-alerts/migrating-legacy-alerts.md

Co-authored-by: brendamuir <100768211+brendamuir@users.noreply.github.com>

---------

Co-authored-by: brendamuir <100768211+brendamuir@users.noreply.github.com>
This commit is contained in:
Yuri Tseretyan 2023-05-24 13:09:17 -04:00 committed by GitHub
parent f49b9b89cf
commit 3af95bebe1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 28 deletions

View File

@ -24,7 +24,11 @@ longer supported. We refer to these as [Differences]({{< relref "#differences" >
- If there are no dashboard permissions and the dashboard is under a folder, then the rule is linked to this folder and inherits its permissions.
- If there are no dashboard permissions and the dashboard is under the General folder, then the rule is linked to the `General Alerting` folder, and the rule inherits the default permissions.
3. Since there is no `Keep Last State` option for [`No Data`]({{< relref "../alerting-rules/create-grafana-managed-rule#configure-no-data-and-error-handling" >}}) in Grafana Alerting, this option becomes `NoData` during the legacy rules migration. Option "Keep Last State" for [`Error handling`]({{< relref "../alerting-rules/create-grafana-managed-rule#configure-no-data-and-error-handling" >}}) is migrated to a new option `Error`. To match the behavior of the `Keep Last State`, in both cases, during the migration Grafana automatically creates a silence for each alert rule with a duration of 1 year.
3. `NoData` and `Error` settings are migrated as is to the corresponding settings in Grafana Alerting, except in two situations:
3.1. As there is no `Keep Last State` option for `No Data` in Grafana Alerting, this option becomes `NoData`. The `Keep Last State` option for `Error` is migrated to a new option `Error`. To match the behavior of `Keep Last State`, in both cases Grafana automatically creates a silence with a duration of 1 year for each alert rule during the migration.
3.2. Due to a lack of validation, legacy alert rules imported via JSON or provisioned along with dashboards can contain arbitrary values for [`NoData` and `Error`]({{< relref "../alerting-rules/create-grafana-managed-rule#configure-no-data-and-error-handling" >}}). In this situation, Grafana uses the default setting: `NoData` for No data, and `Error` for Error.
4. Notification channels are migrated to an Alertmanager configuration with the appropriate routes and receivers. Default notification channels are added as contact points to the default route. Notification channels not associated with any Dashboard alert go to the `autogen-unlinked-channel-recv` route.

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/tsdb/graphite"
@ -105,7 +106,7 @@ func addMigrationInfo(da *dashAlert) (map[string]string, map[string]string) {
return lbls, annotations
}
func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string) (*alertRule, error) {
func (m *migration) makeAlertRule(l log.Logger, cond condition, da dashAlert, folderUID string) (*alertRule, error) {
lbls, annotations := addMigrationInfo(&da)
annotations["message"] = da.Message
var err error
@ -143,16 +144,8 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
Labels: lbls,
RuleGroupIndex: 1,
IsPaused: isPaused,
}
ar.NoDataState, err = transNoData(da.ParsedSettings.NoDataState)
if err != nil {
return nil, err
}
ar.ExecErrState, err = transExecErr(da.ParsedSettings.ExecutionErrorState)
if err != nil {
return nil, err
NoDataState: transNoData(l, da.ParsedSettings.NoDataState),
ExecErrState: transExecErr(l, da.ParsedSettings.ExecutionErrorState),
}
// Label for routing and silences.
@ -271,32 +264,36 @@ func ruleAdjustInterval(freq int64) int64 {
return freq - (freq % baseFreq)
}
func transNoData(s string) (string, error) {
func transNoData(l log.Logger, s string) string {
switch legacymodels.NoDataOption(s) {
case legacymodels.NoDataSetOK:
return string(ngmodels.OK), nil // values from ngalert/models/rule
return string(ngmodels.OK) // values from ngalert/models/rule
case "", legacymodels.NoDataSetNoData:
return string(ngmodels.NoData), nil
return string(ngmodels.NoData)
case legacymodels.NoDataSetAlerting:
return string(ngmodels.Alerting), nil
return string(ngmodels.Alerting)
case legacymodels.NoDataKeepState:
return string(ngmodels.NoData), nil // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert.
return string(ngmodels.NoData) // "keep last state" translates to no data because we now emit a special alert when the state is "noData". The result is that the evaluation will not return firing and instead we'll raise the special alert.
default:
l.Warn("Unable to translate execution of NoData state. Using default execution", "old", s, "new", ngmodels.NoData)
return string(ngmodels.NoData)
}
return "", fmt.Errorf("unrecognized No Data setting %v", s)
}
func transExecErr(s string) (string, error) {
func transExecErr(l log.Logger, s string) string {
switch legacymodels.ExecutionErrorOption(s) {
case "", legacymodels.ExecutionErrorSetAlerting:
return string(ngmodels.AlertingErrState), nil
return string(ngmodels.AlertingErrState)
case legacymodels.ExecutionErrorKeepState:
// Keep last state is translated to error as we now emit a
// DatasourceError alert when the state is error
return string(ngmodels.ErrorErrState), nil
return string(ngmodels.ErrorErrState)
case legacymodels.ExecutionErrorSetOk:
return string(ngmodels.OkErrState), nil
return string(ngmodels.OkErrState)
default:
l.Warn("Unable to translate execution of Error state. Using default execution", "old", s, "new", ngmodels.ErrorErrState)
return string(ngmodels.ErrorErrState)
}
return "", fmt.Errorf("unrecognized Execution Error setting %v", s)
}
func normalizeRuleName(daName string, uid string) string {

View File

@ -5,9 +5,12 @@ import (
"strings"
"testing"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log/logtest"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
func TestMigrateAlertRuleQueries(t *testing.T) {
@ -93,7 +96,7 @@ func TestMakeAlertRule(t *testing.T) {
da := createTestDashAlert()
cnd := createTestDashAlertCondition()
ar, err := m.makeAlertRule(cnd, da, "folder")
ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
require.NoError(t, err)
require.Equal(t, da.Name, ar.Title)
@ -106,7 +109,7 @@ func TestMakeAlertRule(t *testing.T) {
da.Name = strings.Repeat("a", DefaultFieldMaxLength+1)
cnd := createTestDashAlertCondition()
ar, err := m.makeAlertRule(cnd, da, "folder")
ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
require.NoError(t, err)
require.Len(t, ar.Title, DefaultFieldMaxLength)
@ -123,7 +126,7 @@ func TestMakeAlertRule(t *testing.T) {
da := createTestDashAlert()
cnd := createTestDashAlertCondition()
ar, err := m.makeAlertRule(cnd, da, "folder")
ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
require.NoError(t, err)
require.False(t, ar.IsPaused)
})
@ -134,10 +137,32 @@ func TestMakeAlertRule(t *testing.T) {
da.State = "paused"
cnd := createTestDashAlertCondition()
ar, err := m.makeAlertRule(cnd, da, "folder")
ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
require.NoError(t, err)
require.True(t, ar.IsPaused)
})
t.Run("use default if execution of NoData is not known", func(t *testing.T) {
	m := newTestMigration(t)
	da := createTestDashAlert()
	// A freshly generated UUID is used as a NoData setting that is not one
	// of the known legacy options.
	da.ParsedSettings.NoDataState = uuid.NewString()
	cnd := createTestDashAlertCondition()

	ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
	// NoError (rather than Nil) matches the other subtests and prints the
	// error message if the assertion fails.
	require.NoError(t, err)
	require.Equal(t, string(models.NoData), ar.NoDataState)
})
t.Run("use default if execution of Error is not known", func(t *testing.T) {
	m := newTestMigration(t)
	da := createTestDashAlert()
	// A freshly generated UUID is used as an execution-error setting that is
	// not one of the known legacy options.
	da.ParsedSettings.ExecutionErrorState = uuid.NewString()
	cnd := createTestDashAlertCondition()

	ar, err := m.makeAlertRule(&logtest.Fake{}, cnd, da, "folder")
	// NoError (rather than Nil) matches the other subtests and prints the
	// error message if the assertion fails.
	require.NoError(t, err)
	require.Equal(t, string(models.ErrorErrState), ar.ExecErrState)
})
}
func createTestDashAlert() dashAlert {

View File

@ -375,7 +375,7 @@ func (m *migration) Exec(sess *xorm.Session, mg *migrator.Migrator) error {
AlertId: da.Id,
}
}
rule, err := m.makeAlertRule(*newCond, da, folder.Uid)
rule, err := m.makeAlertRule(l, *newCond, da, folder.Uid)
if err != nil {
return fmt.Errorf("failed to migrate alert rule '%s' [ID:%d, DashboardUID:%s, orgID:%d]: %w", da.Name, da.Id, da.DashboardUID, da.OrgId, err)
}