Alerting: Support hysteresis command expression (#75189)

Backend: 

* Update the Grafana Alerting engine to provide feedback to HysteresisCommand. The feedback information is stored in state.Manager as a fingerprint of each state. The fingerprint is persisted to the database. Only fingerprints that belong to Pending and Alerting states are considered as "loaded" and provided back to the command.
   - add ResultFingerprint to state.State. It's different from other fingerprints we store in the state because it is calculated from the result labels.
   - add rule_fingerprint column to alert_instance
   - update alerting evaluator to accept AlertingResultsReader via context, and update scheduler to provide it.
   - add AlertingResultsFromRuleState that implements the new interface in eval package
   - update getExprRequest to patch the hysteresis command.

* Only one "Recovery Threshold" query is allowed to be used in the alert rule and it must be the Condition.


Frontend:

* Add hysteresis option to Threshold in UI. It's called "Recovery Threshold"
* Add test for getUnloadEvaluatorTypeFromCondition
* Hide hysteresis in panel expressions

* Refactor isInvalid and add test for it
* Remove unnecessary React.memo
* Add tests for updateEvaluatorConditions

---------

Co-authored-by: Sonia Aguilar <soniaaguilarpeiron@gmail.com>
This commit is contained in:
Yuri Tseretyan
2024-01-04 11:47:13 -05:00
committed by GitHub
parent 29c251851d
commit f6a46744a6
33 changed files with 1804 additions and 201 deletions

View File

@@ -15,6 +15,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/uuid"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -1315,3 +1316,77 @@ func TestIntegrationRulePause(t *testing.T) {
})
}
}
// TestIntegrationHysteresisRule starts Grafana with a 1-second scheduler tick
// and the recovery-threshold feature toggle enabled, creates the rule described
// in test-data/hysteresis_rule.json, and then inspects the rule's state history.
//
// The test passes when the first two history records contain one transition to
// "Alerting" whose recorded value for refId "B" is 5 and one transition to
// "Normal" whose recorded value for refId "B" is 1.
func TestIntegrationHysteresisRule(t *testing.T) {
	testinfra.SQLiteIntegrationTest(t)

	// Setup Grafana and its Database. Scheduler is set to evaluate every 1 second
	dir, p := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
		DisableLegacyAlerting:        true,
		EnableUnifiedAlerting:        true,
		DisableAnonymous:             true,
		AppModeProduction:            true,
		NGAlertSchedulerBaseInterval: 1 * time.Second,
		EnableFeatureToggles:         []string{featuremgmt.FlagConfigurableSchedulerTick, featuremgmt.FlagRecoveryThreshold},
	})

	grafanaListedAddr, store := testinfra.StartGrafana(t, dir, p)

	// Create a user to make authenticated requests
	createUser(t, store, user.CreateUserCommand{
		DefaultOrgRole: string(org.RoleAdmin),
		Password:       "password",
		Login:          "grafana",
	})

	apiClient := newAlertingApiClient(grafanaListedAddr, "grafana", "password")

	folder := "hysteresis"
	testDs := apiClient.CreateTestDatasource(t)
	apiClient.CreateFolder(t, folder, folder)

	bodyRaw, err := testData.ReadFile("test-data/hysteresis_rule.json")
	require.NoError(t, err)

	var postData apimodels.PostableRuleGroupConfig
	require.NoError(t, json.Unmarshal(bodyRaw, &postData))

	// The fixture carries a placeholder datasource UID; point every query that
	// uses it at the datasource created above.
	for _, rule := range postData.Rules {
		for i := range rule.GrafanaManagedAlert.Data {
			rule.GrafanaManagedAlert.Data[i].DatasourceUID = strings.ReplaceAll(rule.GrafanaManagedAlert.Data[i].DatasourceUID, "REPLACE_ME", testDs.Body.Datasource.UID)
		}
	}

	changes, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &postData)
	require.Equalf(t, http.StatusAccepted, status, "%s", body)
	require.Len(t, changes.Created, 1)
	ruleUid := changes.Created[0]

	// Poll for the history on the test goroutine instead of inside
	// require.Eventually. Eventually runs its condition on a separate goroutine,
	// where require (t.FailNow) must not be called, and the arguments of its
	// failure message are evaluated once, before the first poll, so they never
	// show the state that was actually observed.
	// Trade-off: a request that never returns is no longer cut off after the
	// polling timeout; it is bounded only by the test binary's own timeout.
	const (
		historyTimeout = 15 * time.Second
		historyPoll    = time.Second
	)
	var frame data.Frame
	deadline := time.Now().Add(historyTimeout)
	for {
		frame, status, body = apiClient.GetRuleHistoryWithStatus(t, ruleUid)
		require.Equalf(t, http.StatusOK, status, "%s", body)
		if frame.Rows() > 1 {
			break
		}
		require.Truef(t, time.Now().Before(deadline), "Alert state history expected to have more than one record but got %d. Body: %s", frame.Rows(), body)
		time.Sleep(historyPoll)
	}

	// Only the field itself is needed; the returned column index is not used.
	nextField, _ := frame.FieldByName("next")
	require.NotNilf(t, nextField, "history frame has no \"next\" field. Body: %s", body)

	// The order of the first two records is not assumed: locate which of them
	// is the Alerting transition and treat the other one as Normal.
	alertingIdx, normalIdx := 0, 1
	if nextField.At(alertingIdx).(string) != "Alerting" {
		alertingIdx, normalIdx = 1, 0
	}
	assert.Equalf(t, "Alerting", nextField.At(alertingIdx).(string), "%s", body)
	assert.Equalf(t, "Normal", nextField.At(normalIdx).(string), "%s", body)

	type HistoryData struct {
		Values map[string]int64
	}

	dataField, _ := frame.FieldByName("data")
	require.NotNilf(t, dataField, "history frame has no \"data\" field. Body: %s", body)

	// Decode each record into its own value so that entries of one record's
	// Values map cannot leak into the other.
	var alertingData, normalData HistoryData
	require.NoErrorf(t, json.Unmarshal([]byte(dataField.At(alertingIdx).(string)), &alertingData), "%s", body)
	assert.EqualValuesf(t, 5, alertingData.Values["B"], "%s", body)
	require.NoErrorf(t, json.Unmarshal([]byte(dataField.At(normalIdx).(string)), &normalData), "%s", body)
	assert.EqualValuesf(t, 1, normalData.Values["B"], "%s", body)
}

View File

@@ -0,0 +1,71 @@
{
"name": "Default",
"interval": "1s",
"rules": [
{
"grafana_alert": {
"title": "Hysteresis Test",
"condition": "C",
"no_data_state": "NoData",
"exec_err_state": "Error",
"data": [
{
"refId": "A",
"datasourceUid": "REPLACE_ME",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"model": {
"refId": "A",
"scenarioId": "predictable_csv_wave",
"csvWave": [
{
"timeStep": 1,
"valuesCSV": "5,3,2,1"
}
],
"seriesCount": 1
}
},
{
"refId": "B",
"datasourceUid": "__expr__",
"model": {
"refId": "B",
"type": "reduce",
"reducer": "last",
"expression": "A"
}
},
{
"refId": "C",
"datasourceUid": "__expr__",
"model": {
"refId": "C",
"type": "threshold",
"conditions": [
{
"evaluator": {
"params": [
4
],
"type": "gt"
},
"unloadEvaluator": {
"params": [
2
],
"type": "lt"
}
}
],
"expression": "B"
}
}
]
}
}
]
}

View File

@@ -12,6 +12,7 @@ import (
"time"
"github.com/google/uuid"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -695,6 +696,20 @@ func (a apiClient) UpdateRouteWithStatus(t *testing.T, route apimodels.Route) (i
return resp.StatusCode, string(body)
}
// GetRuleHistoryWithStatus issues GET <a.url>/api/v1/rules/history with the
// ruleUID query parameter set to the given rule UID. The request is handed to
// sendRequest with http.StatusOK as the success status, and sendRequest's
// results (decoded frame, status code, body string) are returned unchanged.
// Failing to build the URL or the request fails the test immediately.
func (a apiClient) GetRuleHistoryWithStatus(t *testing.T, ruleUID string) (data.Frame, int, string) {
	t.Helper()

	endpoint, err := url.Parse(fmt.Sprintf("%s/api/v1/rules/history", a.url))
	require.NoError(t, err)

	// Encode the query through url.Values so the UID is escaped properly.
	params := url.Values{}
	params.Set("ruleUID", ruleUID)
	endpoint.RawQuery = params.Encode()

	request, err := http.NewRequest(http.MethodGet, endpoint.String(), nil)
	require.NoError(t, err)

	return sendRequest[data.Frame](t, request, http.StatusOK)
}
func sendRequest[T any](t *testing.T, req *http.Request, successStatusCode int) (T, int, string) {
client := &http.Client{}
resp, err := client.Do(req)

View File

@@ -394,6 +394,15 @@ func CreateGrafDir(t *testing.T, opts ...GrafanaOpts) (string, string) {
require.NoError(t, err)
_, err = logSection.NewKey("query_retries", fmt.Sprintf("%d", queryRetries))
require.NoError(t, err)
if o.NGAlertSchedulerBaseInterval > 0 {
unifiedAlertingSection, err := getOrCreateSection("unified_alerting")
require.NoError(t, err)
_, err = unifiedAlertingSection.NewKey("scheduler_tick_interval", o.NGAlertSchedulerBaseInterval.String())
require.NoError(t, err)
_, err = unifiedAlertingSection.NewKey("min_interval", o.NGAlertSchedulerBaseInterval.String())
require.NoError(t, err)
}
}
cfgPath := filepath.Join(cfgDir, "test.ini")
@@ -419,6 +428,7 @@ type GrafanaOpts struct {
EnableFeatureToggles []string
NGAlertAdminConfigPollInterval time.Duration
NGAlertAlertmanagerConfigPollInterval time.Duration
NGAlertSchedulerBaseInterval time.Duration
AnonymousUserRole org.RoleType
EnableQuota bool
DashboardOrgQuota *int64