Alerting: Load annotations from rule into State cache (#33542)

for https://github.com/grafana/alerting-squad/issues/127
This commit is contained in:
Kyle Brandt 2021-04-30 14:23:12 -04:00 committed by GitHub
parent 0c2bcbf2bc
commit 7823842c5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 71 additions and 47 deletions

View File

@ -7,8 +7,8 @@ import (
// AlertInstance represents a single alert instance.
type AlertInstance struct {
DefinitionOrgID int64 `xorm:"def_org_id"`
DefinitionUID string `xorm:"def_uid"`
RuleOrgID int64 `xorm:"def_org_id"`
RuleUID string `xorm:"def_uid"`
Labels InstanceLabels
LabelsHash string
CurrentState InstanceStateType
@ -78,8 +78,8 @@ type FetchUniqueOrgIdsQuery struct {
// ListAlertInstancesQueryResult represents the result of listAlertInstancesQuery.
type ListAlertInstancesQueryResult struct {
DefinitionOrgID int64 `xorm:"def_org_id" json:"definitionOrgId"`
DefinitionUID string `xorm:"def_uid" json:"definitionUid"`
RuleOrgID int64 `xorm:"def_org_id" json:"definitionOrgId"`
RuleDefinitionUID string `xorm:"def_uid" json:"definitionUid"`
Labels InstanceLabels `json:"labels"`
LabelsHash string `json:"labeHash"`
CurrentState InstanceStateType `json:"currentState"`
@ -99,11 +99,11 @@ func ValidateAlertInstance(alertInstance *AlertInstance) error {
return fmt.Errorf("alert instance is invalid because it is nil")
}
if alertInstance.DefinitionOrgID == 0 {
if alertInstance.RuleOrgID == 0 {
return fmt.Errorf("alert instance is invalid due to missing alert definition organisation")
}
if alertInstance.DefinitionUID == "" {
if alertInstance.RuleUID == "" {
return fmt.Errorf("alert instance is invalid due to missing alert definition uid")
}

View File

@ -341,27 +341,48 @@ func (sch *schedule) WarmStateCache(st *state.Manager) {
st.ResetCache()
orgIdsCmd := models.FetchUniqueOrgIdsQuery{}
if err := sch.store.FetchOrgIds(&orgIdsCmd); err != nil {
sch.log.Error("unable to fetch orgIds", "msg", err.Error())
}
var states []*state.State
for _, orgIdResult := range orgIdsCmd.Result {
// Get Rules
ruleCmd := models.ListAlertRulesQuery{
OrgID: orgIdResult.DefinitionOrgID,
}
if err := sch.ruleStore.GetOrgAlertRules(&ruleCmd); err != nil {
sch.log.Error("unable to fetch previous state", "msg", err.Error())
}
ruleByUID := make(map[string]*models.AlertRule, len(ruleCmd.Result))
for _, rule := range ruleCmd.Result {
ruleByUID[rule.UID] = rule
}
// Get Instances
cmd := models.ListAlertInstancesQuery{
DefinitionOrgID: orgIdResult.DefinitionOrgID,
}
if err := sch.store.ListAlertInstances(&cmd); err != nil {
if err := sch.ruleStore.ListAlertInstances(&cmd); err != nil {
sch.log.Error("unable to fetch previous state", "msg", err.Error())
}
for _, entry := range cmd.Result {
ruleForEntry, ok := ruleByUID[entry.RuleDefinitionUID]
if !ok {
sch.log.Error("rule not found for instance, ignoring", "rule", entry.RuleDefinitionUID)
}
lbs := map[string]string(entry.Labels)
cacheId, err := entry.Labels.StringKey()
if err != nil {
sch.log.Error("error getting cacheId for entry", "msg", err.Error())
}
stateForEntry := &state.State{
AlertRuleUID: entry.DefinitionUID,
OrgID: entry.DefinitionOrgID,
AlertRuleUID: entry.RuleDefinitionUID,
OrgID: entry.RuleOrgID,
CacheId: cacheId,
Labels: lbs,
State: translateInstanceState(entry.CurrentState),
@ -369,6 +390,7 @@ func (sch *schedule) WarmStateCache(st *state.Manager) {
StartsAt: entry.CurrentStateSince,
EndsAt: entry.CurrentStateEnd,
LastEvaluationTime: entry.LastEvalTime,
Annotations: ruleForEntry.Annotations,
}
states = append(states, stateForEntry)
}

View File

@ -134,5 +134,6 @@ func (a *State) Equals(b *State) bool {
a.State.String() == b.State.String() &&
a.StartsAt == b.StartsAt &&
a.EndsAt == b.EndsAt &&
a.LastEvaluationTime == b.LastEvaluationTime
a.LastEvaluationTime == b.LastEvaluationTime &&
data.Labels(a.Annotations).String() == data.Labels(b.Annotations).String()
}

View File

@ -86,8 +86,8 @@ func (st DBstore) SaveAlertInstance(cmd *models.SaveAlertInstanceCommand) error
}
alertInstance := &models.AlertInstance{
DefinitionOrgID: cmd.DefinitionOrgID,
DefinitionUID: cmd.DefinitionUID,
RuleOrgID: cmd.DefinitionOrgID,
RuleUID: cmd.DefinitionUID,
Labels: cmd.Labels,
LabelsHash: labelsHash,
CurrentState: cmd.State,
@ -100,7 +100,7 @@ func (st DBstore) SaveAlertInstance(cmd *models.SaveAlertInstanceCommand) error
return err
}
params := append(make([]interface{}, 0), alertInstance.DefinitionOrgID, alertInstance.DefinitionUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
params := append(make([]interface{}, 0), alertInstance.RuleOrgID, alertInstance.RuleUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
upsertSQL := st.SQLStore.Dialect.UpsertSQL(
"alert_instance",

View File

@ -58,8 +58,8 @@ func TestAlertInstanceOperations(t *testing.T) {
require.NoError(t, err)
require.Equal(t, saveCmd.Labels, getCmd.Result.Labels)
require.Equal(t, alertRule1.OrgID, getCmd.Result.DefinitionOrgID)
require.Equal(t, alertRule1.UID, getCmd.Result.DefinitionUID)
require.Equal(t, alertRule1.OrgID, getCmd.Result.RuleOrgID)
require.Equal(t, alertRule1.UID, getCmd.Result.RuleUID)
})
t.Run("can save and read new alert instance with no labels", func(t *testing.T) {
@ -80,8 +80,8 @@ func TestAlertInstanceOperations(t *testing.T) {
err = dbstore.GetAlertInstance(getCmd)
require.NoError(t, err)
require.Equal(t, alertRule2.OrgID, getCmd.Result.DefinitionOrgID)
require.Equal(t, alertRule2.UID, getCmd.Result.DefinitionUID)
require.Equal(t, alertRule2.OrgID, getCmd.Result.RuleOrgID)
require.Equal(t, alertRule2.UID, getCmd.Result.RuleUID)
require.Equal(t, saveCmd.Labels, getCmd.Result.Labels)
})
@ -169,8 +169,8 @@ func TestAlertInstanceOperations(t *testing.T) {
require.Len(t, listQuery.Result, 1)
require.Equal(t, saveCmdTwo.DefinitionOrgID, listQuery.Result[0].DefinitionOrgID)
require.Equal(t, saveCmdTwo.DefinitionUID, listQuery.Result[0].DefinitionUID)
require.Equal(t, saveCmdTwo.DefinitionOrgID, listQuery.Result[0].RuleOrgID)
require.Equal(t, saveCmdTwo.DefinitionUID, listQuery.Result[0].RuleDefinitionUID)
require.Equal(t, saveCmdTwo.Labels, listQuery.Result[0].Labels)
require.Equal(t, saveCmdTwo.State, listQuery.Result[0].CurrentState)
})

View File

@ -8,6 +8,8 @@ import (
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
@ -32,11 +34,14 @@ type evalAppliedInfo struct {
func TestWarmStateCache(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
dbstore := setupTestEnv(t, 1)
rule := createTestAlertRule(t, dbstore, 600)
expectedEntries := []*state.State{
{
AlertRuleUID: "test_uid",
OrgID: 123,
AlertRuleUID: rule.UID,
OrgID: rule.OrgID,
CacheId: `[["test1","testValue1"]]`,
Labels: data.Labels{"test1": "testValue1"},
State: eval.Normal,
@ -46,9 +51,10 @@ func TestWarmStateCache(t *testing.T) {
StartsAt: evaluationTime.Add(-1 * time.Minute),
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
}, {
AlertRuleUID: "test_uid",
OrgID: 123,
AlertRuleUID: rule.UID,
OrgID: rule.OrgID,
CacheId: `[["test2","testValue2"]]`,
Labels: data.Labels{"test2": "testValue2"},
State: eval.Alerting,
@ -58,25 +64,25 @@ func TestWarmStateCache(t *testing.T) {
StartsAt: evaluationTime.Add(-1 * time.Minute),
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
},
}
dbstore := setupTestEnv(t, 1)
saveCmd1 := &models.SaveAlertInstanceCommand{
DefinitionOrgID: 123,
DefinitionUID: "test_uid",
DefinitionOrgID: rule.OrgID,
DefinitionUID: rule.UID,
Labels: models.InstanceLabels{"test1": "testValue1"},
State: models.InstanceStateNormal,
LastEvalTime: evaluationTime,
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
}
_ = dbstore.SaveAlertInstance(saveCmd1)
saveCmd2 := &models.SaveAlertInstanceCommand{
DefinitionOrgID: 123,
DefinitionUID: "test_uid",
DefinitionOrgID: rule.OrgID,
DefinitionUID: rule.UID,
Labels: models.InstanceLabels{"test2": "testValue2"},
State: models.InstanceStateFiring,
LastEvalTime: evaluationTime,
@ -92,6 +98,7 @@ func TestWarmStateCache(t *testing.T) {
BaseInterval: time.Second,
Logger: log.New("ngalert cache warming test"),
Store: dbstore,
RuleStore: dbstore,
}
sched := schedule.NewScheduler(schedCfg, nil)
st := state.NewManager(schedCfg.Logger, nilMetrics)
@ -101,7 +108,11 @@ func TestWarmStateCache(t *testing.T) {
for _, entry := range expectedEntries {
cacheEntry, err := st.Get(entry.CacheId)
require.NoError(t, err)
assert.True(t, entry.Equals(cacheEntry))
if diff := cmp.Diff(entry, cacheEntry, cmpopts.IgnoreFields(state.State{}, "Results")); diff != "" {
t.Errorf("Result mismatch (-want +got):\n%s", diff)
t.FailNow()
}
}
})
}

View File

@ -76,6 +76,9 @@ func createTestAlertRule(t *testing.T, dbstore *store.DBstore, intervalSeconds i
Interval: model.Duration(time.Duration(intervalSeconds) * time.Second),
Rules: []apimodels.PostableExtendedRuleNode{
{
ApiRuleNode: &apimodels.ApiRuleNode{
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
},
GrafanaManagedAlert: &apimodels.PostableGrafanaRule{
Title: fmt.Sprintf("an alert definition %d", d),
Condition: "A",

View File

@ -551,10 +551,7 @@
"=",
"$backend"
],
"groupBys": [
"resource.label.url_map_name",
"resource.label.backend_target_name"
],
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
"metricKind": "DELTA",
"metricType": "loadbalancing.googleapis.com/https/backend_latencies",
"perSeriesAligner": "ALIGN_DELTA",
@ -842,10 +839,7 @@
"=",
"$backend"
],
"groupBys": [
"resource.label.url_map_name",
"resource.label.backend_target_name"
],
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
"metricKind": "DELTA",
"metricType": "loadbalancing.googleapis.com/https/backend_request_bytes_count",
"perSeriesAligner": "ALIGN_RATE",
@ -1083,10 +1077,7 @@
"=",
"$backend"
],
"groupBys": [
"resource.label.url_map_name",
"resource.label.backend_target_name"
],
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
"metricKind": "DELTA",
"metricType": "loadbalancing.googleapis.com/https/backend_response_bytes_count",
"perSeriesAligner": "ALIGN_RATE",
@ -1263,11 +1254,7 @@
],
"schemaVersion": 27,
"style": "dark",
"tags": [
"Networking",
"Cloud Monitoring",
"GCP"
],
"tags": ["Networking", "Cloud Monitoring", "GCP"],
"templating": {
"list": [
{