mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Load annotations from rule into State cache (#33542)
for https://github.com/grafana/alerting-squad/issues/127
This commit is contained in:
parent
0c2bcbf2bc
commit
7823842c5d
@ -7,8 +7,8 @@ import (
|
||||
|
||||
// AlertInstance represents a single alert instance.
|
||||
type AlertInstance struct {
|
||||
DefinitionOrgID int64 `xorm:"def_org_id"`
|
||||
DefinitionUID string `xorm:"def_uid"`
|
||||
RuleOrgID int64 `xorm:"def_org_id"`
|
||||
RuleUID string `xorm:"def_uid"`
|
||||
Labels InstanceLabels
|
||||
LabelsHash string
|
||||
CurrentState InstanceStateType
|
||||
@ -78,8 +78,8 @@ type FetchUniqueOrgIdsQuery struct {
|
||||
|
||||
// ListAlertInstancesQueryResult represents the result of listAlertInstancesQuery.
|
||||
type ListAlertInstancesQueryResult struct {
|
||||
DefinitionOrgID int64 `xorm:"def_org_id" json:"definitionOrgId"`
|
||||
DefinitionUID string `xorm:"def_uid" json:"definitionUid"`
|
||||
RuleOrgID int64 `xorm:"def_org_id" json:"definitionOrgId"`
|
||||
RuleDefinitionUID string `xorm:"def_uid" json:"definitionUid"`
|
||||
Labels InstanceLabels `json:"labels"`
|
||||
LabelsHash string `json:"labeHash"`
|
||||
CurrentState InstanceStateType `json:"currentState"`
|
||||
@ -99,11 +99,11 @@ func ValidateAlertInstance(alertInstance *AlertInstance) error {
|
||||
return fmt.Errorf("alert instance is invalid because it is nil")
|
||||
}
|
||||
|
||||
if alertInstance.DefinitionOrgID == 0 {
|
||||
if alertInstance.RuleOrgID == 0 {
|
||||
return fmt.Errorf("alert instance is invalid due to missing alert definition organisation")
|
||||
}
|
||||
|
||||
if alertInstance.DefinitionUID == "" {
|
||||
if alertInstance.RuleUID == "" {
|
||||
return fmt.Errorf("alert instance is invalid due to missing alert definition uid")
|
||||
}
|
||||
|
||||
|
@ -341,27 +341,48 @@ func (sch *schedule) WarmStateCache(st *state.Manager) {
|
||||
st.ResetCache()
|
||||
|
||||
orgIdsCmd := models.FetchUniqueOrgIdsQuery{}
|
||||
|
||||
if err := sch.store.FetchOrgIds(&orgIdsCmd); err != nil {
|
||||
sch.log.Error("unable to fetch orgIds", "msg", err.Error())
|
||||
}
|
||||
|
||||
var states []*state.State
|
||||
for _, orgIdResult := range orgIdsCmd.Result {
|
||||
// Get Rules
|
||||
ruleCmd := models.ListAlertRulesQuery{
|
||||
OrgID: orgIdResult.DefinitionOrgID,
|
||||
}
|
||||
if err := sch.ruleStore.GetOrgAlertRules(&ruleCmd); err != nil {
|
||||
sch.log.Error("unable to fetch previous state", "msg", err.Error())
|
||||
}
|
||||
|
||||
ruleByUID := make(map[string]*models.AlertRule, len(ruleCmd.Result))
|
||||
for _, rule := range ruleCmd.Result {
|
||||
ruleByUID[rule.UID] = rule
|
||||
}
|
||||
|
||||
// Get Instances
|
||||
cmd := models.ListAlertInstancesQuery{
|
||||
DefinitionOrgID: orgIdResult.DefinitionOrgID,
|
||||
}
|
||||
if err := sch.store.ListAlertInstances(&cmd); err != nil {
|
||||
if err := sch.ruleStore.ListAlertInstances(&cmd); err != nil {
|
||||
sch.log.Error("unable to fetch previous state", "msg", err.Error())
|
||||
}
|
||||
|
||||
for _, entry := range cmd.Result {
|
||||
ruleForEntry, ok := ruleByUID[entry.RuleDefinitionUID]
|
||||
if !ok {
|
||||
sch.log.Error("rule not found for instance, ignoring", "rule", entry.RuleDefinitionUID)
|
||||
}
|
||||
|
||||
lbs := map[string]string(entry.Labels)
|
||||
cacheId, err := entry.Labels.StringKey()
|
||||
if err != nil {
|
||||
sch.log.Error("error getting cacheId for entry", "msg", err.Error())
|
||||
}
|
||||
stateForEntry := &state.State{
|
||||
AlertRuleUID: entry.DefinitionUID,
|
||||
OrgID: entry.DefinitionOrgID,
|
||||
AlertRuleUID: entry.RuleDefinitionUID,
|
||||
OrgID: entry.RuleOrgID,
|
||||
CacheId: cacheId,
|
||||
Labels: lbs,
|
||||
State: translateInstanceState(entry.CurrentState),
|
||||
@ -369,6 +390,7 @@ func (sch *schedule) WarmStateCache(st *state.Manager) {
|
||||
StartsAt: entry.CurrentStateSince,
|
||||
EndsAt: entry.CurrentStateEnd,
|
||||
LastEvaluationTime: entry.LastEvalTime,
|
||||
Annotations: ruleForEntry.Annotations,
|
||||
}
|
||||
states = append(states, stateForEntry)
|
||||
}
|
||||
|
@ -134,5 +134,6 @@ func (a *State) Equals(b *State) bool {
|
||||
a.State.String() == b.State.String() &&
|
||||
a.StartsAt == b.StartsAt &&
|
||||
a.EndsAt == b.EndsAt &&
|
||||
a.LastEvaluationTime == b.LastEvaluationTime
|
||||
a.LastEvaluationTime == b.LastEvaluationTime &&
|
||||
data.Labels(a.Annotations).String() == data.Labels(b.Annotations).String()
|
||||
}
|
||||
|
@ -86,8 +86,8 @@ func (st DBstore) SaveAlertInstance(cmd *models.SaveAlertInstanceCommand) error
|
||||
}
|
||||
|
||||
alertInstance := &models.AlertInstance{
|
||||
DefinitionOrgID: cmd.DefinitionOrgID,
|
||||
DefinitionUID: cmd.DefinitionUID,
|
||||
RuleOrgID: cmd.DefinitionOrgID,
|
||||
RuleUID: cmd.DefinitionUID,
|
||||
Labels: cmd.Labels,
|
||||
LabelsHash: labelsHash,
|
||||
CurrentState: cmd.State,
|
||||
@ -100,7 +100,7 @@ func (st DBstore) SaveAlertInstance(cmd *models.SaveAlertInstanceCommand) error
|
||||
return err
|
||||
}
|
||||
|
||||
params := append(make([]interface{}, 0), alertInstance.DefinitionOrgID, alertInstance.DefinitionUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
|
||||
params := append(make([]interface{}, 0), alertInstance.RuleOrgID, alertInstance.RuleUID, labelTupleJSON, alertInstance.LabelsHash, alertInstance.CurrentState, alertInstance.CurrentStateSince.Unix(), alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())
|
||||
|
||||
upsertSQL := st.SQLStore.Dialect.UpsertSQL(
|
||||
"alert_instance",
|
||||
|
@ -58,8 +58,8 @@ func TestAlertInstanceOperations(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, saveCmd.Labels, getCmd.Result.Labels)
|
||||
require.Equal(t, alertRule1.OrgID, getCmd.Result.DefinitionOrgID)
|
||||
require.Equal(t, alertRule1.UID, getCmd.Result.DefinitionUID)
|
||||
require.Equal(t, alertRule1.OrgID, getCmd.Result.RuleOrgID)
|
||||
require.Equal(t, alertRule1.UID, getCmd.Result.RuleUID)
|
||||
})
|
||||
|
||||
t.Run("can save and read new alert instance with no labels", func(t *testing.T) {
|
||||
@ -80,8 +80,8 @@ func TestAlertInstanceOperations(t *testing.T) {
|
||||
err = dbstore.GetAlertInstance(getCmd)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, alertRule2.OrgID, getCmd.Result.DefinitionOrgID)
|
||||
require.Equal(t, alertRule2.UID, getCmd.Result.DefinitionUID)
|
||||
require.Equal(t, alertRule2.OrgID, getCmd.Result.RuleOrgID)
|
||||
require.Equal(t, alertRule2.UID, getCmd.Result.RuleUID)
|
||||
require.Equal(t, saveCmd.Labels, getCmd.Result.Labels)
|
||||
})
|
||||
|
||||
@ -169,8 +169,8 @@ func TestAlertInstanceOperations(t *testing.T) {
|
||||
|
||||
require.Len(t, listQuery.Result, 1)
|
||||
|
||||
require.Equal(t, saveCmdTwo.DefinitionOrgID, listQuery.Result[0].DefinitionOrgID)
|
||||
require.Equal(t, saveCmdTwo.DefinitionUID, listQuery.Result[0].DefinitionUID)
|
||||
require.Equal(t, saveCmdTwo.DefinitionOrgID, listQuery.Result[0].RuleOrgID)
|
||||
require.Equal(t, saveCmdTwo.DefinitionUID, listQuery.Result[0].RuleDefinitionUID)
|
||||
require.Equal(t, saveCmdTwo.Labels, listQuery.Result[0].Labels)
|
||||
require.Equal(t, saveCmdTwo.State, listQuery.Result[0].CurrentState)
|
||||
})
|
||||
|
@ -8,6 +8,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
|
||||
@ -32,11 +34,14 @@ type evalAppliedInfo struct {
|
||||
|
||||
func TestWarmStateCache(t *testing.T) {
|
||||
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
|
||||
dbstore := setupTestEnv(t, 1)
|
||||
|
||||
rule := createTestAlertRule(t, dbstore, 600)
|
||||
|
||||
expectedEntries := []*state.State{
|
||||
{
|
||||
AlertRuleUID: "test_uid",
|
||||
OrgID: 123,
|
||||
AlertRuleUID: rule.UID,
|
||||
OrgID: rule.OrgID,
|
||||
CacheId: `[["test1","testValue1"]]`,
|
||||
Labels: data.Labels{"test1": "testValue1"},
|
||||
State: eval.Normal,
|
||||
@ -46,9 +51,10 @@ func TestWarmStateCache(t *testing.T) {
|
||||
StartsAt: evaluationTime.Add(-1 * time.Minute),
|
||||
EndsAt: evaluationTime.Add(1 * time.Minute),
|
||||
LastEvaluationTime: evaluationTime,
|
||||
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
|
||||
}, {
|
||||
AlertRuleUID: "test_uid",
|
||||
OrgID: 123,
|
||||
AlertRuleUID: rule.UID,
|
||||
OrgID: rule.OrgID,
|
||||
CacheId: `[["test2","testValue2"]]`,
|
||||
Labels: data.Labels{"test2": "testValue2"},
|
||||
State: eval.Alerting,
|
||||
@ -58,25 +64,25 @@ func TestWarmStateCache(t *testing.T) {
|
||||
StartsAt: evaluationTime.Add(-1 * time.Minute),
|
||||
EndsAt: evaluationTime.Add(1 * time.Minute),
|
||||
LastEvaluationTime: evaluationTime,
|
||||
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
|
||||
},
|
||||
}
|
||||
|
||||
dbstore := setupTestEnv(t, 1)
|
||||
|
||||
saveCmd1 := &models.SaveAlertInstanceCommand{
|
||||
DefinitionOrgID: 123,
|
||||
DefinitionUID: "test_uid",
|
||||
DefinitionOrgID: rule.OrgID,
|
||||
DefinitionUID: rule.UID,
|
||||
Labels: models.InstanceLabels{"test1": "testValue1"},
|
||||
State: models.InstanceStateNormal,
|
||||
LastEvalTime: evaluationTime,
|
||||
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
|
||||
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
|
||||
}
|
||||
|
||||
_ = dbstore.SaveAlertInstance(saveCmd1)
|
||||
|
||||
saveCmd2 := &models.SaveAlertInstanceCommand{
|
||||
DefinitionOrgID: 123,
|
||||
DefinitionUID: "test_uid",
|
||||
DefinitionOrgID: rule.OrgID,
|
||||
DefinitionUID: rule.UID,
|
||||
Labels: models.InstanceLabels{"test2": "testValue2"},
|
||||
State: models.InstanceStateFiring,
|
||||
LastEvalTime: evaluationTime,
|
||||
@ -92,6 +98,7 @@ func TestWarmStateCache(t *testing.T) {
|
||||
BaseInterval: time.Second,
|
||||
Logger: log.New("ngalert cache warming test"),
|
||||
Store: dbstore,
|
||||
RuleStore: dbstore,
|
||||
}
|
||||
sched := schedule.NewScheduler(schedCfg, nil)
|
||||
st := state.NewManager(schedCfg.Logger, nilMetrics)
|
||||
@ -101,7 +108,11 @@ func TestWarmStateCache(t *testing.T) {
|
||||
for _, entry := range expectedEntries {
|
||||
cacheEntry, err := st.Get(entry.CacheId)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, entry.Equals(cacheEntry))
|
||||
|
||||
if diff := cmp.Diff(entry, cacheEntry, cmpopts.IgnoreFields(state.State{}, "Results")); diff != "" {
|
||||
t.Errorf("Result mismatch (-want +got):\n%s", diff)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -76,6 +76,9 @@ func createTestAlertRule(t *testing.T, dbstore *store.DBstore, intervalSeconds i
|
||||
Interval: model.Duration(time.Duration(intervalSeconds) * time.Second),
|
||||
Rules: []apimodels.PostableExtendedRuleNode{
|
||||
{
|
||||
ApiRuleNode: &apimodels.ApiRuleNode{
|
||||
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
|
||||
},
|
||||
GrafanaManagedAlert: &apimodels.PostableGrafanaRule{
|
||||
Title: fmt.Sprintf("an alert definition %d", d),
|
||||
Condition: "A",
|
||||
|
@ -551,10 +551,7 @@
|
||||
"=",
|
||||
"$backend"
|
||||
],
|
||||
"groupBys": [
|
||||
"resource.label.url_map_name",
|
||||
"resource.label.backend_target_name"
|
||||
],
|
||||
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
|
||||
"metricKind": "DELTA",
|
||||
"metricType": "loadbalancing.googleapis.com/https/backend_latencies",
|
||||
"perSeriesAligner": "ALIGN_DELTA",
|
||||
@ -842,10 +839,7 @@
|
||||
"=",
|
||||
"$backend"
|
||||
],
|
||||
"groupBys": [
|
||||
"resource.label.url_map_name",
|
||||
"resource.label.backend_target_name"
|
||||
],
|
||||
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
|
||||
"metricKind": "DELTA",
|
||||
"metricType": "loadbalancing.googleapis.com/https/backend_request_bytes_count",
|
||||
"perSeriesAligner": "ALIGN_RATE",
|
||||
@ -1083,10 +1077,7 @@
|
||||
"=",
|
||||
"$backend"
|
||||
],
|
||||
"groupBys": [
|
||||
"resource.label.url_map_name",
|
||||
"resource.label.backend_target_name"
|
||||
],
|
||||
"groupBys": ["resource.label.url_map_name", "resource.label.backend_target_name"],
|
||||
"metricKind": "DELTA",
|
||||
"metricType": "loadbalancing.googleapis.com/https/backend_response_bytes_count",
|
||||
"perSeriesAligner": "ALIGN_RATE",
|
||||
@ -1263,11 +1254,7 @@
|
||||
],
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"Networking",
|
||||
"Cloud Monitoring",
|
||||
"GCP"
|
||||
],
|
||||
"tags": ["Networking", "Cloud Monitoring", "GCP"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user