Alerting: MuteTimeService to support TimeInterval and MuteTimeInterval fields in Alertmanager config (#91500)

* update notification policy provisioning to consider time intervals
* change names of intervals to be in order
This commit is contained in:
Yuri Tseretyan 2024-08-13 11:37:21 -04:00 committed by GitHub
parent d52d04b6d8
commit 7b919e3277
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 278 additions and 59 deletions

View File

@ -2213,10 +2213,10 @@ var testConfig = `
}]
}],
"mute_time_intervals": [{
"name": "interval",
"name": "interval-1",
"time_intervals": []
}, {
"name": "full-interval",
"name": "interval-2",
"time_intervals": [
{
"times": [

View File

@ -1,8 +1,8 @@
resource "grafana_mute_timing" "mute_timing_9e85e7a27b8f12ca" {
name = "interval"
resource "grafana_mute_timing" "mute_timing_28f674b3cd26d778" {
name = "interval-1"
}
resource "grafana_mute_timing" "mute_timing_b469bb50150a4298" {
name = "full-interval"
resource "grafana_mute_timing" "mute_timing_28f674b3cd26d77b" {
name = "interval-2"
intervals {

View File

@ -1 +1,43 @@
{"apiVersion":1,"muteTimes":[{"orgId":1,"name":"interval","time_intervals":[]},{"orgId":1,"name":"full-interval","time_intervals":[{"times":[{"start_time":"10:00","end_time":"12:00"}],"weekdays":["monday","wednesday","friday"],"days_of_month":["1","14:16","20"],"months":["1:3","7","12"],"years":["2023:2025"],"location":"America/New_York"}]}]}
{
"apiVersion": 1,
"muteTimes": [
{
"orgId": 1,
"name": "interval-1",
"time_intervals": []
},
{
"orgId": 1,
"name": "interval-2",
"time_intervals": [
{
"times": [
{
"start_time": "10:00",
"end_time": "12:00"
}
],
"weekdays": [
"monday",
"wednesday",
"friday"
],
"days_of_month": [
"1",
"14:16",
"20"
],
"months": [
"1:3",
"7",
"12"
],
"years": [
"2023:2025"
],
"location": "America/New_York"
}
]
}
]
}

View File

@ -1,10 +1,10 @@
apiVersion: 1
muteTimes:
- orgId: 1
name: interval
name: interval-1
time_intervals: []
- orgId: 1
name: full-interval
name: interval-2
time_intervals:
- times:
- start_time: "10:00"

View File

@ -7,6 +7,7 @@ import (
"fmt"
"hash/fnv"
"slices"
"strings"
"unsafe"
"github.com/prometheus/alertmanager/config"
@ -47,7 +48,9 @@ func (svc *MuteTimingService) GetMuteTimings(ctx context.Context, orgID int64) (
return nil, err
}
if rev.Config.AlertmanagerConfig.MuteTimeIntervals == nil {
intervals := getTimeIntervals(rev)
if len(intervals) == 0 {
return []definitions.MuteTimeInterval{}, nil
}
@ -56,8 +59,11 @@ func (svc *MuteTimingService) GetMuteTimings(ctx context.Context, orgID int64) (
return nil, err
}
result := make([]definitions.MuteTimeInterval, 0, len(rev.Config.AlertmanagerConfig.MuteTimeIntervals))
for _, interval := range rev.Config.AlertmanagerConfig.MuteTimeIntervals {
slices.SortFunc(intervals, func(a, b config.MuteTimeInterval) int {
return strings.Compare(a.Name, b.Name)
})
result := make([]definitions.MuteTimeInterval, 0, len(intervals))
for _, interval := range intervals {
version := calculateMuteTimeIntervalFingerprint(interval)
def := definitions.MuteTimeInterval{
UID: legacy_storage.NameToUid(interval.Name),
@ -79,14 +85,14 @@ func (svc *MuteTimingService) GetMuteTiming(ctx context.Context, nameOrUID strin
return definitions.MuteTimeInterval{}, err
}
mt, idx := getMuteTimingByName(rev, nameOrUID)
if idx == -1 {
mt, found := getMuteTimingByName(rev, nameOrUID)
if !found {
name, err := legacy_storage.UidToName(nameOrUID)
if err == nil {
mt, idx = getMuteTimingByName(rev, name)
mt, found = getMuteTimingByName(rev, name)
}
}
if idx == -1 {
if !found {
return definitions.MuteTimeInterval{}, ErrTimeIntervalNotFound.Errorf("")
}
@ -115,11 +121,11 @@ func (svc *MuteTimingService) CreateMuteTiming(ctx context.Context, mt definitio
return definitions.MuteTimeInterval{}, err
}
_, idx := getMuteTimingByName(revision, mt.Name)
if idx != -1 {
_, found := getMuteTimingByName(revision, mt.Name)
if found {
return definitions.MuteTimeInterval{}, ErrTimeIntervalExists.Errorf("")
}
revision.Config.AlertmanagerConfig.MuteTimeIntervals = append(revision.Config.AlertmanagerConfig.MuteTimeIntervals, mt.MuteTimeInterval)
revision.Config.AlertmanagerConfig.TimeIntervals = append(revision.Config.AlertmanagerConfig.TimeIntervals, config.TimeInterval(mt.MuteTimeInterval))
err = svc.xact.InTransaction(ctx, func(ctx context.Context) error {
if err := svc.configStore.Save(ctx, revision, orgID); err != nil {
@ -150,16 +156,16 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio
}
var old config.MuteTimeInterval
var idx = -1
found := false
if mt.UID != "" {
name, err := legacy_storage.UidToName(mt.UID)
if err == nil {
old, idx = getMuteTimingByName(revision, name)
old, found = getMuteTimingByName(revision, name)
}
} else {
old, idx = getMuteTimingByName(revision, mt.Name)
old, found = getMuteTimingByName(revision, mt.Name)
}
if idx == -1 {
if !found {
return definitions.MuteTimeInterval{}, ErrTimeIntervalNotFound.Errorf("")
}
@ -184,7 +190,7 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio
return definitions.MuteTimeInterval{}, MakeErrTimeIntervalInvalid(errors.New("name change is not allowed"))
}
revision.Config.AlertmanagerConfig.MuteTimeIntervals[idx] = mt.MuteTimeInterval
updateTimeInterval(revision, mt.MuteTimeInterval)
// TODO add diff and noop detection
err = svc.xact.InTransaction(ctx, func(ctx context.Context) error {
@ -211,14 +217,14 @@ func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, nameOrUID st
return err
}
existing, idx := getMuteTimingByName(revision, nameOrUID)
if idx == -1 {
existing, found := getMuteTimingByName(revision, nameOrUID)
if !found {
name, err := legacy_storage.UidToName(nameOrUID)
if err == nil {
existing, idx = getMuteTimingByName(revision, name)
existing, found = getMuteTimingByName(revision, name)
}
}
if idx == -1 {
if !found {
svc.log.FromContext(ctx).Debug("Time interval was not found. Skip deleting", "name", nameOrUID)
return nil
}
@ -243,7 +249,7 @@ func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, nameOrUID st
if err != nil {
return err
}
revision.Config.AlertmanagerConfig.MuteTimeIntervals = slices.Delete(revision.Config.AlertmanagerConfig.MuteTimeIntervals, idx, idx+1)
deleteTimeInterval(revision, existing)
return svc.xact.InTransaction(ctx, func(ctx context.Context) error {
keys, err := svc.ruleNotificationsStore.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgID, TimeIntervalName: existing.Name})
@ -276,14 +282,47 @@ func isMuteTimeInUseInRoutes(name string, route *definitions.Route) bool {
return false
}
func getMuteTimingByName(rev *legacy_storage.ConfigRevision, name string) (config.MuteTimeInterval, int) {
idx := slices.IndexFunc(rev.Config.AlertmanagerConfig.MuteTimeIntervals, func(interval config.MuteTimeInterval) bool {
func getMuteTimingByName(rev *legacy_storage.ConfigRevision, name string) (config.MuteTimeInterval, bool) {
intervals := getTimeIntervals(rev)
idx := slices.IndexFunc(intervals, func(interval config.MuteTimeInterval) bool {
return interval.Name == name
})
if idx == -1 {
return config.MuteTimeInterval{}, idx
return config.MuteTimeInterval{}, false
}
return rev.Config.AlertmanagerConfig.MuteTimeIntervals[idx], idx
return intervals[idx], true
}
// getTimeIntervals collects every interval stored in the revision into one
// slice of config.MuteTimeInterval. Entries from TimeIntervals come first
// (each converted to config.MuteTimeInterval), followed by the entries from
// MuteTimeIntervals in their stored order.
//
// The returned slice has its own backing array, so appending to it or
// reordering it does not touch the slices held by the revision. It is never
// nil, even when both source lists are empty.
func getTimeIntervals(rev *legacy_storage.ConfigRevision) []config.MuteTimeInterval {
	timeIntervals := rev.Config.AlertmanagerConfig.TimeIntervals
	muteIntervals := rev.Config.AlertmanagerConfig.MuteTimeIntervals

	combined := make([]config.MuteTimeInterval, 0, len(timeIntervals)+len(muteIntervals))
	for i := range timeIntervals {
		combined = append(combined, config.MuteTimeInterval(timeIntervals[i]))
	}
	combined = append(combined, muteIntervals...)
	return combined
}
// updateTimeInterval overwrites, in place, the stored interval that has the
// same name as the given one. MuteTimeIntervals is searched first; only if no
// entry matches there is TimeIntervals searched, and the replacement is
// converted to config.TimeInterval before being stored.
//
// At most one entry (the first match) is replaced. If neither list contains
// an interval with that name, the revision is left unchanged.
func updateTimeInterval(rev *legacy_storage.ConfigRevision, interval config.MuteTimeInterval) {
	cfg := &rev.Config.AlertmanagerConfig

	muteIdx := slices.IndexFunc(cfg.MuteTimeIntervals, func(existing config.MuteTimeInterval) bool {
		return existing.Name == interval.Name
	})
	if muteIdx >= 0 {
		cfg.MuteTimeIntervals[muteIdx] = interval
		return
	}

	timeIdx := slices.IndexFunc(cfg.TimeIntervals, func(existing config.TimeInterval) bool {
		return existing.Name == interval.Name
	})
	if timeIdx >= 0 {
		cfg.TimeIntervals[timeIdx] = config.TimeInterval(interval)
	}
}
// deleteTimeInterval removes every interval whose name equals interval.Name
// from both MuteTimeIntervals and TimeIntervals of the revision. Only the
// name of the argument is used for matching. Lists that contain no matching
// entry keep their contents.
func deleteTimeInterval(rev *legacy_storage.ConfigRevision, interval config.MuteTimeInterval) {
	target := interval.Name
	cfg := &rev.Config.AlertmanagerConfig

	cfg.MuteTimeIntervals = slices.DeleteFunc(cfg.MuteTimeIntervals, func(candidate config.MuteTimeInterval) bool {
		return candidate.Name == target
	})
	cfg.TimeIntervals = slices.DeleteFunc(cfg.TimeIntervals, func(candidate config.TimeInterval) bool {
		return candidate.Name == target
	})
}
func calculateMuteTimeIntervalFingerprint(interval config.MuteTimeInterval) string {

View File

@ -34,6 +34,8 @@ func TestGetMuteTimings(t *testing.T) {
Name: "Test2",
TimeIntervals: nil,
},
},
TimeIntervals: []config.TimeInterval{
{
Name: "Test3",
TimeIntervals: nil,
@ -60,7 +62,7 @@ func TestGetMuteTimings(t *testing.T) {
result, err := sut.GetMuteTimings(context.Background(), 1)
require.NoError(t, err)
require.Len(t, result, len(revision.Config.AlertmanagerConfig.MuteTimeIntervals))
require.Len(t, result, len(revision.Config.AlertmanagerConfig.MuteTimeIntervals)+len(revision.Config.AlertmanagerConfig.TimeIntervals))
require.Equal(t, "Test1", result[0].Name)
require.EqualValues(t, provenances["Test1"], result[0].Provenance)
require.NotEmpty(t, result[0].Version)
@ -135,6 +137,12 @@ func TestGetMuteTiming(t *testing.T) {
TimeIntervals: nil,
},
},
TimeIntervals: []config.TimeInterval{
{
Name: "Test2",
TimeIntervals: nil,
},
},
},
},
},
@ -171,6 +179,37 @@ func TestGetMuteTiming(t *testing.T) {
})
})
t.Run("service looks in both places", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
return revision, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceFile, nil)
result, err := sut.GetMuteTiming(context.Background(), "Test2", orgID)
require.NoError(t, err)
require.Equal(t, "Test2", result.Name)
require.EqualValues(t, models.ProvenanceFile, result.Provenance)
require.Equal(t, legacy_storage.NameToUid(result.Name), result.UID)
require.NotEmpty(t, result.Version)
require.Len(t, store.Calls, 1)
require.Equal(t, "Get", store.Calls[0].Method)
require.Equal(t, orgID, store.Calls[0].Args[1])
prov.AssertCalled(t, "GetProvenance", mock.Anything, &result, orgID)
t.Run("and by UID", func(t *testing.T) {
result2, err := sut.GetMuteTiming(context.Background(), result.UID, orgID)
require.NoError(t, err)
require.Equal(t, result, result2)
})
})
t.Run("service returns ErrTimeIntervalNotFound if no mute timings", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
@ -234,6 +273,11 @@ func TestCreateMuteTimings(t *testing.T) {
Name: "TEST",
},
},
TimeIntervals: []config.TimeInterval{
{
Name: "TEST2",
},
},
},
Receivers: nil,
},
@ -287,6 +331,16 @@ func TestCreateMuteTimings(t *testing.T) {
_, err := sut.CreateMuteTiming(context.Background(), timing, orgID)
require.Truef(t, ErrTimeIntervalExists.Is(err), "expected ErrTimeIntervalExists but got %s", err)
existing = config.MuteTimeInterval(initialConfig().AlertmanagerConfig.TimeIntervals[0])
timing = definitions.MuteTimeInterval{
MuteTimeInterval: existing,
Provenance: definitions.Provenance(models.ProvenanceFile),
}
_, err = sut.CreateMuteTiming(context.Background(), timing, orgID)
require.Truef(t, ErrTimeIntervalExists.Is(err), "expected ErrTimeIntervalExists but got %s", err)
})
t.Run("saves mute timing and provenance in a transaction", func(t *testing.T) {
@ -320,8 +374,8 @@ func TestCreateMuteTimings(t *testing.T) {
require.Equal(t, orgID, store.Calls[1].Args[2])
revision := store.Calls[1].Args[1].(*legacy_storage.ConfigRevision)
expectedTimings := append(initialConfig().AlertmanagerConfig.MuteTimeIntervals, expected)
require.EqualValues(t, expectedTimings, revision.Config.AlertmanagerConfig.MuteTimeIntervals)
expectedTimings := append(initialConfig().AlertmanagerConfig.TimeIntervals, config.TimeInterval(expected))
require.EqualValues(t, expectedTimings, revision.Config.AlertmanagerConfig.TimeIntervals)
prov.AssertCalled(t, "SetProvenance", mock.Anything, &timing, orgID, expectedProvenance)
})
@ -394,8 +448,11 @@ func TestUpdateMuteTimings(t *testing.T) {
AlertmanagerConfig: definitions.PostableApiAlertingConfig{
Config: definitions.Config{
MuteTimeIntervals: []config.MuteTimeInterval{
original,
},
TimeIntervals: []config.TimeInterval{
{
Name: "Test",
Name: "Test2",
},
},
},
@ -520,24 +577,6 @@ func TestUpdateMuteTimings(t *testing.T) {
require.ErrorIs(t, err, ErrVersionConflict)
})
t.Run("returns ErrMuteTimingsNotFound if mute timing does not exist", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
return &legacy_storage.ConfigRevision{Config: initialConfig()}, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
timing := definitions.MuteTimeInterval{
MuteTimeInterval: config.MuteTimeInterval{
Name: "No-timing",
},
Provenance: definitions.Provenance(expectedProvenance),
}
_, err := sut.UpdateMuteTiming(context.Background(), timing, orgID)
require.Truef(t, ErrTimeIntervalNotFound.Is(err), "expected ErrTimeIntervalNotFound but got %s", err)
})
t.Run("returns ErrMuteTimingsNotFound if mute timing does not exist", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
@ -643,6 +682,52 @@ func TestUpdateMuteTimings(t *testing.T) {
})
})
t.Run("updates time interval where it is", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
return &legacy_storage.ConfigRevision{Config: initialConfig()}, nil
}
store.SaveFn = func(ctx context.Context, revision *legacy_storage.ConfigRevision) error {
assertInTransaction(t, ctx)
return nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
prov.EXPECT().SetProvenance(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, _ models.Provisionable, _ int64, _ models.Provenance) error {
assertInTransaction(t, ctx)
return nil
})
original := config.MuteTimeInterval(initialConfig().AlertmanagerConfig.TimeIntervals[0])
expected := expected
expected.Name = original.Name
timing := timing
timing.MuteTimeInterval = expected
timing.Version = calculateMuteTimeIntervalFingerprint(original)
expectedVersion := calculateMuteTimeIntervalFingerprint(expected)
result, err := sut.UpdateMuteTiming(context.Background(), timing, orgID)
require.NoError(t, err)
require.EqualValues(t, expected, result.MuteTimeInterval)
require.EqualValues(t, expectedProvenance, result.Provenance)
require.EqualValues(t, expectedVersion, result.Version)
require.Equal(t, legacy_storage.NameToUid(result.Name), result.UID)
require.Len(t, store.Calls, 2)
require.Equal(t, "Get", store.Calls[0].Method)
require.Equal(t, orgID, store.Calls[0].Args[1])
require.Equal(t, "Save", store.Calls[1].Method)
require.Equal(t, orgID, store.Calls[1].Args[2])
revision := store.Calls[1].Args[1].(*legacy_storage.ConfigRevision)
require.EqualValues(t, []config.TimeInterval{config.TimeInterval(expected)}, revision.Config.AlertmanagerConfig.TimeIntervals)
prov.AssertCalled(t, "SetProvenance", mock.Anything, &timing, orgID, expectedProvenance)
})
t.Run("propagates errors", func(t *testing.T) {
t.Run("when unable to read config", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
@ -725,6 +810,11 @@ func TestDeleteMuteTimings(t *testing.T) {
},
timingToDelete,
},
TimeIntervals: []config.TimeInterval{
{
Name: "timing-to-delete2",
},
},
},
Receivers: nil,
},
@ -857,6 +947,45 @@ func TestDeleteMuteTimings(t *testing.T) {
})
})
t.Run("deletes time interval and provenance", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {
return &legacy_storage.ConfigRevision{Config: initialConfig()}, nil
}
store.SaveFn = func(ctx context.Context, revision *legacy_storage.ConfigRevision) error {
assertInTransaction(t, ctx)
return nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceNone, nil)
prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, _ models.Provisionable, _ int64) error {
assertInTransaction(t, ctx)
return nil
})
timingToDelete := initialConfig().AlertmanagerConfig.TimeIntervals[0]
correctVersion := calculateMuteTimeIntervalFingerprint(config.MuteTimeInterval(timingToDelete))
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, "", correctVersion)
require.NoError(t, err)
require.Len(t, store.Calls, 2)
require.Equal(t, "Get", store.Calls[0].Method)
require.Equal(t, orgID, store.Calls[0].Args[1])
require.Equal(t, "Save", store.Calls[1].Method)
require.Equal(t, orgID, store.Calls[1].Args[2])
revision := store.Calls[1].Args[1].(*legacy_storage.ConfigRevision)
expectedMuteTimings := slices.DeleteFunc(initialConfig().AlertmanagerConfig.TimeIntervals, func(interval config.TimeInterval) bool {
return interval.Name == timingToDelete.Name
})
require.EqualValues(t, expectedMuteTimings, revision.Config.AlertmanagerConfig.TimeIntervals)
require.EqualValues(t, initialConfig().AlertmanagerConfig.MuteTimeIntervals, revision.Config.AlertmanagerConfig.MuteTimeIntervals)
prov.AssertCalled(t, "DeleteProvenance", mock.Anything, &definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval(timingToDelete)}, orgID)
})
t.Run("deletes mute timing and provenance by UID", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*legacy_storage.ConfigRevision, error) {

View File

@ -73,11 +73,14 @@ func (nps *NotificationPolicyService) UpdatePolicyTree(ctx context.Context, orgI
return fmt.Errorf("%w: %s", ErrValidation, err.Error())
}
muteTimes := map[string]struct{}{}
timeIntervals := map[string]struct{}{}
for _, mt := range revision.Config.AlertmanagerConfig.MuteTimeIntervals {
muteTimes[mt.Name] = struct{}{}
timeIntervals[mt.Name] = struct{}{}
}
err = tree.ValidateMuteTimes(muteTimes)
for _, mt := range revision.Config.AlertmanagerConfig.TimeIntervals {
timeIntervals[mt.Name] = struct{}{}
}
err = tree.ValidateMuteTimes(timeIntervals)
if err != nil {
return fmt.Errorf("%w: %s", ErrValidation, err.Error())
}

View File

@ -66,6 +66,12 @@ func TestNotificationPolicyService(t *testing.T) {
TimeIntervals: []timeinterval.TimeInterval{},
},
}
cfg.AlertmanagerConfig.TimeIntervals = []config.TimeInterval{
{
Name: "existing-ti",
TimeIntervals: []timeinterval.TimeInterval{},
},
}
data, _ := legacy_storage.SerializeAlertmanagerConfig(*cfg)
mockStore.On("GetLatestAlertmanagerConfiguration", mock.Anything, mock.Anything).
Return(&models.AlertConfiguration{AlertmanagerConfiguration: string(data)}, nil)
@ -75,7 +81,7 @@ func TestNotificationPolicyService(t *testing.T) {
newRoute := createTestRoutingTree()
newRoute.Routes = append(newRoute.Routes, &definitions.Route{
Receiver: "slack receiver",
MuteTimeIntervals: []string{"existing"},
MuteTimeIntervals: []string{"existing", "existing-ti"},
})
err := sut.UpdatePolicyTree(context.Background(), 1, newRoute, models.ProvenanceNone)