Alerting: Mute Timing service to prevent changing provenance status to none (#88462)

* use relaxed validation to not introduce breaking changes for now but to be able to use the service
in non-provisioning APIs.
This commit is contained in:
Yuri Tseretyan 2024-06-04 08:54:33 -04:00 committed by GitHub
parent 25c57f21cd
commit a63ef42816
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 182 additions and 22 deletions

View File

@ -56,7 +56,7 @@ type MuteTimingService interface {
GetMuteTiming(ctx context.Context, name string, orgID int64) (definitions.MuteTimeInterval, error)
CreateMuteTiming(ctx context.Context, mt definitions.MuteTimeInterval, orgID int64) (definitions.MuteTimeInterval, error)
UpdateMuteTiming(ctx context.Context, mt definitions.MuteTimeInterval, orgID int64) (definitions.MuteTimeInterval, error)
DeleteMuteTiming(ctx context.Context, name string, orgID int64) error
DeleteMuteTiming(ctx context.Context, name string, orgID int64, provenance definitions.Provenance) error
}
type AlertRuleService interface {
@ -303,7 +303,7 @@ func (srv *ProvisioningSrv) RoutePutMuteTiming(c *contextmodel.ReqContext, mt de
}
func (srv *ProvisioningSrv) RouteDeleteMuteTiming(c *contextmodel.ReqContext, name string) response.Response {
err := srv.muteTimings.DeleteMuteTiming(c.Req.Context(), name, c.SignedInUser.GetOrgID())
err := srv.muteTimings.DeleteMuteTiming(c.Req.Context(), name, c.SignedInUser.GetOrgID(), determineProvenance(c))
if err != nil {
return response.ErrOrFallback(http.StatusInternalServerError, "failed to delete mute timing", err)
}

View File

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/util/errutil"
)
@ -15,6 +16,11 @@ var (
ErrNoAlertmanagerConfiguration = errutil.Internal("alerting.notification.configMissing", errutil.WithPublicMessage("No alertmanager configuration present in this organization"))
ErrBadAlertmanagerConfiguration = errutil.Internal("alerting.notification.configCorrupted").MustTemplate("Failed to unmarshal the Alertmanager configuration", errutil.WithPublic("Current Alertmanager configuration in the storage is corrupted. Reset the configuration or rollback to a recent valid one."))
ErrProvenanceChangeNotAllowed = errutil.Forbidden("alerting.notifications.invalidProvenance").MustTemplate(
"Resource with provenance status '{{ .Public.CurrentProvenance }}' cannot be managed via API that handles resources with provenance status '{{ .Public.TargetProvenance }}'",
errutil.WithPublic("Resource with provenance status '{{ .Public.CurrentProvenance }}' cannot be managed via API that handles resources with provenance status '{{ .Public.TargetProvenance }}'. You must use appropriate API to manage this resource"),
)
ErrTimeIntervalNotFound = errutil.NotFound("alerting.notifications.time-intervals.notFound")
ErrTimeIntervalExists = errutil.BadRequest("alerting.notifications.time-intervals.nameExists", errutil.WithPublicMessage("Time interval with this name already exists. Use a different name or update existing one."))
ErrTimeIntervalInvalid = errutil.BadRequest("alerting.notifications.time-intervals.invalidFormat").MustTemplate("Invalid format of the submitted time interval", errutil.WithPublic("Time interval is in invalid format. Correct the payload and try again."))
@ -44,3 +50,19 @@ func MakeErrTimeIntervalInvalid(err error) error {
return ErrTimeIntervalInvalid.Build(data)
}
func MakeErrProvenanceChangeNotAllowed(from, to models.Provenance) error {
if to == "" {
to = "none"
}
if from == "" {
from = "none"
}
data := errutil.TemplateData{
Public: map[string]interface{}{
"TargetProvenance": to,
"SourceProvenance": from,
},
}
return ErrProvenanceChangeNotAllowed.Build(data)
}

View File

@ -15,6 +15,7 @@ type MuteTimingService struct {
provenanceStore ProvisioningStore
xact TransactionManager
log log.Logger
validator ProvenanceStatusTransitionValidator
}
func NewMuteTimingService(config AMConfigStore, prov ProvisioningStore, xact TransactionManager, log log.Logger) *MuteTimingService {
@ -23,6 +24,7 @@ func NewMuteTimingService(config AMConfigStore, prov ProvisioningStore, xact Tra
provenanceStore: prov,
xact: xact,
log: log,
validator: ValidateProvenanceRelaxed,
}
}
@ -116,6 +118,15 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio
return definitions.MuteTimeInterval{}, MakeErrTimeIntervalInvalid(err)
}
// check that provenance is not changed in an invalid way
storedProvenance, err := svc.provenanceStore.GetProvenance(ctx, &mt, orgID)
if err != nil {
return definitions.MuteTimeInterval{}, err
}
if err := svc.validator(storedProvenance, models.Provenance(mt.Provenance)); err != nil {
return definitions.MuteTimeInterval{}, err
}
revision, err := svc.configStore.Get(ctx, orgID)
if err != nil {
return definitions.MuteTimeInterval{}, err
@ -132,7 +143,6 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio
revision.cfg.AlertmanagerConfig.MuteTimeIntervals[idx] = mt.MuteTimeInterval
// TODO add diff and noop detection
// TODO add fail if different provenance
err = svc.xact.InTransaction(ctx, func(ctx context.Context) error {
if err := svc.configStore.Save(ctx, revision, orgID); err != nil {
return err
@ -146,7 +156,17 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio
}
// DeleteMuteTiming deletes the mute timing with the given name in the given org. If the mute timing does not exist, no error is returned.
func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string, orgID int64) error {
func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string, orgID int64, provenance definitions.Provenance) error {
target := definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: name}, Provenance: provenance}
// check that provenance is not changed in an invalid way
storedProvenance, err := svc.provenanceStore.GetProvenance(ctx, &target, orgID)
if err != nil {
return err
}
if err := svc.validator(storedProvenance, models.Provenance(provenance)); err != nil {
return err
}
revision, err := svc.configStore.Get(ctx, orgID)
if err != nil {
return err
@ -169,7 +189,6 @@ func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string,
if err := svc.configStore.Save(ctx, revision, orgID); err != nil {
return err
}
target := definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: name}}
return svc.provenanceStore.DeleteProvenance(ctx, &target, orgID)
})
}

View File

@ -410,17 +410,35 @@ func TestUpdateMuteTimings(t *testing.T) {
require.Truef(t, ErrTimeIntervalInvalid.Base.Is(err), "expected ErrTimeIntervalInvalid but got %s", err)
})
t.Run("rejects mute timings if provenance is not right", func(t *testing.T) {
sut, _, prov := createMuteTimingSvcSut()
expectedErr := errors.New("test")
sut.validator = func(from, to models.Provenance) error {
return expectedErr
}
timing := definitions.MuteTimeInterval{
MuteTimeInterval: expected,
Provenance: definitions.Provenance(models.ProvenanceFile),
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
_, err := sut.UpdateMuteTiming(context.Background(), timing, orgID)
require.ErrorIs(t, err, expectedErr)
})
t.Run("returns ErrMuteTimingsNotFound if mute timing does not exist", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
timing := definitions.MuteTimeInterval{
MuteTimeInterval: config.MuteTimeInterval{
Name: "No-timing",
},
Provenance: definitions.Provenance(models.ProvenanceFile),
Provenance: definitions.Provenance(expectedProvenance),
}
_, err := sut.UpdateMuteTiming(context.Background(), timing, orgID)
@ -437,6 +455,7 @@ func TestUpdateMuteTimings(t *testing.T) {
assertInTransaction(t, ctx)
return nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
prov.EXPECT().SetProvenance(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, _ models.Provisionable, _ int64, _ models.Provenance) error {
assertInTransaction(t, ctx)
@ -464,8 +483,9 @@ func TestUpdateMuteTimings(t *testing.T) {
t.Run("propagates errors", func(t *testing.T) {
t.Run("when unable to read config", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
sut, store, prov := createMuteTimingSvcSut()
expectedErr := errors.New("test-err")
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(expectedProvenance, nil)
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return nil, expectedErr
}
@ -479,6 +499,9 @@ func TestUpdateMuteTimings(t *testing.T) {
return &cfgRevision{cfg: initialConfig()}, nil
}
expectedErr := fmt.Errorf("failed to save provenance")
sut.provenanceStore.(*MockProvisioningStore).EXPECT().
GetProvenance(mock.Anything, mock.Anything, mock.Anything).
Return(expectedProvenance, nil)
sut.provenanceStore.(*MockProvisioningStore).EXPECT().
SetProvenance(mock.Anything, mock.Anything, mock.Anything, mock.Anything).
Return(expectedErr)
@ -499,6 +522,9 @@ func TestUpdateMuteTimings(t *testing.T) {
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
sut.provenanceStore.(*MockProvisioningStore).EXPECT().
GetProvenance(mock.Anything, mock.Anything, mock.Anything).
Return(expectedProvenance, nil)
expectedErr := errors.New("test-err")
store.SaveFn = func(ctx context.Context, revision *cfgRevision) error {
return expectedErr
@ -547,9 +573,10 @@ func TestDeleteMuteTimings(t *testing.T) {
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil)
prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).Return(nil)
err := sut.DeleteMuteTiming(context.Background(), "no-timing", orgID)
err := sut.DeleteMuteTiming(context.Background(), "no-timing", orgID, definitions.Provenance(models.ProvenanceAPI))
require.NoError(t, err)
require.Len(t, store.Calls, 2)
@ -562,16 +589,35 @@ func TestDeleteMuteTimings(t *testing.T) {
require.EqualValues(t, initialConfig().AlertmanagerConfig.MuteTimeIntervals, revision.cfg.AlertmanagerConfig.MuteTimeIntervals)
prov.AssertCalled(t, "DeleteProvenance", mock.Anything, &definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: "no-timing"}}, orgID)
prov.AssertCalled(t, "DeleteProvenance", mock.Anything, &definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: "no-timing"}, Provenance: definitions.Provenance(models.ProvenanceAPI)}, orgID)
})
t.Run("returns ErrTimeIntervalInUse if mute timing is used", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
t.Run("fails if it was created with different provenance", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
expectedErr := errors.New("test")
sut.validator = func(from, to models.Provenance) error {
return expectedErr
}
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil)
prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).Return(nil)
err := sut.DeleteMuteTiming(context.Background(), usedTiming, orgID)
err := sut.DeleteMuteTiming(context.Background(), "no-timing", orgID, definitions.Provenance(models.ProvenanceNone))
require.ErrorIs(t, err, expectedErr)
require.Len(t, store.Calls, 0)
})
t.Run("returns ErrTimeIntervalInUse if mute timing is used", func(t *testing.T) {
sut, store, prov := createMuteTimingSvcSut()
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil)
err := sut.DeleteMuteTiming(context.Background(), usedTiming, orgID, definitions.Provenance(models.ProvenanceAPI))
require.Len(t, store.Calls, 1)
require.Equal(t, "Get", store.Calls[0].Method)
@ -588,13 +634,14 @@ func TestDeleteMuteTimings(t *testing.T) {
assertInTransaction(t, ctx)
return nil
}
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceNone, nil)
prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, _ models.Provisionable, _ int64) error {
assertInTransaction(t, ctx)
return nil
})
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID)
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, "")
require.NoError(t, err)
require.Len(t, store.Calls, 2)
@ -615,17 +662,19 @@ func TestDeleteMuteTimings(t *testing.T) {
t.Run("propagates errors", func(t *testing.T) {
t.Run("when unable to read config", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
sut, store, prov := createMuteTimingSvcSut()
expectedErr := errors.New("test-err")
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceNone, nil)
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return nil, expectedErr
}
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID)
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, "")
require.ErrorIs(t, err, expectedErr)
})
t.Run("when provenance fails to save", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
sut, store, prov := createMuteTimingSvcSut()
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceNone, nil)
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
@ -634,7 +683,7 @@ func TestDeleteMuteTimings(t *testing.T) {
DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).
Return(expectedErr)
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID)
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, "")
require.ErrorIs(t, err, expectedErr)
@ -646,7 +695,8 @@ func TestDeleteMuteTimings(t *testing.T) {
})
t.Run("when AM config fails to save", func(t *testing.T) {
sut, store, _ := createMuteTimingSvcSut()
sut, store, prov := createMuteTimingSvcSut()
prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceNone, nil)
store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) {
return &cfgRevision{cfg: initialConfig()}, nil
}
@ -655,7 +705,7 @@ func TestDeleteMuteTimings(t *testing.T) {
return expectedErr
}
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID)
err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, "")
require.ErrorIs(t, err, expectedErr)
@ -676,5 +726,8 @@ func createMuteTimingSvcSut() (*MuteTimingService, *alertmanagerConfigStoreFake,
provenanceStore: prov,
xact: newNopTransactionManager(),
log: log.NewNopLogger(),
validator: func(from, to models.Provenance) error {
return nil
},
}, store, prov
}

View File

@ -11,3 +11,18 @@ func canUpdateProvenanceInRuleGroup(storedProvenance, provenance models.Provenan
storedProvenance == models.ProvenanceNone ||
(storedProvenance == models.ProvenanceAPI && provenance == models.ProvenanceNone)
}
type ProvenanceStatusTransitionValidator = func(from, to models.Provenance) error
// ValidateProvenanceRelaxed checks if the transition of provenance status from `from` to `to` is allowed.
// Applies relaxed checks that prevents only transition from any status to `none`.
// Returns ErrProvenanceChangeNotAllowed if transition is not allowed
func ValidateProvenanceRelaxed(from, to models.Provenance) error {
if from == models.ProvenanceNone { // allow any transition from none
return nil
}
if to == models.ProvenanceNone { // allow any transition to none unless it's from "none" either
return MakeErrProvenanceChangeNotAllowed(from, to)
}
return nil
}

View File

@ -0,0 +1,51 @@
package provisioning
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/util"
)
func TestValidateProvenanceRelaxed(t *testing.T) {
all := []models.Provenance{
models.ProvenanceNone,
models.ProvenanceAPI,
models.ProvenanceFile,
models.Provenance(fmt.Sprintf("random-%s", util.GenerateShortUID())),
}
t.Run("all transitions from 'none' are allowed", func(t *testing.T) {
for _, provenance := range all {
assert.NoError(t, ValidateProvenanceRelaxed(models.ProvenanceNone, provenance))
}
})
t.Run("noop transitions are allowed", func(t *testing.T) {
for _, provenance := range all {
assert.NoError(t, ValidateProvenanceRelaxed(provenance, provenance))
}
})
t.Run("no transitions to 'none' are allowed", func(t *testing.T) {
for _, from := range all {
if from == models.ProvenanceNone {
continue
}
assert.ErrorIsf(t, ErrProvenanceChangeNotAllowed.Base, ValidateProvenanceRelaxed(from, models.ProvenanceNone), "transition %s -> 'none' is allowed but should not", from)
}
})
t.Run("transitions between others are are allowed", func(t *testing.T) {
for _, from := range all {
if from == models.ProvenanceNone {
continue
}
for _, to := range all {
if to == models.ProvenanceNone || from == to {
continue
}
assert.NoError(t, ValidateProvenanceRelaxed(from, to))
}
}
})
}

View File

@ -63,7 +63,7 @@ func (c *defaultMuteTimesProvisioner) Unprovision(ctx context.Context,
files []*AlertingFile) error {
for _, file := range files {
for _, deleteMuteTime := range file.DeleteMuteTimes {
err := c.muteTimingService.DeleteMuteTiming(ctx, deleteMuteTime.Name, deleteMuteTime.OrgID)
err := c.muteTimingService.DeleteMuteTiming(ctx, deleteMuteTime.Name, deleteMuteTime.OrgID, definitions.Provenance(models.ProvenanceFile))
if err != nil {
return err
}