Alerting: Send state to the remote Alertmanager (#78538)

* Alerting: Introduce a Mimir client as part of the Remote Alertmanager

Mimir client that understands the new APIs developed for mimir. Very much a WIP still.

* more wip

* appease the linter

* more linting

* add more code

* get state from kvstore, encode, send

* send state to the remote Alertmanager, extract fullstate logic into its own function

* pass kvstore to remote.NewAlertmanager()

* refactor

* add fake kvstore to tests

* tests

* use FileStore to get state

* always log 'completed state upload'

* refactor compareRemoteConfig

* base64-encode the state in the file store

* export silences and nflog filenames, refactor

* log 'completed state/config upload...' regardless of outcome

* add values to the state store in tests

* address code review comments

* log error from filestore

---------

Co-authored-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
Santiago
2023-11-29 12:49:39 +01:00
committed by GitHub
parent 72d32eed27
commit 73776f37eb
8 changed files with 101 additions and 55 deletions

View File

@@ -27,8 +27,8 @@ import (
)
const (
notificationLogFilename = "notifications"
silencesFilename = "silences"
NotificationLogFilename = "notifications"
SilencesFilename = "silences"
workingDir = "alerting"
// maintenanceNotificationAndSilences how often should we flush and garbage collect notifications
@@ -89,22 +89,22 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID)))
fileStore := NewFileStore(orgID, kvStore, workingPath)
nflogFilepath, err := fileStore.FilepathFor(ctx, notificationLogFilename)
nflogFilepath, err := fileStore.FilepathFor(ctx, NotificationLogFilename)
if err != nil {
return nil, err
}
silencesFilePath, err := fileStore.FilepathFor(ctx, silencesFilename)
silencesFilepath, err := fileStore.FilepathFor(ctx, SilencesFilename)
if err != nil {
return nil, err
}
silencesOptions := maintenanceOptions{
filepath: silencesFilePath,
filepath: silencesFilepath,
retention: retentionNotificationsAndSilences,
maintenanceFrequency: silenceMaintenanceInterval,
maintenanceFunc: func(state alertingNotify.State) (int64, error) {
// Detached context here is to make sure that when the service is shut down the persist operation is executed.
return fileStore.Persist(context.Background(), silencesFilename, state)
return fileStore.Persist(context.Background(), SilencesFilename, state)
},
}
@@ -114,7 +114,7 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
maintenanceFrequency: notificationLogMaintenanceInterval,
maintenanceFunc: func(state alertingNotify.State) (int64, error) {
// Detached context here is to make sure that when the service is shut down the persist operation is executed.
return fileStore.Persist(context.Background(), notificationLogFilename, state)
return fileStore.Persist(context.Background(), NotificationLogFilename, state)
},
}

View File

@@ -356,7 +356,7 @@ func (moa *MultiOrgAlertmanager) cleanupOrphanLocalOrgState(ctx context.Context,
// Remove all orphaned items from kvstore by listing all existing items
// in our used namespace and comparing them to the currently active
// organizations.
storedFiles := []string{notificationLogFilename, silencesFilename}
storedFiles := []string{NotificationLogFilename, SilencesFilename}
for _, fileName := range storedFiles {
keys, err := moa.kvStore.Keys(ctx, kvstore.AllOrganizations, KVNamespace, fileName)
if err != nil {

View File

@@ -114,27 +114,27 @@ grafana_alerting_discovered_configurations 4
err := os.Mkdir(orphanDir, 0750)
require.NoError(t, err)
silencesPath := filepath.Join(orphanDir, silencesFilename)
silencesPath := filepath.Join(orphanDir, SilencesFilename)
err = os.WriteFile(silencesPath, []byte("file_1"), 0644)
require.NoError(t, err)
notificationPath := filepath.Join(orphanDir, notificationLogFilename)
notificationPath := filepath.Join(orphanDir, NotificationLogFilename)
err = os.WriteFile(notificationPath, []byte("file_2"), 0644)
require.NoError(t, err)
// We make sure that both files are on disk.
info, err := os.Stat(silencesPath)
require.NoError(t, err)
require.Equal(t, info.Name(), silencesFilename)
require.Equal(t, info.Name(), SilencesFilename)
info, err = os.Stat(notificationPath)
require.NoError(t, err)
require.Equal(t, info.Name(), notificationLogFilename)
require.Equal(t, info.Name(), NotificationLogFilename)
// We also populate the kvstore with orphaned records.
err = kvStore.Set(ctx, orgID, KVNamespace, silencesFilename, "file_1")
err = kvStore.Set(ctx, orgID, KVNamespace, SilencesFilename, "file_1")
require.NoError(t, err)
err = kvStore.Set(ctx, orgID, KVNamespace, notificationLogFilename, "file_1")
err = kvStore.Set(ctx, orgID, KVNamespace, NotificationLogFilename, "file_1")
require.NoError(t, err)
// Now re run the sync job once.
@@ -145,10 +145,10 @@ grafana_alerting_discovered_configurations 4
require.True(t, errors.Is(err, fs.ErrNotExist))
// The organization kvstore records should be gone by now.
_, exists, _ := kvStore.Get(ctx, orgID, KVNamespace, silencesFilename)
_, exists, _ := kvStore.Get(ctx, orgID, KVNamespace, SilencesFilename)
require.False(t, exists)
_, exists, _ = kvStore.Get(ctx, orgID, KVNamespace, notificationLogFilename)
_, exists, _ = kvStore.Get(ctx, orgID, KVNamespace, NotificationLogFilename)
require.False(t, exists)
}
}