grafana/pkg/services/ngalert/remote/remote_primary_forked_alertmanager.go
Fayzal Ghantiwala e321dbb690
Alerting: Use remote Alertmanager to test templates and receivers when enabled (#91570)
* Initial impl

* Add code to test templates and receivers

* Fix linter

* Fix forked am tests

* Update mimir client

* Remove trailing whitespace

* re-trigger CI
2024-08-15 16:56:14 +01:00

160 lines
6.2 KiB
Go

package remote
import (
"context"
"errors"
"fmt"
alertingNotify "github.com/grafana/alerting/notify"
"github.com/grafana/grafana/pkg/infra/log"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
)
type RemotePrimaryForkedAlertmanager struct {
log log.Logger
internal notifier.Alertmanager
remote remoteAlertmanager
}
func NewRemotePrimaryForkedAlertmanager(log log.Logger, internal notifier.Alertmanager, remote remoteAlertmanager) *RemotePrimaryForkedAlertmanager {
return &RemotePrimaryForkedAlertmanager{
log: log,
internal: internal,
remote: remote,
}
}
// ApplyConfig will send the configuration to the remote Alertmanager on startup.
func (fam *RemotePrimaryForkedAlertmanager) ApplyConfig(ctx context.Context, config *models.AlertConfiguration) error {
if err := fam.remote.ApplyConfig(ctx, config); err != nil {
return fmt.Errorf("failed to call ApplyConfig on the remote Alertmanager: %w", err)
}
if err := fam.internal.ApplyConfig(ctx, config); err != nil {
// An error in the internal Alertmanager shouldn't make the whole operation fail.
// We're replicating writes in the internal Alertmanager just for comparing and in case we need to roll back.
fam.log.Error("Error applying config to the internal Alertmanager", "err", err)
}
return nil
}
func (fam *RemotePrimaryForkedAlertmanager) SaveAndApplyConfig(ctx context.Context, config *apimodels.PostableUserConfig) error {
if err := fam.remote.SaveAndApplyConfig(ctx, config); err != nil {
return err
}
if err := fam.internal.SaveAndApplyConfig(ctx, config); err != nil {
// An error in the internal Alertmanager shouldn't make the whole operation fail.
// We're replicating writes in the internal Alertmanager just for comparing and in case we need to roll back.
fam.log.Error("Error applying config to the internal Alertmanager", "err", err)
}
return nil
}
func (fam *RemotePrimaryForkedAlertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
if err := fam.remote.SaveAndApplyDefaultConfig(ctx); err != nil {
return fmt.Errorf("failed to send the default configuration to the remote Alertmanager: %w", err)
}
if err := fam.internal.SaveAndApplyDefaultConfig(ctx); err != nil {
// An error in the internal Alertmanager shouldn't make the whole operation fail.
// We're replicating writes in the internal Alertmanager just for comparing and in case we need to roll back.
fam.log.Error("Error applying the default configuration to the internal Alertmanager", "err", err)
}
return nil
}
func (fam *RemotePrimaryForkedAlertmanager) GetStatus(ctx context.Context) (apimodels.GettableStatus, error) {
return fam.remote.GetStatus(ctx)
}
func (fam *RemotePrimaryForkedAlertmanager) CreateSilence(ctx context.Context, silence *apimodels.PostableSilence) (string, error) {
originalID := silence.ID
id, err := fam.remote.CreateSilence(ctx, silence)
if err != nil {
return "", err
}
if originalID != "" && originalID != id {
// ID has changed, expire the old silence before creating a new one.
if err := fam.internal.DeleteSilence(ctx, originalID); err != nil {
if errors.Is(err, alertingNotify.ErrSilenceNotFound) {
// This can happen if the silence was created in the remote AM without using the Grafana UI
// in remote primary mode, or if the silence failed to be replicated in the internal AM.
fam.log.Warn("Failed to delete silence in the internal Alertmanager", "err", err, "id", originalID)
} else {
fam.log.Error("Failed to delete silence in the internal Alertmanager", "err", err, "id", originalID)
}
}
}
silence.ID = id
if _, err := fam.internal.CreateSilence(ctx, silence); err != nil {
fam.log.Error("Error creating silence in the internal Alertmanager", "err", err, "silence", silence)
}
return id, nil
}
func (fam *RemotePrimaryForkedAlertmanager) DeleteSilence(ctx context.Context, id string) error {
if err := fam.remote.DeleteSilence(ctx, id); err != nil {
return err
}
if err := fam.internal.DeleteSilence(ctx, id); err != nil {
fam.log.Error("Error deleting silence in the internal Alertmanager", "err", err, "id", id)
}
return nil
}
func (fam *RemotePrimaryForkedAlertmanager) GetSilence(ctx context.Context, id string) (apimodels.GettableSilence, error) {
return fam.remote.GetSilence(ctx, id)
}
func (fam *RemotePrimaryForkedAlertmanager) ListSilences(ctx context.Context, filter []string) (apimodels.GettableSilences, error) {
return fam.remote.ListSilences(ctx, filter)
}
func (fam *RemotePrimaryForkedAlertmanager) GetAlerts(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (apimodels.GettableAlerts, error) {
return fam.remote.GetAlerts(ctx, active, silenced, inhibited, filter, receiver)
}
func (fam *RemotePrimaryForkedAlertmanager) GetAlertGroups(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (apimodels.AlertGroups, error) {
return fam.remote.GetAlertGroups(ctx, active, silenced, inhibited, filter, receiver)
}
func (fam *RemotePrimaryForkedAlertmanager) PutAlerts(ctx context.Context, alerts apimodels.PostableAlerts) error {
return fam.remote.PutAlerts(ctx, alerts)
}
func (fam *RemotePrimaryForkedAlertmanager) GetReceivers(ctx context.Context) ([]apimodels.Receiver, error) {
return fam.remote.GetReceivers(ctx)
}
func (fam *RemotePrimaryForkedAlertmanager) TestReceivers(ctx context.Context, c apimodels.TestReceiversConfigBodyParams) (*alertingNotify.TestReceiversResult, int, error) {
return fam.remote.TestReceivers(ctx, c)
}
func (fam *RemotePrimaryForkedAlertmanager) TestTemplate(ctx context.Context, c apimodels.TestTemplatesConfigBodyParams) (*notifier.TestTemplatesResults, error) {
return fam.remote.TestTemplate(ctx, c)
}
func (fam *RemotePrimaryForkedAlertmanager) SilenceState(ctx context.Context) (alertingNotify.SilenceState, error) {
return fam.remote.SilenceState(ctx)
}
func (fam *RemotePrimaryForkedAlertmanager) StopAndWait() {
fam.internal.StopAndWait()
fam.remote.StopAndWait()
}
func (fam *RemotePrimaryForkedAlertmanager) Ready() bool {
// Both Alertmanagers must be ready.
if ready := fam.remote.Ready(); !ready {
return false
}
return fam.internal.Ready()
}