From e21f6a29b7c406c9368e2d802e1a9a911a7552ca Mon Sep 17 00:00:00 2001 From: Ricky Putra Date: Thu, 11 Feb 2021 22:13:53 +0800 Subject: [PATCH] Alerting: Fix so that sending an alert with the Alertmanager notifier doesn't fail when one of multiple configured URL's are down (#31079) Fixes the behaviour of Notify, which returned an error as soon as one of the dispatched events returned an error. To maintain high availability, it should return an error only when all dispatched events return an error. Fixes #30509 --- pkg/services/alerting/notifiers/alertmanager.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/services/alerting/notifiers/alertmanager.go b/pkg/services/alerting/notifiers/alertmanager.go index bc0b9ea6f25..fc11b9153af 100644 --- a/pkg/services/alerting/notifiers/alertmanager.go +++ b/pkg/services/alerting/notifiers/alertmanager.go @@ -2,6 +2,7 @@ package notifiers import ( "context" + "fmt" "regexp" "strings" "time" @@ -170,6 +171,7 @@ func (am *AlertmanagerNotifier) Notify(evalContext *alerting.EvalContext) error bodyJSON := simplejson.NewFromAny(alerts) body, _ := bodyJSON.MarshalJSON() + errCnt := 0 for _, url := range am.URL { cmd := &models.SendWebhookSync{ @@ -182,10 +184,15 @@ func (am *AlertmanagerNotifier) Notify(evalContext *alerting.EvalContext) error if err := bus.DispatchCtx(evalContext.Ctx, cmd); err != nil { am.log.Error("Failed to send alertmanager", "error", err, "alertmanager", am.Name, "url", url) - return err + errCnt++ } } + // This happens when every dispatch return error + if errCnt == len(am.URL) { + return fmt.Errorf("failed to send alert to alertmanager") + } + return nil }