From 093e5947f4698cdb79ee05eaa8ede2247f35b9a9 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Date: Thu, 25 Mar 2021 17:21:44 +0530 Subject: [PATCH] Upgrade Prometheus Alertmanager and small fixes (#32280) Signed-off-by: Ganesh Vernekar --- go.mod | 2 +- go.sum | 4 +-- pkg/services/ngalert/notifier/alertmanager.go | 25 +++++++++++++------ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index f3dac4b41e6..c5554c396c5 100644 --- a/go.mod +++ b/go.mod @@ -61,7 +61,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pkg/errors v0.9.1 - github.com/prometheus/alertmanager v0.21.1-0.20210315141118-bf9c43b57df6 + github.com/prometheus/alertmanager v0.21.1-0.20210324070758-10757eb5fb78 github.com/prometheus/client_golang v1.10.0 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.19.0 diff --git a/go.sum b/go.sum index e569beb4a05..c365f10d033 100644 --- a/go.sum +++ b/go.sum @@ -1334,8 +1334,8 @@ github.com/prometheus/alertmanager v0.20.0/go.mod h1:9g2i48FAyZW6BtbsnvHtMHQXl2a github.com/prometheus/alertmanager v0.21.0/go.mod h1:h7tJ81NA0VLWvWEayi1QltevFkLF3KxmC/malTcT8Go= github.com/prometheus/alertmanager v0.21.1-0.20200911160112-1fdff6b3f939/go.mod h1:imXRHOP6QTsE0fFsIsAV/cXimS32m7gVZOiUj11m6Ig= github.com/prometheus/alertmanager v0.21.1-0.20210211203738-a7ca7b1d2951/go.mod h1:6Yc2n2ap5/oP99x1yN6Ho+yL0w8a0oClIR5xxW/JLGs= -github.com/prometheus/alertmanager v0.21.1-0.20210315141118-bf9c43b57df6 h1:XXfT4HND6ZQtBDsOBuTczE/QeKxn6CZz+mUrfWVAwE0= -github.com/prometheus/alertmanager v0.21.1-0.20210315141118-bf9c43b57df6/go.mod h1:g6wbBgNXmelfXjJhLLl5NIJDpejM5oEjiSKDsqnTzio= +github.com/prometheus/alertmanager v0.21.1-0.20210324070758-10757eb5fb78 h1:au9OzjUv3GWdL4s98I84hx38oAs+xxxpv/9n2Xuh6n0= +github.com/prometheus/alertmanager v0.21.1-0.20210324070758-10757eb5fb78/go.mod h1:g6wbBgNXmelfXjJhLLl5NIJDpejM5oEjiSKDsqnTzio= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= diff --git a/pkg/services/ngalert/notifier/alertmanager.go b/pkg/services/ngalert/notifier/alertmanager.go index 476b91bfdf3..2dfbec51b94 100644 --- a/pkg/services/ngalert/notifier/alertmanager.go +++ b/pkg/services/ngalert/notifier/alertmanager.go @@ -8,6 +8,7 @@ import ( "time" gokit_log "github.com/go-kit/kit/log" + "github.com/grafana/alerting-api/pkg/api" "github.com/pkg/errors" "github.com/prometheus/alertmanager/dispatch" "github.com/prometheus/alertmanager/nflog" @@ -20,7 +21,6 @@ import ( "github.com/prometheus/alertmanager/types" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/alerting-api/pkg/api" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/registry" "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -51,6 +51,8 @@ type Alertmanager struct { dispatcher *dispatch.Dispatcher dispatcherWG sync.WaitGroup + stageMetrics *notify.Metrics + reloadConfigMtx sync.Mutex } @@ -66,6 +68,7 @@ func (am *Alertmanager) Init() (err error) { am.logger = log.New("alertmanager") r := prometheus.NewRegistry() am.marker = types.NewMarker(r) + am.stageMetrics = notify.NewMetrics(r) am.Store = store.DBstore{SQLStore: am.SQLStore} am.notificationLog, err = nflog.New( @@ -137,7 +140,7 @@ func (am *Alertmanager) SyncAndApplyConfigFromDatabase() error { if err != nil { return errors.Wrap(err, "get config from database") } - return errors.Wrap(am.ApplyConfig(cfg), "reload from config") + return errors.Wrap(am.applyConfig(cfg), "reload from config") } func (am *Alertmanager) getConfigFromDatabase() (*api.PostableUserConfig, error) { @@ -152,8 +155,16 @@ func (am *Alertmanager) getConfigFromDatabase() (*api.PostableUserConfig, error) } // ApplyConfig applies a new configuration by re-initializing all components using the configuration provided. -// It is not safe to call concurrently. func (am *Alertmanager) ApplyConfig(cfg *api.PostableUserConfig) error { + am.reloadConfigMtx.Lock() + defer am.reloadConfigMtx.Unlock() + + return am.applyConfig(cfg) +} + +// applyConfig applies a new configuration by re-initializing all components using the configuration provided. +// It is not safe to call concurrently. +func (am *Alertmanager) applyConfig(cfg *api.PostableUserConfig) error { // First, we need to make sure we persist the templates to disk. paths, _, err := PersistTemplates(cfg, am.WorkingDirPath()) if err != nil { @@ -176,7 +187,7 @@ func (am *Alertmanager) ApplyConfig(cfg *api.PostableUserConfig) error { silencingStage := notify.NewMuteStage(silence.NewSilencer(am.silences, am.marker, gokit_log.NewNopLogger())) for name := range integrationsMap { - stage := createReceiverStage(name, integrationsMap[name], waitFunc, am.notificationLog) + stage := am.createReceiverStage(name, integrationsMap[name], waitFunc, am.notificationLog) routingStage[name] = notify.MultiStage{silencingStage, stage} } @@ -279,7 +290,7 @@ func (am *Alertmanager) ListSilences(matchers []*labels.Matcher) ([]types.Silenc return active[i].EndsAt.Before(active[j].EndsAt) }) sort.Slice(pending, func(i int, j int) bool { - return pending[i].StartsAt.Before(pending[j].EndsAt) + return pending[i].EndsAt.Before(pending[j].EndsAt) }) sort.Slice(expired, func(i int, j int) bool { return expired[i].EndsAt.After(expired[j].EndsAt) @@ -300,7 +311,7 @@ func (am *Alertmanager) CreateSilence(silence *types.Silence) {} func (am *Alertmanager) DeleteSilence(silence *types.Silence) {} // createReceiverStage creates a pipeline of stages for a receiver. -func createReceiverStage(name string, integrations []notify.Integration, wait func() time.Duration, notificationLog notify.NotificationLog) notify.Stage { +func (am *Alertmanager) createReceiverStage(name string, integrations []notify.Integration, wait func() time.Duration, notificationLog notify.NotificationLog) notify.Stage { var fs notify.FanoutStage for i := range integrations { recv := &nflogpb.Receiver{ @@ -312,7 +323,7 @@ func createReceiverStage(name string, integrations []notify.Integration, wait fu s = append(s, notify.NewWaitStage(wait)) s = append(s, notify.NewDedupStage(&integrations[i], notificationLog, recv)) //TODO: This probably won't work w/o the metrics - s = append(s, notify.NewRetryStage(integrations[i], name, nil)) + s = append(s, notify.NewRetryStage(integrations[i], name, am.stageMetrics)) s = append(s, notify.NewSetNotifiesStage(notificationLog, recv)) fs = append(fs, s)