Alerting: create wrapper for Alertmanager to enable org level isolation (#37320)

Introduces org-level isolation for the Alertmanager and its components.

Silences, Alerts and Contact points are now separated by org and are not shared between them. (A minimal sketch of the new wrapper follows the commit header below.)

Co-authored with @davidmparrott and @papagian
David Parrott 2021-08-24 03:28:09 -07:00 committed by GitHub
parent 7ebf4027a7
commit 7fbeefc090
19 changed files with 673 additions and 148 deletions
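Before the diff, a minimal standalone sketch of the pattern this commit introduces: callers no longer talk to a single embedded Alertmanager, they ask a wrapper for the instance that belongs to their organization. This is illustrative code, not Grafana's; only the type and error names mirror the ones added in this commit.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// Sketch only: one Alertmanager-like instance per organization,
// resolved by org ID through a wrapper.
type alertmanager struct{ orgID int64 }

var errNoAlertmanagerForOrg = errors.New("Alertmanager does not exist for this organization")

type multiOrgAlertmanager struct {
	mu  sync.RWMutex
	ams map[int64]*alertmanager
}

func (m *multiOrgAlertmanager) alertmanagerFor(orgID int64) (*alertmanager, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	am, ok := m.ams[orgID]
	if !ok {
		return nil, errNoAlertmanagerForOrg
	}
	return am, nil
}

func main() {
	moa := &multiOrgAlertmanager{ams: map[int64]*alertmanager{1: {orgID: 1}, 2: {orgID: 2}}}
	if am, err := moa.alertmanagerFor(2); err == nil {
		fmt.Println("resolved Alertmanager for org", am.orgID) // org 2's instance, isolated from org 1
	}
	if _, err := moa.alertmanagerFor(42); err != nil {
		fmt.Println("org 42:", err) // no Alertmanager for an unknown org
	}
}
```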

View File

@ -385,7 +385,7 @@ func (hs *HTTPServer) registerRoutes() {
})
apiRoute.Get("/alert-notifiers", reqEditorRole, routing.Wrap(
GetAlertNotifiers(hs.Alertmanager != nil && hs.Cfg.IsNgAlertEnabled())),
GetAlertNotifiers(hs.MultiOrgAlertmanager != nil && hs.Cfg.IsNgAlertEnabled())),
)
apiRoute.Group("/alert-notifications", func(alertNotifications routing.RouteRegister) {

View File

@ -56,7 +56,7 @@ import (
"github.com/grafana/grafana/pkg/util/errutil"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
macaron "gopkg.in/macaron.v1"
"gopkg.in/macaron.v1"
)
func init() {
@ -104,7 +104,7 @@ type HTTPServer struct {
PluginDashboardService *plugindashboards.Service `inject:""`
AlertEngine *alerting.AlertEngine `inject:""`
LoadSchemaService *schemaloader.SchemaLoaderService `inject:""`
Alertmanager *notifier.Alertmanager `inject:""`
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager `inject:""`
LibraryPanelService librarypanels.Service `inject:""`
LibraryElementService libraryelements.Service `inject:""`
SocialService social.Service `inject:""`

View File

@ -30,10 +30,8 @@ type Scheduler interface {
type Alertmanager interface {
// Configuration
// temporary add orgID parameter; this will move to the Alertmanager wrapper when it will be available
SaveAndApplyConfig(orgID int64, config *apimodels.PostableUserConfig) error
// temporary add orgID parameter; this will move to the Alertmanager wrapper when it will be available
SaveAndApplyDefaultConfig(orgID int64) error
SaveAndApplyConfig(config *apimodels.PostableUserConfig) error
SaveAndApplyDefaultConfig() error
GetStatus() apimodels.GettableStatus
// Silences
@ -52,19 +50,19 @@ type Alertmanager interface {
// API handlers.
type API struct {
Cfg *setting.Cfg
DatasourceCache datasources.CacheService
RouteRegister routing.RouteRegister
DataService *tsdb.Service
QuotaService *quota.QuotaService
Schedule schedule.ScheduleService
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AlertingStore store.AlertingStore
AdminConfigStore store.AdminConfigurationStore
DataProxy *datasourceproxy.DatasourceProxyService
Alertmanager Alertmanager
StateManager *state.Manager
Cfg *setting.Cfg
DatasourceCache datasources.CacheService
RouteRegister routing.RouteRegister
DataService *tsdb.Service
QuotaService *quota.QuotaService
Schedule schedule.ScheduleService
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AlertingStore store.AlertingStore
AdminConfigStore store.AdminConfigurationStore
DataProxy *datasourceproxy.DatasourceProxyService
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
StateManager *state.Manager
}
// RegisterAPIEndpoints registers API handlers
@ -78,7 +76,7 @@ func (api *API) RegisterAPIEndpoints(m *metrics.Metrics) {
api.RegisterAlertmanagerApiEndpoints(NewForkedAM(
api.DatasourceCache,
NewLotexAM(proxy, logger),
AlertmanagerSrv{store: api.AlertingStore, am: api.Alertmanager, log: logger},
AlertmanagerSrv{store: api.AlertingStore, mam: api.MultiOrgAlertmanager, log: logger},
), m)
// Register endpoints for proxying to Prometheus-compatible backends.
api.RegisterPrometheusApiEndpoints(NewForkedProm(

View File

@ -25,7 +25,7 @@ const (
)
type AlertmanagerSrv struct {
am Alertmanager
mam *notifier.MultiOrgAlertmanager
store store.AlertingStore
log log.Logger
}
@ -93,14 +93,25 @@ func (srv AlertmanagerSrv) loadSecureSettings(orgId int64, receivers []*apimodel
}
func (srv AlertmanagerSrv) RouteGetAMStatus(c *models.ReqContext) response.Response {
return response.JSON(http.StatusOK, srv.am.GetStatus())
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
return response.JSON(http.StatusOK, am.GetStatus())
}
func (srv AlertmanagerSrv) RouteCreateSilence(c *models.ReqContext, postableSilence apimodels.PostableSilence) response.Response {
if !c.HasUserRole(models.ROLE_EDITOR) {
return ErrResp(http.StatusForbidden, errors.New("permission denied"), "")
}
silenceID, err := srv.am.CreateSilence(&postableSilence)
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
silenceID, err := am.CreateSilence(&postableSilence)
if err != nil {
if errors.Is(err, notifier.ErrSilenceNotFound) {
return ErrResp(http.StatusNotFound, err, "")
@ -119,7 +130,13 @@ func (srv AlertmanagerSrv) RouteDeleteAlertingConfig(c *models.ReqContext) respo
if !c.HasUserRole(models.ROLE_EDITOR) {
return ErrResp(http.StatusForbidden, errors.New("permission denied"), "")
}
if err := srv.am.SaveAndApplyDefaultConfig(c.OrgId); err != nil {
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
if err := am.SaveAndApplyDefaultConfig(); err != nil {
srv.log.Error("unable to save and apply default alertmanager configuration", "err", err)
return ErrResp(http.StatusInternalServerError, err, "failed to save and apply default Alertmanager configuration")
}
@ -131,8 +148,14 @@ func (srv AlertmanagerSrv) RouteDeleteSilence(c *models.ReqContext) response.Res
if !c.HasUserRole(models.ROLE_EDITOR) {
return ErrResp(http.StatusForbidden, errors.New("permission denied"), "")
}
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
silenceID := c.Params(":SilenceId")
if err := srv.am.DeleteSilence(silenceID); err != nil {
if err := am.DeleteSilence(silenceID); err != nil {
if errors.Is(err, notifier.ErrSilenceNotFound) {
return ErrResp(http.StatusNotFound, err, "")
}
@ -202,7 +225,12 @@ func (srv AlertmanagerSrv) RouteGetAlertingConfig(c *models.ReqContext) response
}
func (srv AlertmanagerSrv) RouteGetAMAlertGroups(c *models.ReqContext) response.Response {
groups, err := srv.am.GetAlertGroups(
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
groups, err := am.GetAlertGroups(
c.QueryBoolWithDefault("active", true),
c.QueryBoolWithDefault("silenced", true),
c.QueryBoolWithDefault("inhibited", true),
@ -221,7 +249,12 @@ func (srv AlertmanagerSrv) RouteGetAMAlertGroups(c *models.ReqContext) response.
}
func (srv AlertmanagerSrv) RouteGetAMAlerts(c *models.ReqContext) response.Response {
alerts, err := srv.am.GetAlerts(
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
alerts, err := am.GetAlerts(
c.QueryBoolWithDefault("active", true),
c.QueryBoolWithDefault("silenced", true),
c.QueryBoolWithDefault("inhibited", true),
@ -243,8 +276,13 @@ func (srv AlertmanagerSrv) RouteGetAMAlerts(c *models.ReqContext) response.Respo
}
func (srv AlertmanagerSrv) RouteGetSilence(c *models.ReqContext) response.Response {
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
silenceID := c.Params(":SilenceId")
gettableSilence, err := srv.am.GetSilence(silenceID)
gettableSilence, err := am.GetSilence(silenceID)
if err != nil {
if errors.Is(err, notifier.ErrSilenceNotFound) {
return ErrResp(http.StatusNotFound, err, "")
@ -256,7 +294,12 @@ func (srv AlertmanagerSrv) RouteGetSilence(c *models.ReqContext) response.Respon
}
func (srv AlertmanagerSrv) RouteGetSilences(c *models.ReqContext) response.Response {
gettableSilences, err := srv.am.ListSilences(c.QueryStrings("filter"))
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
gettableSilences, err := am.ListSilences(c.QueryStrings("filter"))
if err != nil {
if errors.Is(err, notifier.ErrListSilencesBadPayload) {
return ErrResp(http.StatusBadRequest, err, "")
@ -293,7 +336,12 @@ func (srv AlertmanagerSrv) RoutePostAlertingConfig(c *models.ReqContext, body ap
return ErrResp(http.StatusInternalServerError, err, "failed to post process Alertmanager configuration")
}
if err := srv.am.SaveAndApplyConfig(c.OrgId, &body); err != nil {
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
if err := am.SaveAndApplyConfig(&body); err != nil {
srv.log.Error("unable to save and apply alertmanager configuration", "err", err)
return ErrResp(http.StatusBadRequest, err, "failed to save and apply Alertmanager configuration")
}
@ -301,7 +349,7 @@ func (srv AlertmanagerSrv) RoutePostAlertingConfig(c *models.ReqContext, body ap
return response.JSON(http.StatusAccepted, util.DynMap{"message": "configuration created"})
}
func (srv AlertmanagerSrv) RoutePostAMAlerts(c *models.ReqContext, body apimodels.PostableAlerts) response.Response {
func (srv AlertmanagerSrv) RoutePostAMAlerts(_ *models.ReqContext, _ apimodels.PostableAlerts) response.Response {
return NotImplementedResp
}
@ -332,7 +380,12 @@ func (srv AlertmanagerSrv) RoutePostTestReceivers(c *models.ReqContext, body api
}
defer cancelFunc()
result, err := srv.am.TestReceivers(ctx, body)
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
}
result, err := am.TestReceivers(ctx, body)
if err != nil {
if errors.Is(err, notifier.ErrNoReceivers) {
return response.Error(http.StatusBadRequest, "", err)
@ -429,3 +482,21 @@ func statusForTestReceivers(v []notifier.TestReceiverResult) int {
return http.StatusOK
}
}
func (srv AlertmanagerSrv) AlertmanagerFor(orgID int64) (Alertmanager, *response.NormalResponse) {
am, err := srv.mam.AlertmanagerFor(orgID)
if err == nil {
return am, nil
}
if errors.Is(err, notifier.ErrNoAlertmanagerForOrg) {
return nil, response.Error(http.StatusNotFound, err.Error(), nil)
}
if errors.Is(err, notifier.ErrAlertmanagerNotReady) {
return nil, response.Error(http.StatusConflict, err.Error(), nil)
}
srv.log.Error("unable to obtain the org's Alertmanager", "err", err)
return nil, response.Error(http.StatusInternalServerError, "unable to obtain org's Alertmanager", err)
}
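The AlertmanagerFor helper above is the single place where wrapper errors become HTTP responses. A standalone sketch of that mapping, assuming only the two sentinel errors added in this commit; the statusFor helper is illustrative and not part of Grafana's API.

```go
package main

import (
	"errors"
	"fmt"
	"net/http"
)

// Sentinels mirroring notifier.ErrNoAlertmanagerForOrg and
// notifier.ErrAlertmanagerNotReady from this commit.
var (
	errNoAlertmanagerForOrg = errors.New("Alertmanager does not exist for this organization")
	errAlertmanagerNotReady = errors.New("Alertmanager is not ready yet")
)

// statusFor mirrors the helper's mapping: unknown org -> 404,
// Alertmanager still starting up -> 409, anything else -> 500.
func statusFor(err error) int {
	switch {
	case errors.Is(err, errNoAlertmanagerForOrg):
		return http.StatusNotFound
	case errors.Is(err, errAlertmanagerNotReady):
		return http.StatusConflict
	default:
		return http.StatusInternalServerError
	}
}

func main() {
	fmt.Println(statusFor(errNoAlertmanagerForOrg)) // 404
	fmt.Println(statusFor(errAlertmanagerNotReady)) // 409
	fmt.Println(statusFor(errors.New("boom")))      // 500
}
```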

View File

@ -2247,6 +2247,76 @@
"type": "object",
"x-go-package": "github.com/prometheus/common/config"
},
"TestReceiverConfigResult": {
"properties": {
"error": {
"type": "string",
"x-go-name": "Error"
},
"name": {
"type": "string",
"x-go-name": "Name"
},
"status": {
"type": "string",
"x-go-name": "Status"
},
"uid": {
"type": "string",
"x-go-name": "UID"
}
},
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"TestReceiverResult": {
"properties": {
"grafana_managed_receiver_configs": {
"items": {
"$ref": "#/definitions/TestReceiverConfigResult"
},
"type": "array",
"x-go-name": "Configs"
},
"name": {
"type": "string",
"x-go-name": "Name"
}
},
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"TestReceiversConfig": {
"properties": {
"receivers": {
"items": {
"$ref": "#/definitions/PostableApiReceiver"
},
"type": "array",
"x-go-name": "Receivers"
}
},
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"TestReceiversResult": {
"properties": {
"notified_at": {
"format": "date-time",
"type": "string",
"x-go-name": "NotifedAt"
},
"receivers": {
"items": {
"$ref": "#/definitions/TestReceiverResult"
},
"type": "array",
"x-go-name": "Receivers"
}
},
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"TestRulePayload": {
"properties": {
"expr": {
@ -2274,6 +2344,7 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"properties": {
"ForceQuery": {
"type": "boolean"
@ -2306,9 +2377,9 @@
"$ref": "#/definitions/Userinfo"
}
},
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"type": "object",
"x-go-package": "github.com/prometheus/common/config"
"x-go-package": "net/url"
},
"Userinfo": {
"description": "The Userinfo type is an immutable encapsulation of username and\npassword details for a URL. An existing Userinfo value is guaranteed\nto have a username set (potentially empty, as allowed by RFC 2396),\nand optionally a password.",
@ -2475,7 +2546,6 @@
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"alertGroup": {
"description": "AlertGroup alert group",
"properties": {
"alerts": {
"description": "alerts",
@ -2497,14 +2567,17 @@
"labels",
"receiver"
],
"type": "object"
"type": "object",
"x-go-name": "AlertGroup",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"alertGroups": {
"description": "AlertGroups alert groups",
"items": {
"$ref": "#/definitions/alertGroup"
},
"type": "array"
"type": "array",
"x-go-name": "AlertGroups",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"alertStatus": {
"description": "AlertStatus alert status",
@ -2624,6 +2697,7 @@
"$ref": "#/definitions/Duration"
},
"gettableAlert": {
"description": "GettableAlert gettable alert",
"properties": {
"annotations": {
"$ref": "#/definitions/labelSet"
@ -2682,9 +2756,7 @@
"status",
"updatedAt"
],
"type": "object",
"x-go-name": "GettableAlert",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
"type": "object"
},
"gettableAlerts": {
"items": {
@ -2928,6 +3000,7 @@
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"receiver": {
"description": "Receiver receiver",
"properties": {
"name": {
"description": "name",
@ -2938,9 +3011,7 @@
"required": [
"name"
],
"type": "object",
"x-go-name": "Receiver",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
"type": "object"
},
"silence": {
"description": "Silence silence",
@ -3523,6 +3594,49 @@
]
}
},
"/api/alertmanager/{Recipient}/config/api/v1/receivers/test": {
"post": {
"operationId": "RoutePostTestReceivers",
"parameters": [
{
"in": "query",
"items": {
"$ref": "#/definitions/PostableApiReceiver"
},
"name": "receivers",
"type": "array",
"x-go-name": "Receivers"
}
],
"responses": {
"200": {
"description": "Ack",
"schema": {
"$ref": "#/definitions/Ack"
}
},
"207": {
"$ref": "#/responses/MultiStatus"
},
"400": {
"description": "ValidationError",
"schema": {
"$ref": "#/definitions/ValidationError"
}
},
"408": {
"description": "Failure",
"schema": {
"$ref": "#/definitions/Failure"
}
}
},
"summary": "Test Grafana managed receivers without saving them.",
"tags": [
"alertmanager"
]
}
},
"/api/prometheus/{Recipient}/api/v1/alerts": {
"get": {
"description": "gets the current alerts",

View File

@ -1750,7 +1750,6 @@
"enum": [
"Alerting"
],
"x-go-enum-desc": "Alerting AlertingErrState",
"x-go-name": "ExecErrState"
},
"id": {
@ -1779,7 +1778,6 @@
"NoData",
"OK"
],
"x-go-enum-desc": "Alerting Alerting\nNoData NoData\nOK OK",
"x-go-name": "NoDataState"
},
"orgId": {
@ -2592,7 +2590,6 @@
"enum": [
"Alerting"
],
"x-go-enum-desc": "Alerting AlertingErrState",
"x-go-name": "ExecErrState"
},
"no_data_state": {
@ -2602,7 +2599,6 @@
"NoData",
"OK"
],
"x-go-enum-desc": "Alerting Alerting\nNoData NoData\nOK OK",
"x-go-name": "NoDataState"
},
"title": {
@ -3373,8 +3369,9 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"type": "object",
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"properties": {
"ForceQuery": {
"type": "boolean"
@ -3407,7 +3404,7 @@
"$ref": "#/definitions/Userinfo"
}
},
"x-go-package": "github.com/prometheus/common/config"
"x-go-package": "net/url"
},
"Userinfo": {
"description": "The Userinfo type is an immutable encapsulation of username and\npassword details for a URL. An existing Userinfo value is guaranteed\nto have a username set (potentially empty, as allowed by RFC 2396),\nand optionally a password.",
@ -3574,7 +3571,6 @@
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"alertGroup": {
"description": "AlertGroup alert group",
"type": "object",
"required": [
"alerts",
@ -3597,6 +3593,8 @@
"$ref": "#/definitions/receiver"
}
},
"x-go-name": "AlertGroup",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/alertGroup"
},
"alertGroups": {
@ -3726,6 +3724,7 @@
"$ref": "#/definitions/Duration"
},
"gettableAlert": {
"description": "GettableAlert gettable alert",
"type": "object",
"required": [
"labels",
@ -3785,19 +3784,19 @@
"x-go-name": "UpdatedAt"
}
},
"x-go-name": "GettableAlert",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/gettableAlert"
},
"gettableAlerts": {
"description": "GettableAlerts gettable alerts",
"type": "array",
"items": {
"$ref": "#/definitions/gettableAlert"
},
"x-go-name": "GettableAlerts",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/gettableAlerts"
},
"gettableSilence": {
"description": "GettableSilence gettable silence",
"type": "object",
"required": [
"comment",
@ -3850,8 +3849,6 @@
"x-go-name": "UpdatedAt"
}
},
"x-go-name": "GettableSilence",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/gettableSilence"
},
"gettableSilences": {
@ -3990,7 +3987,6 @@
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"
},
"postableSilence": {
"description": "PostableSilence postable silence",
"type": "object",
"required": [
"comment",
@ -4031,9 +4027,12 @@
"x-go-name": "StartsAt"
}
},
"x-go-name": "PostableSilence",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/postableSilence"
},
"receiver": {
"description": "Receiver receiver",
"type": "object",
"required": [
"name"
@ -4045,8 +4044,6 @@
"x-go-name": "Name"
}
},
"x-go-name": "Receiver",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models",
"$ref": "#/definitions/receiver"
},
"silence": {

View File

@ -4,6 +4,7 @@ import (
"fmt"
"regexp"
"strings"
"sync"
"time"
"github.com/prometheus/alertmanager/api/metrics"
@ -27,9 +28,9 @@ var GlobalMetrics = NewMetrics(prometheus.DefaultRegisterer)
type Metrics struct {
*metrics.Alerts
AlertState *prometheus.GaugeVec
// Registerer is for use by subcomponents which register their own metrics.
Registerer prometheus.Registerer
AlertState *prometheus.GaugeVec
RequestDuration *prometheus.HistogramVec
ActiveConfigurations prometheus.Gauge
EvalTotal *prometheus.CounterVec
@ -55,14 +56,14 @@ func (m *Metrics) SwapRegisterer(r prometheus.Registerer) {
func NewMetrics(r prometheus.Registerer) *Metrics {
return &Metrics{
Alerts: metrics.NewAlerts("v2", r),
Registerer: r,
Alerts: metrics.NewAlerts("v2", r),
AlertState: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "grafana",
Subsystem: "alerting",
Name: "alerts",
Help: "How many alerts by state.",
}, []string{"state"}),
Registerer: r,
RequestDuration: promauto.With(r).NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "grafana",
@ -125,6 +126,37 @@ func NewMetrics(r prometheus.Registerer) *Metrics {
}
}
// OrgRegistries keeps per-org Prometheus registries; it guarantees multi-thread safety and stable ordering of prometheus registries.
type OrgRegistries struct {
regsMu sync.Mutex
regs map[int64]prometheus.Registerer
}
func NewOrgRegistries() *OrgRegistries {
return &OrgRegistries{
regs: make(map[int64]prometheus.Registerer),
}
}
func (m *OrgRegistries) GetOrCreateOrgRegistry(orgID int64) prometheus.Registerer {
m.regsMu.Lock()
defer m.regsMu.Unlock()
orgRegistry, ok := m.regs[orgID]
if !ok {
reg := prometheus.NewRegistry()
m.regs[orgID] = reg
return reg
}
return orgRegistry
}
func (m *OrgRegistries) RemoveOrgRegistry(org int64) {
m.regsMu.Lock()
defer m.regsMu.Unlock()
delete(m.regs, org)
}
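OrgRegistries above gives every organization its own Prometheus registry, so each per-org Alertmanager can register identically named collectors without duplicate-registration errors. A standalone sketch of that idea, assuming nothing beyond the Prometheus client library (names are illustrative):

```go
package main

import (
	"fmt"
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)

// orgRegistries mirrors the OrgRegistries idea: a mutex-guarded map of
// per-org registries, created lazily and removed when an org goes away.
type orgRegistries struct {
	mu   sync.Mutex
	regs map[int64]*prometheus.Registry
}

func (o *orgRegistries) getOrCreate(orgID int64) *prometheus.Registry {
	o.mu.Lock()
	defer o.mu.Unlock()
	if reg, ok := o.regs[orgID]; ok {
		return reg
	}
	reg := prometheus.NewRegistry()
	o.regs[orgID] = reg
	return reg
}

func main() {
	regs := &orgRegistries{regs: map[int64]*prometheus.Registry{}}
	// Each org can register the same metric name without a duplicate-registration error,
	// because every org has its own registry.
	for _, orgID := range []int64{1, 2} {
		c := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "grafana_alerting_notifications_total",
			Help: "Example per-org counter.",
		})
		if err := regs.getOrCreate(orgID).Register(c); err != nil {
			fmt.Println("register failed:", err)
		}
	}
	fmt.Println("registered per-org collectors for", len(regs.regs), "orgs")
}
```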
// Instrument wraps a middleware, instrumenting the request latencies.
func Instrument(
method,

View File

@ -55,7 +55,7 @@ type AlertNG struct {
stateManager *state.Manager
// Alerting notification services
Alertmanager *notifier.Alertmanager
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
}
func init() {
@ -74,9 +74,10 @@ func (ng *AlertNG) Init() error {
Logger: ng.Log,
}
var err error
ng.Alertmanager, err = notifier.New(ng.Cfg, store, ng.Metrics)
if err != nil {
ng.MultiOrgAlertmanager = notifier.NewMultiOrgAlertmanager(ng.Cfg, store, store)
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
if err := ng.MultiOrgAlertmanager.LoadAndSyncAlertmanagersForOrgs(context.Background()); err != nil {
return err
}
@ -89,7 +90,8 @@ func (ng *AlertNG) Init() error {
InstanceStore: store,
RuleStore: store,
AdminConfigStore: store,
Notifier: ng.Alertmanager,
OrgStore: store,
MultiOrgNotifier: ng.MultiOrgAlertmanager,
Metrics: ng.Metrics,
AdminConfigPollInterval: ng.Cfg.AdminConfigPollInterval,
}
@ -98,19 +100,19 @@ func (ng *AlertNG) Init() error {
ng.schedule = schedule.NewScheduler(schedCfg, ng.DataService, ng.Cfg.AppURL, ng.stateManager)
api := api.API{
Cfg: ng.Cfg,
DatasourceCache: ng.DatasourceCache,
RouteRegister: ng.RouteRegister,
DataService: ng.DataService,
Schedule: ng.schedule,
DataProxy: ng.DataProxy,
QuotaService: ng.QuotaService,
InstanceStore: store,
RuleStore: store,
AlertingStore: store,
AdminConfigStore: store,
Alertmanager: ng.Alertmanager,
StateManager: ng.stateManager,
Cfg: ng.Cfg,
DatasourceCache: ng.DatasourceCache,
RouteRegister: ng.RouteRegister,
DataService: ng.DataService,
Schedule: ng.schedule,
DataProxy: ng.DataProxy,
QuotaService: ng.QuotaService,
InstanceStore: store,
RuleStore: store,
AlertingStore: store,
AdminConfigStore: store,
MultiOrgAlertmanager: ng.MultiOrgAlertmanager,
StateManager: ng.stateManager,
}
api.RegisterAPIEndpoints(ng.Metrics)
@ -127,7 +129,7 @@ func (ng *AlertNG) Run(ctx context.Context) error {
return ng.schedule.Run(subCtx)
})
children.Go(func() error {
return ng.Alertmanager.Run(subCtx)
return ng.MultiOrgAlertmanager.Run(subCtx)
})
return children.Wait()
}
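Init now performs one blocking sync so the per-org Alertmanagers exist before the scheduler and API depend on them, and Run keeps them in sync on an interval. A minimal sketch of that startup-then-poll lifecycle; the sync body and interval are placeholders, not Grafana code.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// syncOrgs stands in for LoadAndSyncAlertmanagersForOrgs.
func syncOrgs(ctx context.Context) error {
	fmt.Println("synced Alertmanagers for orgs")
	return nil
}

func run(ctx context.Context, pollInterval time.Duration) error {
	// Blocking initial sync, mirroring AlertNG.Init in this commit.
	if err := syncOrgs(ctx); err != nil {
		return err
	}
	// Then poll until the context is cancelled, mirroring MultiOrgAlertmanager.Run.
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-time.After(pollInterval):
			if err := syncOrgs(ctx); err != nil {
				fmt.Println("sync failed:", err)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
	defer cancel()
	_ = run(ctx, 100*time.Millisecond)
}
```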

View File

@ -34,13 +34,11 @@ import (
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier/channels"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
)
const (
pollInterval = 1 * time.Minute
workingDir = "alerting"
workingDir = "alerting"
// How long should we keep silences and notification entries on-disk after they've served their purpose.
retentionNotificationsAndSilences = 5 * 24 * time.Hour
// maintenanceNotificationAndSilences how often should we flush and garbage collect notifications and silences
@ -73,18 +71,15 @@ const (
}
}
`
//TODO: temporary until fix org isolation
mainOrgID = 1
)
type Alertmanager struct {
logger log.Logger
gokitLogger gokit_log.Logger
Settings *setting.Cfg `inject:""`
SQLStore *sqlstore.SQLStore `inject:""`
Settings *setting.Cfg
Store store.AlertingStore
Metrics *metrics.Metrics `inject:""`
Metrics *metrics.Metrics
notificationLog *nflog.Log
marker types.Marker
@ -108,18 +103,20 @@ type Alertmanager struct {
reloadConfigMtx sync.RWMutex
config *apimodels.PostableUserConfig
configHash [16]byte
orgID int64
}
func New(cfg *setting.Cfg, store store.AlertingStore, m *metrics.Metrics) (*Alertmanager, error) {
func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, m *metrics.Metrics) (*Alertmanager, error) {
am := &Alertmanager{
Settings: cfg,
stopc: make(chan struct{}),
logger: log.New("alertmanager"),
logger: log.New("alertmanager", "org", orgID),
marker: types.NewMarker(m.Registerer),
stageMetrics: notify.NewMetrics(m.Registerer),
dispatcherMetrics: dispatch.NewDispatcherMetrics(m.Registerer),
Store: store,
Metrics: m,
orgID: orgID,
}
am.gokitLogger = gokit_log.NewLogfmtLogger(logging.NewWrapper(am.logger))
@ -174,25 +171,7 @@ func (am *Alertmanager) ready() bool {
return am.config != nil
}
func (am *Alertmanager) Run(ctx context.Context) error {
// Make sure dispatcher starts. We can tolerate future reload failures.
if err := am.SyncAndApplyConfigFromDatabase(mainOrgID); err != nil {
am.logger.Error("unable to sync configuration", "err", err)
}
for {
select {
case <-ctx.Done():
return am.StopAndWait()
case <-time.After(pollInterval):
if err := am.SyncAndApplyConfigFromDatabase(mainOrgID); err != nil {
am.logger.Error("unable to sync configuration", "err", err)
}
}
}
}
func (am *Alertmanager) StopAndWait() error {
func (am *Alertmanager) StopAndWait() {
if am.dispatcher != nil {
am.dispatcher.Stop()
}
@ -206,12 +185,11 @@ func (am *Alertmanager) StopAndWait() error {
close(am.stopc)
am.wg.Wait()
return nil
}
// SaveAndApplyDefaultConfig saves the default configuration to the database and applies the configuration to the Alertmanager.
// It rolls back the save if we fail to apply the configuration.
func (am *Alertmanager) SaveAndApplyDefaultConfig(orgID int64) error {
func (am *Alertmanager) SaveAndApplyDefaultConfig() error {
am.reloadConfigMtx.Lock()
defer am.reloadConfigMtx.Unlock()
@ -219,7 +197,7 @@ func (am *Alertmanager) SaveAndApplyDefaultConfig(orgID int64) error {
AlertmanagerConfiguration: alertmanagerDefaultConfiguration,
Default: true,
ConfigurationVersion: fmt.Sprintf("v%d", ngmodels.AlertConfigurationVersion),
OrgID: orgID,
OrgID: am.orgID,
}
cfg, err := Load([]byte(alertmanagerDefaultConfiguration))
@ -243,7 +221,7 @@ func (am *Alertmanager) SaveAndApplyDefaultConfig(orgID int64) error {
// SaveAndApplyConfig saves the configuration to the database and applies the configuration to the Alertmanager.
// It rolls back the save if we fail to apply the configuration.
func (am *Alertmanager) SaveAndApplyConfig(orgID int64, cfg *apimodels.PostableUserConfig) error {
func (am *Alertmanager) SaveAndApplyConfig(cfg *apimodels.PostableUserConfig) error {
rawConfig, err := json.Marshal(&cfg)
if err != nil {
return fmt.Errorf("failed to serialize to the Alertmanager configuration: %w", err)
@ -255,7 +233,7 @@ func (am *Alertmanager) SaveAndApplyConfig(orgID int64, cfg *apimodels.PostableU
cmd := &ngmodels.SaveAlertmanagerConfigurationCmd{
AlertmanagerConfiguration: string(rawConfig),
ConfigurationVersion: fmt.Sprintf("v%d", ngmodels.AlertConfigurationVersion),
OrgID: orgID,
OrgID: am.orgID,
}
err = am.Store.SaveAlertmanagerConfigurationWithCallback(cmd, func() error {
@ -274,12 +252,12 @@ func (am *Alertmanager) SaveAndApplyConfig(orgID int64, cfg *apimodels.PostableU
// SyncAndApplyConfigFromDatabase picks the latest config from database and restarts
// the components with the new config.
func (am *Alertmanager) SyncAndApplyConfigFromDatabase(orgID int64) error {
func (am *Alertmanager) SyncAndApplyConfigFromDatabase() error {
am.reloadConfigMtx.Lock()
defer am.reloadConfigMtx.Unlock()
// First, let's get the configuration we need from the database.
q := &ngmodels.GetLatestAlertmanagerConfigurationQuery{OrgID: mainOrgID}
q := &ngmodels.GetLatestAlertmanagerConfigurationQuery{OrgID: am.orgID}
if err := am.Store.GetLatestAlertmanagerConfiguration(q); err != nil {
// If there's no configuration in the database, let's use the default configuration.
if errors.Is(err, store.ErrNoAlertmanagerConfiguration) {
@ -289,7 +267,7 @@ func (am *Alertmanager) SyncAndApplyConfigFromDatabase(orgID int64) error {
AlertmanagerConfiguration: alertmanagerDefaultConfiguration,
Default: true,
ConfigurationVersion: fmt.Sprintf("v%d", ngmodels.AlertConfigurationVersion),
OrgID: orgID,
OrgID: am.orgID,
}
if err := am.Store.SaveAlertmanagerConfiguration(savecmd); err != nil {
return err
@ -389,7 +367,7 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
// Finally, build the integrations map using the receiver configuration and templates.
integrationsMap, err := am.buildIntegrationsMap(cfg.AlertmanagerConfig.Receivers, tmpl)
if err != nil {
return err
return fmt.Errorf("failed to build integration map: %w", err)
}
// Now, let's put together our notification pipeline
routingStage := make(notify.RoutingStage, len(integrationsMap))
@ -433,7 +411,7 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
}
func (am *Alertmanager) WorkingDirPath() string {
return filepath.Join(am.Settings.DataPath, workingDir, strconv.Itoa(mainOrgID))
return filepath.Join(am.Settings.DataPath, workingDir, strconv.Itoa(int(am.orgID)))
}
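Since WorkingDirPath now includes the org ID, each organization gets its own on-disk directory for silences and the notification log. A runnable sketch of the resulting layout, assuming a data path of /var/lib/grafana (the path is illustrative):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strconv"
)

// workingDirPath mirrors Alertmanager.WorkingDirPath from the diff:
// <dataPath>/alerting/<orgID>, so each org's silences and notification
// log live in their own directory.
func workingDirPath(dataPath string, orgID int64) string {
	return filepath.Join(dataPath, "alerting", strconv.Itoa(int(orgID)))
}

func main() {
	for _, orgID := range []int64{1, 2, 42} {
		fmt.Println(workingDirPath("/var/lib/grafana", orgID))
	}
	// Output:
	// /var/lib/grafana/alerting/1
	// /var/lib/grafana/alerting/2
	// /var/lib/grafana/alerting/42
}
```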
// buildIntegrationsMap builds a map of name to the list of Grafana integration notifiers off of a list of receiver config.

View File

@ -47,14 +47,14 @@ func setupAMTest(t *testing.T) *Alertmanager {
Logger: log.New("alertmanager-test"),
}
am, err := New(cfg, store, m)
am, err := newAlertmanager(1, cfg, store, m)
require.NoError(t, err)
return am
}
func TestAlertmanager_ShouldUseDefaultConfigurationWhenNoConfiguration(t *testing.T) {
am := setupAMTest(t)
require.NoError(t, am.SyncAndApplyConfigFromDatabase(mainOrgID))
require.NoError(t, am.SyncAndApplyConfigFromDatabase())
require.NotNil(t, am.config)
}

View File

@ -0,0 +1,147 @@
package notifier
import (
"context"
"fmt"
"sync"
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/setting"
)
var (
SyncOrgsPollInterval = 1 * time.Minute
)
var (
ErrNoAlertmanagerForOrg = fmt.Errorf("Alertmanager does not exist for this organization")
ErrAlertmanagerNotReady = fmt.Errorf("Alertmanager is not ready yet")
)
type MultiOrgAlertmanager struct {
alertmanagersMtx sync.RWMutex
alertmanagers map[int64]*Alertmanager
settings *setting.Cfg
logger log.Logger
configStore store.AlertingStore
orgStore store.OrgStore
orgRegistry *metrics.OrgRegistries
}
func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore store.AlertingStore, orgStore store.OrgStore) *MultiOrgAlertmanager {
return &MultiOrgAlertmanager{
settings: cfg,
logger: log.New("multiorg.alertmanager"),
alertmanagers: map[int64]*Alertmanager{},
configStore: configStore,
orgStore: orgStore,
orgRegistry: metrics.NewOrgRegistries(),
}
}
func (moa *MultiOrgAlertmanager) Run(ctx context.Context) error {
moa.logger.Info("starting MultiOrg Alertmanager")
for {
select {
case <-ctx.Done():
moa.StopAndWait()
return nil
case <-time.After(SyncOrgsPollInterval):
if err := moa.LoadAndSyncAlertmanagersForOrgs(ctx); err != nil {
moa.logger.Error("error while synchronizing Alertmanager orgs", "err", err)
}
}
}
}
func (moa *MultiOrgAlertmanager) LoadAndSyncAlertmanagersForOrgs(ctx context.Context) error {
moa.logger.Debug("synchronizing Alertmanagers for orgs")
// First, load all the organizations from the database.
orgIDs, err := moa.orgStore.GetOrgs(ctx)
if err != nil {
return err
}
// Then, sync them by creating or deleting Alertmanagers as necessary.
moa.SyncAlertmanagersForOrgs(orgIDs)
moa.logger.Debug("done synchronizing Alertmanagers for orgs")
return nil
}
func (moa *MultiOrgAlertmanager) SyncAlertmanagersForOrgs(orgIDs []int64) {
orgsFound := make(map[int64]struct{}, len(orgIDs))
moa.alertmanagersMtx.Lock()
for _, orgID := range orgIDs {
orgsFound[orgID] = struct{}{}
existing, found := moa.alertmanagers[orgID]
if !found {
reg := moa.orgRegistry.GetOrCreateOrgRegistry(orgID)
am, err := newAlertmanager(orgID, moa.settings, moa.configStore, metrics.NewMetrics(reg))
if err != nil {
moa.logger.Error("unable to create Alertmanager for org", "org", orgID, "err", err)
}
moa.alertmanagers[orgID] = am
existing = am
}
//TODO: This will create an N+1 query
if err := existing.SyncAndApplyConfigFromDatabase(); err != nil {
moa.logger.Error("failed to apply Alertmanager config for org", "org", orgID, "err", err)
}
}
amsToStop := map[int64]*Alertmanager{}
for orgId, am := range moa.alertmanagers {
if _, exists := orgsFound[orgId]; !exists {
amsToStop[orgId] = am
delete(moa.alertmanagers, orgId)
moa.orgRegistry.RemoveOrgRegistry(orgId)
}
}
moa.alertmanagersMtx.Unlock()
// Now, we can stop the Alertmanagers without having to hold a lock.
for orgID, am := range amsToStop {
moa.logger.Info("stopping Alertmanager", "org", orgID)
am.StopAndWait()
moa.logger.Info("stopped Alertmanager", "org", orgID)
}
}
func (moa *MultiOrgAlertmanager) StopAndWait() {
moa.alertmanagersMtx.Lock()
defer moa.alertmanagersMtx.Unlock()
for _, am := range moa.alertmanagers {
am.StopAndWait()
}
}
// AlertmanagerFor returns the Alertmanager instance for the organization provided.
// When the organization does not have an active Alertmanager, it returns ErrNoAlertmanagerForOrg.
// When the Alertmanager of the organization is not ready, it returns ErrAlertmanagerNotReady.
func (moa *MultiOrgAlertmanager) AlertmanagerFor(orgID int64) (*Alertmanager, error) {
moa.alertmanagersMtx.RLock()
defer moa.alertmanagersMtx.RUnlock()
orgAM, existing := moa.alertmanagers[orgID]
if !existing {
return nil, ErrNoAlertmanagerForOrg
}
if !orgAM.Ready() {
return nil, ErrAlertmanagerNotReady
}
return orgAM, nil
}

View File

@ -0,0 +1,92 @@
package notifier
import (
"context"
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
"github.com/stretchr/testify/require"
)
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
configStore := &FakeConfigStore{
configs: map[int64]*models.AlertConfiguration{},
}
orgStore := &FakeOrgStore{
orgs: []int64{1, 2, 3},
}
SyncOrgsPollInterval = 10 * time.Minute // Don't poll in unit tests.
mam := NewMultiOrgAlertmanager(&setting.Cfg{}, configStore, orgStore)
ctx := context.Background()
// Ensure that one Alertmanager is created per org.
{
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
require.Len(t, mam.alertmanagers, 3)
}
// When an org is removed, it should detect it.
{
orgStore.orgs = []int64{1, 3}
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
require.Len(t, mam.alertmanagers, 2)
}
// if the org comes back, it should detect it.
{
orgStore.orgs = []int64{1, 2, 3, 4}
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
require.Len(t, mam.alertmanagers, 4)
}
}
func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
configStore := &FakeConfigStore{
configs: map[int64]*models.AlertConfiguration{},
}
orgStore := &FakeOrgStore{
orgs: []int64{1, 2, 3},
}
SyncOrgsPollInterval = 10 * time.Minute // Don't poll in unit tests.
mam := NewMultiOrgAlertmanager(&setting.Cfg{}, configStore, orgStore)
ctx := context.Background()
// Ensure that one Alertmanager is created per org.
{
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
require.Len(t, mam.alertmanagers, 3)
}
// First, let's try to request an Alertmanager from an org that doesn't exist.
{
_, err := mam.AlertmanagerFor(5)
require.EqualError(t, err, ErrNoAlertmanagerForOrg.Error())
}
// Now, let's try to request an Alertmanager that is not ready.
{
// let's delete its "running config" to make it non-ready
mam.alertmanagers[1].config = nil
_, err := mam.AlertmanagerFor(1)
require.EqualError(t, err, ErrAlertmanagerNotReady.Error())
}
// With an Alertmanager that exists, it responds correctly.
{
am, err := mam.AlertmanagerFor(2)
require.NoError(t, err)
require.Equal(t, *am.GetStatus().VersionInfo.Version, "N/A")
require.Equal(t, am.orgID, int64(2))
require.NotNil(t, am.config)
}
// Let's now remove the previously queried organization.
orgStore.orgs = []int64{1, 3}
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
{
_, err := mam.AlertmanagerFor(2)
require.EqualError(t, err, ErrNoAlertmanagerForOrg.Error())
}
}

View File

@ -1 +0,0 @@
package notifier

View File

@ -0,0 +1,56 @@
package notifier
import (
"context"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
)
type FakeConfigStore struct {
configs map[int64]*models.AlertConfiguration
}
func (f *FakeConfigStore) GetLatestAlertmanagerConfiguration(query *models.GetLatestAlertmanagerConfigurationQuery) error {
var ok bool
query.Result, ok = f.configs[query.OrgID]
if !ok {
return store.ErrNoAlertmanagerConfiguration
}
return nil
}
func (f *FakeConfigStore) SaveAlertmanagerConfiguration(cmd *models.SaveAlertmanagerConfigurationCmd) error {
f.configs[cmd.OrgID] = &models.AlertConfiguration{
AlertmanagerConfiguration: cmd.AlertmanagerConfiguration,
OrgID: cmd.OrgID,
ConfigurationVersion: "v1",
Default: cmd.Default,
}
return nil
}
func (f *FakeConfigStore) SaveAlertmanagerConfigurationWithCallback(cmd *models.SaveAlertmanagerConfigurationCmd, callback store.SaveCallback) error {
f.configs[cmd.OrgID] = &models.AlertConfiguration{
AlertmanagerConfiguration: cmd.AlertmanagerConfiguration,
OrgID: cmd.OrgID,
ConfigurationVersion: "v1",
Default: cmd.Default,
}
if err := callback(); err != nil {
return err
}
return nil
}
type FakeOrgStore struct {
orgs []int64
}
func (f *FakeOrgStore) GetOrgs(_ context.Context) ([]int64, error) {
return f.orgs, nil
}

View File

@ -9,10 +9,10 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/alerting"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/sender"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
@ -39,11 +39,6 @@ type ScheduleService interface {
overrideCfg(cfg SchedulerCfg)
}
// Notifier handles the delivery of alert notifications to the end user
type Notifier interface {
PutAlerts(alerts apimodels.PostableAlerts) error
}
type schedule struct {
// base tick rate (fastest possible configured check)
baseInterval time.Duration
@ -74,14 +69,15 @@ type schedule struct {
ruleStore store.RuleStore
instanceStore store.InstanceStore
adminConfigStore store.AdminConfigurationStore
orgStore store.OrgStore
dataService *tsdb.Service
stateManager *state.Manager
appURL string
notifier Notifier
metrics *metrics.Metrics
multiOrgNotifier *notifier.MultiOrgAlertmanager
metrics *metrics.Metrics
// Senders help us send alerts to external Alertmanagers.
sendersMtx sync.RWMutex
@ -100,9 +96,10 @@ type SchedulerCfg struct {
StopAppliedFunc func(models.AlertRuleKey)
Evaluator eval.Evaluator
RuleStore store.RuleStore
OrgStore store.OrgStore
InstanceStore store.InstanceStore
AdminConfigStore store.AdminConfigurationStore
Notifier Notifier
MultiOrgNotifier *notifier.MultiOrgAlertmanager
Metrics *metrics.Metrics
AdminConfigPollInterval time.Duration
}
@ -122,9 +119,10 @@ func NewScheduler(cfg SchedulerCfg, dataService *tsdb.Service, appURL string, st
evaluator: cfg.Evaluator,
ruleStore: cfg.RuleStore,
instanceStore: cfg.InstanceStore,
orgStore: cfg.OrgStore,
dataService: dataService,
adminConfigStore: cfg.AdminConfigStore,
notifier: cfg.Notifier,
multiOrgNotifier: cfg.MultiOrgNotifier,
metrics: cfg.Metrics,
appURL: appURL,
stateManager: stateManager,
@ -454,10 +452,15 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
processedStates := sch.stateManager.ProcessEvalResults(alertRule, results)
sch.saveAlertStates(processedStates)
alerts := FromAlertStateToPostableAlerts(sch.log, processedStates, sch.stateManager, sch.appURL)
sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
err = sch.notifier.PutAlerts(alerts)
if err != nil {
sch.log.Error("failed to put alerts in the notifier", "count", len(alerts.PostableAlerts), "err", err)
sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts, "org", alertRule.OrgID)
n, err := sch.multiOrgNotifier.AlertmanagerFor(alertRule.OrgID)
if err == nil {
if err := n.PutAlerts(alerts); err != nil {
sch.log.Error("failed to put alerts in the notifier", "count", len(alerts.PostableAlerts), "err", err)
}
} else {
sch.log.Error("unable to lookup local notifier for this org - alerts not delivered", "org", alertRule.OrgID, "count", len(alerts.PostableAlerts), "err", err)
}
// Send alerts to external Alertmanager(s) if we have a sender for this organization.

View File

@ -14,6 +14,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/setting"
@ -243,7 +244,7 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
RuleStore: rs,
InstanceStore: is,
AdminConfigStore: acs,
Notifier: &fakeNotifier{},
MultiOrgNotifier: notifier.NewMultiOrgAlertmanager(&setting.Cfg{}, &notifier.FakeConfigStore{}, &notifier.FakeOrgStore{}),
Logger: logger,
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.

View File

@ -10,7 +10,6 @@ import (
"time"
models2 "github.com/grafana/grafana/pkg/models"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
@ -230,13 +229,6 @@ func (f *fakeAdminConfigStore) UpdateAdminConfiguration(cmd store.UpdateAdminCon
return nil
}
// fakeNotifier represents a fake internal Alertmanager.
type fakeNotifier struct{}
func (n *fakeNotifier) PutAlerts(alerts apimodels.PostableAlerts) error {
return nil
}
type FakeExternalAlertmanager struct {
t *testing.T
mtx sync.Mutex

View File

@ -0,0 +1,26 @@
package store
import (
"context"
"github.com/grafana/grafana/pkg/services/sqlstore"
)
type OrgStore interface {
GetOrgs(ctx context.Context) ([]int64, error)
}
func (st DBstore) GetOrgs(ctx context.Context) ([]int64, error) {
orgs := make([]int64, 0)
err := st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
q := "SELECT id FROM org"
if err := sess.SQL(q).Find(&orgs); err != nil {
return err
}
return nil
})
if err != nil {
return nil, err
}
return orgs, nil
}

View File

@ -6,6 +6,9 @@ import (
"net/http"
"regexp"
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/models"
@ -16,6 +19,13 @@ import (
)
func TestAlertmanagerConfigurationIsTransactional(t *testing.T) {
// TODO: We need a reliable way to ensure Alertmanagers have synced correctly.
// For now, make them sync quicker.
p := notifier.SyncOrgsPollInterval
notifier.SyncOrgsPollInterval = 2 * time.Second
t.Cleanup(func() {
notifier.SyncOrgsPollInterval = p
})
dir, path := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
EnableFeatureToggles: []string{"ngalert"},
DisableAnonymous: true,
@ -85,7 +95,7 @@ func TestAlertmanagerConfigurationIsTransactional(t *testing.T) {
}
`
resp := postRequest(t, alertConfigURL, payload, http.StatusBadRequest) // nolint
require.JSONEq(t, `{"message":"failed to save and apply Alertmanager configuration: the receiver is invalid: failed to validate receiver \"slack.receiver\" of type \"slack\": token must be specified when using the Slack chat API"}`, getBody(t, resp.Body))
require.JSONEq(t, `{"message":"failed to save and apply Alertmanager configuration: failed to build integration map: the receiver is invalid: failed to validate receiver \"slack.receiver\" of type \"slack\": token must be specified when using the Slack chat API"}`, getBody(t, resp.Body))
resp = getRequest(t, alertConfigURL, http.StatusOK) // nolint
require.JSONEq(t, defaultAlertmanagerConfigJSON, getBody(t, resp.Body))
@ -94,6 +104,13 @@ func TestAlertmanagerConfigurationIsTransactional(t *testing.T) {
// editor42 from organisation 42 posts configuration
alertConfigURL = fmt.Sprintf("http://editor-42:editor-42@%s/api/alertmanager/grafana/config/api/v1/alerts", grafanaListedAddr)
// Before we start operating, make sure we've synced this org.
require.Eventually(t, func() bool {
resp, err := http.Get(alertConfigURL) // nolint
require.NoError(t, err)
return resp.StatusCode == http.StatusOK
}, 10*time.Second, 2*time.Second)
// Post the alertmanager config.
{
mockChannel := newMockNotificationChannel(t, grafanaListedAddr)