Alerting: Introduces /api/v1/ngalert/alertmanagers to expose discovered and dropped Alertmanager(s) (#37632)

* Alerting: Expose discovered and dropped Alertmanagers

Exposes the API for discovered and dropped Alertmanagers.

* make admin config poll interval configurable

* update after rebase

* wordsmith

* More wordsmithing

* change name of the config

* settings package too
This commit is contained in:
gotjosh 2021-08-13 13:14:36 +01:00 committed by GitHub
parent db91a55d49
commit f3f3fcc727
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 514 additions and 123 deletions

View File

@ -715,6 +715,11 @@ global_session = -1
# global limit of alerts
global_alert_rule = -1
#################################### Unified Alerting ####################
[unified_alerting]
# Specify the frequency of polling for admin config changes.
admin_config_poll_interval_seconds = 60
#################################### Alerting ############################
[alerting]
# Disable alerting engine & UI features

View File

@ -698,6 +698,11 @@
# global limit of alerts
;global_alert_rule = -1
#################################### Unified Alerting ####################
[unified_alerting]
# Specify the frequency of polling for admin config changes.
;admin_config_poll_interval_seconds = 60
#################################### Alerting ############################
[alerting]
# Disable alerting engine & UI features

View File

@ -1099,6 +1099,16 @@ Sets a global limit on number of alert rules that can be created. Default is -1
<hr>
## [unified_alerting]
For more information about the Unified Alerting feature in Grafana, refer to [Unified Alerting]({{< relref "../unified-alerting/_index.md" >}}).
### admin_config_poll_interval_seconds
Specify the frequency of polling for admin config changes. The default value is `60`.
<hr>
## [alerting]
For more information about the Alerting feature in Grafana, refer to [Alerts overview]({{< relref "../alerting/_index.md" >}}).

View File

@ -1,6 +1,7 @@
package api
import (
"net/url"
"time"
"github.com/grafana/grafana/pkg/api/routing"
@ -20,6 +21,11 @@ import (
// timeNow makes it possible to test usage of time
var timeNow = time.Now
// Scheduler is the part of the alerting scheduler that the API layer relies on
// to report, per organization, which external Alertmanagers are in use.
type Scheduler interface {
	// AlertmanagersFor returns the URLs of the Alertmanagers reported as
	// active for the organization identified by orgID.
	AlertmanagersFor(orgID int64) []*url.URL
	// DroppedAlertmanagersFor returns the URLs of the Alertmanagers reported
	// as dropped for the organization identified by orgID.
	DroppedAlertmanagersFor(orgID int64) []*url.URL
}
type Alertmanager interface {
// Configuration
// temporary add orgID parameter; this will move to the Alertmanager wrapper when it will be available
@ -63,19 +69,19 @@ func (api *API) RegisterAPIEndpoints(m *metrics.Metrics) {
DataProxy: api.DataProxy,
}
// Register endpoints for proxing to Alertmanager-compatible backends.
// Register endpoints for proxying to Alertmanager-compatible backends.
api.RegisterAlertmanagerApiEndpoints(NewForkedAM(
api.DatasourceCache,
NewLotexAM(proxy, logger),
AlertmanagerSrv{store: api.AlertingStore, am: api.Alertmanager, log: logger},
), m)
// Register endpoints for proxing to Prometheus-compatible backends.
// Register endpoints for proxying to Prometheus-compatible backends.
api.RegisterPrometheusApiEndpoints(NewForkedProm(
api.DatasourceCache,
NewLotexProm(proxy, logger),
PrometheusSrv{log: logger, manager: api.StateManager, store: api.RuleStore},
), m)
// Register endpoints for proxing to Cortex Ruler-compatible backends.
// Register endpoints for proxying to Cortex Ruler-compatible backends.
api.RegisterRulerApiEndpoints(NewForkedRuler(
api.DatasourceCache,
NewLotexRuler(proxy, logger),
@ -89,7 +95,8 @@ func (api *API) RegisterAPIEndpoints(m *metrics.Metrics) {
log: logger,
}, m)
api.RegisterConfigurationApiEndpoints(AdminSrv{
store: api.AdminConfigStore,
log: logger,
store: api.AdminConfigStore,
log: logger,
scheduler: api.Schedule,
}, m)
}

View File

@ -10,11 +10,32 @@ import (
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)
type AdminSrv struct {
store store.AdminConfigurationStore
log log.Logger
scheduler Scheduler
store store.AdminConfigurationStore
log log.Logger
}
// RouteGetAlertmanagers responds with the Alertmanagers that the scheduler
// reports as active and as dropped for the organization of the request.
//
// The payload mirrors the Prometheus "alertmanagers" API result wrapped in a
// status envelope; both lists are always non-nil, so they encode as JSON
// arrays even when empty.
func (srv AdminSrv) RouteGetAlertmanagers(c *models.ReqContext) response.Response {
	active := srv.scheduler.AlertmanagersFor(c.OrgId)
	dropped := srv.scheduler.DroppedAlertmanagersFor(c.OrgId)

	result := v1.AlertManagersResult{
		Active:  make([]v1.AlertManager, 0, len(active)),
		Dropped: make([]v1.AlertManager, 0, len(dropped)),
	}
	for _, u := range active {
		result.Active = append(result.Active, v1.AlertManager{URL: u.String()})
	}
	for _, u := range dropped {
		result.Dropped = append(result.Dropped, v1.AlertManager{URL: u.String()})
	}

	body := apimodels.GettableAlertmanagers{
		Status: "success",
		Data:   result,
	}
	return response.JSON(http.StatusOK, body)
}
func (srv AdminSrv) RouteGetNGalertConfig(c *models.ReqContext) response.Response {
@ -56,7 +77,7 @@ func (srv AdminSrv) RoutePostNGalertConfig(c *models.ReqContext, body apimodels.
return ErrResp(http.StatusBadRequest, err, msg)
}
return response.JSON(http.StatusCreated, "admin configuration updated")
return response.JSON(http.StatusCreated, util.DynMap{"message": "admin configuration updated"})
}
func (srv AdminSrv) RouteDeleteNGalertConfig(c *models.ReqContext) response.Response {
@ -70,5 +91,5 @@ func (srv AdminSrv) RouteDeleteNGalertConfig(c *models.ReqContext) response.Resp
return ErrResp(http.StatusInternalServerError, err, "")
}
return response.JSON(http.StatusOK, "admin configuration deleted")
return response.JSON(http.StatusOK, util.DynMap{"message": "admin configuration deleted"})
}

View File

@ -21,6 +21,7 @@ import (
type ConfigurationApiService interface {
RouteDeleteNGalertConfig(*models.ReqContext) response.Response
RouteGetAlertmanagers(*models.ReqContext) response.Response
RouteGetNGalertConfig(*models.ReqContext) response.Response
RoutePostNGalertConfig(*models.ReqContext, apimodels.PostableNGalertConfig) response.Response
}
@ -36,6 +37,15 @@ func (api *API) RegisterConfigurationApiEndpoints(srv ConfigurationApiService, m
m,
),
)
group.Get(
toMacaronPath("/api/v1/ngalert/alertmanagers"),
metrics.Instrument(
http.MethodGet,
"/api/v1/ngalert/alertmanagers",
srv.RouteGetAlertmanagers,
m,
),
)
group.Get(
toMacaronPath("/api/v1/ngalert/admin_config"),
metrics.Instrument(

View File

@ -1,5 +1,17 @@
package definitions
import v1 "github.com/prometheus/client_golang/api/prometheus/v1"
// swagger:route GET /api/v1/ngalert/alertmanagers configuration RouteGetAlertmanagers
//
// Get the discovered and dropped Alertmanagers of the user's organization based on the specified configuration.
//
// Produces:
// - application/json
//
// Responses:
// 200: GettableAlertmanagers
// swagger:route GET /api/v1/ngalert/admin_config configuration RouteGetNGalertConfig
//
// Get the NGalert configuration of the user's organization, returns 404 if no configuration is present.
@ -49,3 +61,9 @@ type PostableNGalertConfig struct {
type GettableNGalertConfig struct {
Alertmanagers []string `json:"alertmanagers"`
}
// GettableAlertmanagers is the response body of
// GET /api/v1/ngalert/alertmanagers: a status string plus the active and
// dropped Alertmanagers in the Prometheus client's AlertManagersResult shape.
//
// swagger:model
type GettableAlertmanagers struct {
	Status string                 `json:"status"`
	Data   v1.AlertManagersResult `json:"data"`
}

View File

@ -57,12 +57,8 @@
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"AlertGroup": {
"$ref": "#/definitions/alertGroup"
},
"AlertGroups": {
"$ref": "#/definitions/alertGroups"
},
"AlertGroup": {},
"AlertGroups": {},
"AlertInstancesResponse": {
"properties": {
"instances": {
@ -81,6 +77,38 @@
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"AlertManager": {
"properties": {
"url": {
"type": "string",
"x-go-name": "URL"
}
},
"title": "AlertManager models a configured Alert Manager.",
"type": "object",
"x-go-package": "github.com/prometheus/client_golang/api/prometheus/v1"
},
"AlertManagersResult": {
"properties": {
"activeAlertManagers": {
"items": {
"$ref": "#/definitions/AlertManager"
},
"type": "array",
"x-go-name": "Active"
},
"droppedAlertManagers": {
"items": {
"$ref": "#/definitions/AlertManager"
},
"type": "array",
"x-go-name": "Dropped"
}
},
"title": "AlertManagersResult contains the result from querying the alertmanagers endpoint.",
"type": "object",
"x-go-package": "github.com/prometheus/client_golang/api/prometheus/v1"
},
"AlertQuery": {
"properties": {
"datasourceUid": {
@ -477,12 +505,21 @@
"Failure": {
"$ref": "#/definitions/ResponseDetails"
},
"GettableAlert": {
"$ref": "#/definitions/gettableAlert"
},
"GettableAlerts": {
"$ref": "#/definitions/gettableAlerts"
"GettableAlert": {},
"GettableAlertmanagers": {
"properties": {
"data": {
"$ref": "#/definitions/AlertManagersResult"
},
"status": {
"type": "string",
"x-go-name": "Status"
}
},
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"GettableAlerts": {},
"GettableApiAlertingConfig": {
"properties": {
"global": {
@ -786,8 +823,12 @@
"type": "object",
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"GettableSilence": {},
"GettableSilences": {},
"GettableSilence": {
"$ref": "#/definitions/gettableSilence"
},
"GettableSilences": {
"$ref": "#/definitions/gettableSilences"
},
"GettableStatus": {
"properties": {
"cluster": {
@ -2243,6 +2284,7 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"properties": {
"ForceQuery": {
"type": "boolean"
@ -2275,9 +2317,9 @@
"$ref": "#/definitions/Userinfo"
}
},
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"type": "object",
"x-go-package": "github.com/prometheus/common/config"
"x-go-package": "net/url"
},
"Userinfo": {
"description": "The Userinfo type is an immutable encapsulation of username and\npassword details for a URL. An existing Userinfo value is guaranteed\nto have a username set (potentially empty, as allowed by RFC 2396),\nand optionally a password.",
@ -2449,7 +2491,7 @@
"alerts": {
"description": "alerts",
"items": {
"$ref": "#/definitions/gettableAlert"
"$ref": "#/definitions/GettableAlert"
},
"type": "array",
"x-go-name": "Alerts"
@ -2729,7 +2771,7 @@
"gettableSilences": {
"description": "GettableSilences gettable silences",
"items": {
"$ref": "#/definitions/GettableSilence"
"$ref": "#/definitions/gettableSilence"
},
"type": "array",
"x-go-name": "GettableSilences",
@ -3888,6 +3930,26 @@
]
}
},
"/api/v1/ngalert/alertmanagers": {
"get": {
"operationId": "RouteGetAlertmanagers",
"produces": [
"application/json"
],
"responses": {
"200": {
"description": "GettableAlertmanagers",
"schema": {
"$ref": "#/definitions/GettableAlertmanagers"
}
}
},
"summary": "Get the discovered and dropped Alertmanagers of the user's organization based on the specified configuration.",
"tags": [
"configuration"
]
}
},
"/api/v1/receiver/test/{Recipient}": {
"post": {
"consumes": [

View File

@ -870,6 +870,26 @@
}
}
},
"/api/v1/ngalert/alertmanagers": {
"get": {
"produces": [
"application/json"
],
"tags": [
"configuration"
],
"summary": "Get the discovered and dropped Alertmanagers of the user's organization based on the specified configuration.",
"operationId": "RouteGetAlertmanagers",
"responses": {
"200": {
"description": "GettableAlertmanagers",
"schema": {
"$ref": "#/definitions/GettableAlertmanagers"
}
}
}
}
},
"/api/v1/receiver/test/{Recipient}": {
"post": {
"description": "Test receiver",
@ -1016,10 +1036,10 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"AlertGroup": {
"$ref": "#/definitions/alertGroup"
"$ref": "#/definitions/AlertGroup"
},
"AlertGroups": {
"$ref": "#/definitions/alertGroups"
"$ref": "#/definitions/AlertGroups"
},
"AlertInstancesResponse": {
"type": "object",
@ -1039,6 +1059,38 @@
},
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"AlertManager": {
"type": "object",
"title": "AlertManager models a configured Alert Manager.",
"properties": {
"url": {
"type": "string",
"x-go-name": "URL"
}
},
"x-go-package": "github.com/prometheus/client_golang/api/prometheus/v1"
},
"AlertManagersResult": {
"type": "object",
"title": "AlertManagersResult contains the result from querying the alertmanagers endpoint.",
"properties": {
"activeAlertManagers": {
"type": "array",
"items": {
"$ref": "#/definitions/AlertManager"
},
"x-go-name": "Active"
},
"droppedAlertManagers": {
"type": "array",
"items": {
"$ref": "#/definitions/AlertManager"
},
"x-go-name": "Dropped"
}
},
"x-go-package": "github.com/prometheus/client_golang/api/prometheus/v1"
},
"AlertQuery": {
"type": "object",
"title": "AlertQuery represents a single query associated with an alert definition.",
@ -1439,10 +1491,23 @@
"$ref": "#/definitions/ResponseDetails"
},
"GettableAlert": {
"$ref": "#/definitions/gettableAlert"
"$ref": "#/definitions/GettableAlert"
},
"GettableAlertmanagers": {
"type": "object",
"properties": {
"data": {
"$ref": "#/definitions/AlertManagersResult"
},
"status": {
"type": "string",
"x-go-name": "Status"
}
},
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"GettableAlerts": {
"$ref": "#/definitions/gettableAlerts"
"$ref": "#/definitions/GettableAlerts"
},
"GettableApiAlertingConfig": {
"type": "object",
@ -1748,10 +1813,10 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"GettableSilence": {
"$ref": "#/definitions/GettableSilence"
"$ref": "#/definitions/gettableSilence"
},
"GettableSilences": {
"$ref": "#/definitions/GettableSilences"
"$ref": "#/definitions/gettableSilences"
},
"GettableStatus": {
"type": "object",
@ -3212,8 +3277,9 @@
"x-go-package": "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"type": "object",
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"properties": {
"ForceQuery": {
"type": "boolean"
@ -3246,7 +3312,7 @@
"$ref": "#/definitions/Userinfo"
}
},
"x-go-package": "github.com/prometheus/common/config"
"x-go-package": "net/url"
},
"Userinfo": {
"description": "The Userinfo type is an immutable encapsulation of username and\npassword details for a URL. An existing Userinfo value is guaranteed\nto have a username set (potentially empty, as allowed by RFC 2396),\nand optionally a password.",
@ -3425,7 +3491,7 @@
"description": "alerts",
"type": "array",
"items": {
"$ref": "#/definitions/gettableAlert"
"$ref": "#/definitions/GettableAlert"
},
"x-go-name": "Alerts"
},
@ -3699,7 +3765,7 @@
"description": "GettableSilences gettable silences",
"type": "array",
"items": {
"$ref": "#/definitions/GettableSilence"
"$ref": "#/definitions/gettableSilence"
},
"x-go-name": "GettableSilences",
"x-go-package": "github.com/prometheus/alertmanager/api/v2/models"

View File

@ -81,17 +81,19 @@ func (ng *AlertNG) Init() error {
}
schedCfg := schedule.SchedulerCfg{
C: clock.New(),
BaseInterval: baseInterval,
Logger: log.New("ngalert.scheduler"),
MaxAttempts: maxAttempts,
Evaluator: eval.Evaluator{Cfg: ng.Cfg, Log: ng.Log},
InstanceStore: store,
RuleStore: store,
AdminConfigStore: store,
Notifier: ng.Alertmanager,
Metrics: ng.Metrics,
C: clock.New(),
BaseInterval: baseInterval,
Logger: log.New("ngalert.scheduler"),
MaxAttempts: maxAttempts,
Evaluator: eval.Evaluator{Cfg: ng.Cfg, Log: ng.Log},
InstanceStore: store,
RuleStore: store,
AdminConfigStore: store,
Notifier: ng.Alertmanager,
Metrics: ng.Metrics,
AdminConfigPollInterval: ng.Cfg.AdminConfigPollInterval,
}
ng.stateManager = state.NewManager(ng.Log, ng.Metrics, store, store)
ng.schedule = schedule.NewScheduler(schedCfg, ng.DataService, ng.Cfg.AppURL, ng.stateManager)

View File

@ -25,14 +25,13 @@ import (
// timeNow makes it possible to test usage of time
var timeNow = time.Now
// AdminConfigPollingInterval of how often we sync admin configuration.
var AdminConfigPollingInterval = 1 * time.Minute
// ScheduleService handles scheduling
type ScheduleService interface {
Run(context.Context) error
Pause() error
Unpause() error
AlertmanagersFor(orgID int64) []*url.URL
DroppedAlertmanagersFor(orgID int64) []*url.URL
// the following are used only by tests
evalApplied(models.AlertRuleKey, time.Time)
@ -85,50 +84,53 @@ type schedule struct {
metrics *metrics.Metrics
// Senders help us send alerts to external Alertmanagers.
sendersMtx sync.RWMutex
sendersCfgHash map[int64]string
senders map[int64]*sender.Sender
sendersMtx sync.RWMutex
sendersCfgHash map[int64]string
senders map[int64]*sender.Sender
adminConfigPollInterval time.Duration
}
// SchedulerCfg is the scheduler configuration.
type SchedulerCfg struct {
C clock.Clock
BaseInterval time.Duration
Logger log.Logger
EvalAppliedFunc func(models.AlertRuleKey, time.Time)
MaxAttempts int64
StopAppliedFunc func(models.AlertRuleKey)
Evaluator eval.Evaluator
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AdminConfigStore store.AdminConfigurationStore
Notifier Notifier
Metrics *metrics.Metrics
C clock.Clock
BaseInterval time.Duration
Logger log.Logger
EvalAppliedFunc func(models.AlertRuleKey, time.Time)
MaxAttempts int64
StopAppliedFunc func(models.AlertRuleKey)
Evaluator eval.Evaluator
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AdminConfigStore store.AdminConfigurationStore
Notifier Notifier
Metrics *metrics.Metrics
AdminConfigPollInterval time.Duration
}
// NewScheduler returns a new schedule.
func NewScheduler(cfg SchedulerCfg, dataService *tsdb.Service, appURL string, stateManager *state.Manager) *schedule {
ticker := alerting.NewTicker(cfg.C.Now(), time.Second*0, cfg.C, int64(cfg.BaseInterval.Seconds()))
sch := schedule{
registry: alertRuleRegistry{alertRuleInfo: make(map[models.AlertRuleKey]alertRuleInfo)},
maxAttempts: cfg.MaxAttempts,
clock: cfg.C,
baseInterval: cfg.BaseInterval,
log: cfg.Logger,
heartbeat: ticker,
evalAppliedFunc: cfg.EvalAppliedFunc,
stopAppliedFunc: cfg.StopAppliedFunc,
evaluator: cfg.Evaluator,
ruleStore: cfg.RuleStore,
instanceStore: cfg.InstanceStore,
dataService: dataService,
adminConfigStore: cfg.AdminConfigStore,
notifier: cfg.Notifier,
metrics: cfg.Metrics,
appURL: appURL,
stateManager: stateManager,
senders: map[int64]*sender.Sender{},
sendersCfgHash: map[int64]string{},
registry: alertRuleRegistry{alertRuleInfo: make(map[models.AlertRuleKey]alertRuleInfo)},
maxAttempts: cfg.MaxAttempts,
clock: cfg.C,
baseInterval: cfg.BaseInterval,
log: cfg.Logger,
heartbeat: ticker,
evalAppliedFunc: cfg.EvalAppliedFunc,
stopAppliedFunc: cfg.StopAppliedFunc,
evaluator: cfg.Evaluator,
ruleStore: cfg.RuleStore,
instanceStore: cfg.InstanceStore,
dataService: dataService,
adminConfigStore: cfg.AdminConfigStore,
notifier: cfg.Notifier,
metrics: cfg.Metrics,
appURL: appURL,
stateManager: stateManager,
senders: map[int64]*sender.Sender{},
sendersCfgHash: map[int64]string{},
adminConfigPollInterval: cfg.AdminConfigPollInterval,
}
return &sch
}
@ -290,7 +292,7 @@ func (sch *schedule) DroppedAlertmanagersFor(orgID int64) []*url.URL {
func (sch *schedule) adminConfigSync(ctx context.Context) error {
for {
select {
case <-time.After(AdminConfigPollingInterval):
case <-time.After(sch.adminConfigPollInterval):
if err := sch.SyncAndApplyConfigFromDatabase(); err != nil {
sch.log.Error("unable to sync admin configuration", "err", err)
}

View File

@ -99,9 +99,10 @@ func TestWarmStateCache(t *testing.T) {
BaseInterval: time.Second,
Logger: log.New("ngalert cache warming test"),
RuleStore: dbstore,
InstanceStore: dbstore,
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
RuleStore: dbstore,
InstanceStore: dbstore,
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.
}
st := state.NewManager(schedCfg.Logger, nilMetrics, dbstore, dbstore)
st.Warm()
@ -143,10 +144,11 @@ func TestAlertingTicker(t *testing.T) {
StopAppliedFunc: func(alertDefKey models.AlertRuleKey) {
stopAppliedCh <- alertDefKey
},
RuleStore: dbstore,
InstanceStore: dbstore,
Logger: log.New("ngalert schedule test"),
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
RuleStore: dbstore,
InstanceStore: dbstore,
Logger: log.New("ngalert schedule test"),
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.
}
st := state.NewManager(schedCfg.Logger, nilMetrics, dbstore, dbstore)
sched := schedule.NewScheduler(schedCfg, nil, "http://localhost", st)

View File

@ -27,7 +27,7 @@ import (
func TestSendingToExternalAlertmanager(t *testing.T) {
t.Cleanup(registry.ClearOverrides)
fakeAM := newFakeExternalAlertmanager(t)
fakeAM := NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeRuleStore := newFakeRuleStore(t)
fakeInstanceStore := &fakeInstanceStore{}
@ -61,7 +61,6 @@ func TestSendingToExternalAlertmanager(t *testing.T) {
cancel()
})
go func() {
AdminConfigPollingInterval = 10 * time.Minute // Do not poll in unit tests.
err := sched.Run(ctx)
require.NoError(t, err)
}()
@ -95,7 +94,7 @@ func TestSendingToExternalAlertmanager(t *testing.T) {
func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
t.Cleanup(registry.ClearOverrides)
fakeAM := newFakeExternalAlertmanager(t)
fakeAM := NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeRuleStore := newFakeRuleStore(t)
fakeInstanceStore := &fakeInstanceStore{}
@ -130,7 +129,6 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
cancel()
})
go func() {
AdminConfigPollingInterval = 10 * time.Minute // Do not poll in unit tests.
err := sched.Run(ctx)
require.NoError(t, err)
}()
@ -161,7 +159,7 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
}, 20*time.Second, 200*time.Millisecond)
// 2. Next, let's modify the configuration of an organization by adding an extra alertmanager.
fakeAM2 := newFakeExternalAlertmanager(t)
fakeAM2 := NewFakeExternalAlertmanager(t)
adminConfig2 = &models.AdminConfiguration{OrgID: 2, Alertmanagers: []string{fakeAM.server.URL, fakeAM2.server.URL}}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig2}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
@ -238,16 +236,17 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
logger := log.New("ngalert schedule test")
nilMetrics := metrics.NewMetrics(nil)
schedCfg := SchedulerCfg{
C: mockedClock,
BaseInterval: time.Second,
MaxAttempts: 1,
Evaluator: eval.Evaluator{Cfg: &setting.Cfg{ExpressionsEnabled: true}, Log: logger},
RuleStore: rs,
InstanceStore: is,
AdminConfigStore: acs,
Notifier: &fakeNotifier{},
Logger: logger,
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
C: mockedClock,
BaseInterval: time.Second,
MaxAttempts: 1,
Evaluator: eval.Evaluator{Cfg: &setting.Cfg{ExpressionsEnabled: true}, Log: logger},
RuleStore: rs,
InstanceStore: is,
AdminConfigStore: acs,
Notifier: &fakeNotifier{},
Logger: logger,
Metrics: metrics.NewMetrics(prometheus.NewRegistry()),
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.
}
st := state.NewManager(schedCfg.Logger, nilMetrics, rs, is)
return NewScheduler(schedCfg, nil, "http://localhost", st), mockedClock

View File

@ -237,17 +237,17 @@ func (n *fakeNotifier) PutAlerts(alerts apimodels.PostableAlerts) error {
return nil
}
type fakeExternalAlertmanager struct {
// FakeExternalAlertmanager is a test helper that stands in for an external
// Alertmanager. It holds a list of alerts and an httptest server.
type FakeExternalAlertmanager struct {
	t      *testing.T
	mtx    sync.Mutex // guards alerts
	alerts amv2.PostableAlerts
	server *httptest.Server
}
func newFakeExternalAlertmanager(t *testing.T) *fakeExternalAlertmanager {
func NewFakeExternalAlertmanager(t *testing.T) *FakeExternalAlertmanager {
t.Helper()
am := &fakeExternalAlertmanager{
am := &FakeExternalAlertmanager{
t: t,
alerts: amv2.PostableAlerts{},
}
@ -256,7 +256,11 @@ func newFakeExternalAlertmanager(t *testing.T) *fakeExternalAlertmanager {
return am
}
func (am *fakeExternalAlertmanager) AlertNamesCompare(expected []string) bool {
// URL returns the URL of the underlying httptest server.
func (am *FakeExternalAlertmanager) URL() string {
	return am.server.URL
}
func (am *FakeExternalAlertmanager) AlertNamesCompare(expected []string) bool {
n := []string{}
alerts := am.Alerts()
@ -275,20 +279,20 @@ func (am *fakeExternalAlertmanager) AlertNamesCompare(expected []string) bool {
return assert.ObjectsAreEqual(expected, n)
}
func (am *fakeExternalAlertmanager) AlertsCount() int {
// AlertsCount reports how many alerts are currently held, reading the list
// while holding the mutex.
func (am *FakeExternalAlertmanager) AlertsCount() int {
	am.mtx.Lock()
	count := len(am.alerts)
	am.mtx.Unlock()
	return count
}
func (am *fakeExternalAlertmanager) Alerts() amv2.PostableAlerts {
// Alerts returns the alerts currently held. The slice is read while holding
// the mutex and returned as-is (it is not copied).
func (am *FakeExternalAlertmanager) Alerts() amv2.PostableAlerts {
	am.mtx.Lock()
	current := am.alerts
	am.mtx.Unlock()
	return current
}
func (am *fakeExternalAlertmanager) Handler() func(w http.ResponseWriter, r *http.Request) {
func (am *FakeExternalAlertmanager) Handler() func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
b, err := ioutil.ReadAll(r.Body)
require.NoError(am.t, err)
@ -302,6 +306,6 @@ func (am *fakeExternalAlertmanager) Handler() func(w http.ResponseWriter, r *htt
}
}
func (am *fakeExternalAlertmanager) Close() {
// Close shuts down the underlying httptest server.
func (am *FakeExternalAlertmanager) Close() {
	am.server.Close()
}

View File

@ -405,6 +405,9 @@ type Cfg struct {
// Geomap base layer config
GeomapDefaultBaseLayerConfig map[string]interface{}
GeomapEnableCustomBaseLayers bool
// Unified Alerting
AdminConfigPollInterval time.Duration
}
// IsLiveConfigEnabled returns true if live should be able to save configs to SQL tables
@ -898,6 +901,10 @@ func (cfg *Cfg) Load(args *CommandLineArgs) error {
return err
}
if err := cfg.readUnifiedAlertingSettings(iniFile); err != nil {
return err
}
explore := iniFile.Section("explore")
ExploreEnabled = explore.Key("enabled").MustBool(true)
@ -1349,6 +1356,13 @@ func readRenderingSettings(iniFile *ini.File, cfg *Cfg) error {
return nil
}
// readUnifiedAlertingSettings loads the [unified_alerting] section of the
// configuration into cfg.
//
// admin_config_poll_interval_seconds is stored in cfg.AdminConfigPollInterval
// as a time.Duration. The default of 60 seconds is used when the key is
// missing or is not an integer, and also when the value is zero or negative.
//
// It currently always returns nil; the error return matches the other
// settings readers called from Load.
func (cfg *Cfg) readUnifiedAlertingSettings(iniFile *ini.File) error {
	const defaultPollIntervalSeconds = 60

	ua := iniFile.Section("unified_alerting")
	s := ua.Key("admin_config_poll_interval_seconds").MustInt(defaultPollIntervalSeconds)
	if s <= 0 {
		// The scheduler waits on time.After(interval) between syncs; a
		// non-positive interval would fire immediately and turn the poll into
		// a busy loop, so treat it like any other invalid value.
		s = defaultPollIntervalSeconds
	}
	cfg.AdminConfigPollInterval = time.Duration(s) * time.Second
	return nil
}
func readAlertingSettings(iniFile *ini.File) error {
alerting := iniFile.Section("alerting")
AlertingEnabled = alerting.Key("enabled").MustBool(true)

View File

@ -0,0 +1,156 @@
package alerting
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"testing"
"time"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/models"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/tests/testinfra"
)
// TestAdminConfiguration_SendingToExternalAlertmanagers is an end-to-end test
// of the admin configuration API. It starts Grafana with ngalert enabled,
// configures two fake external Alertmanagers through the API, waits for them
// to be reported as active, creates an always-firing rule and finally checks
// that each fake Alertmanager ends up holding exactly one alert.
func TestAdminConfiguration_SendingToExternalAlertmanagers(t *testing.T) {
	dir, path := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
		EnableFeatureToggles:              []string{"ngalert"},
		DisableAnonymous:                  true,
		NGAlertAdminConfigIntervalSeconds: 2,
	})
	s := testinfra.SetUpDatabase(t, dir)
	// override bus to get the GetSignedInUserQuery handler
	s.Bus = bus.GetBus()
	grafanaListedAddr := testinfra.StartGrafana(t, dir, path, s)

	// Create a user to make authenticated requests
	createUser(t, s, models.CreateUserCommand{
		DefaultOrgRole: string(models.ROLE_ADMIN),
		Login:          "grafana",
		Password:       "password",
	})

	// Create a couple of "fake" Alertmanagers and make sure their servers are
	// shut down when the test finishes.
	fakeAM1 := schedule.NewFakeExternalAlertmanager(t)
	t.Cleanup(fakeAM1.Close)
	fakeAM2 := schedule.NewFakeExternalAlertmanager(t)
	t.Cleanup(fakeAM2.Close)

	// Now, let's test the configuration API.
	{
		alertsURL := fmt.Sprintf("http://grafana:password@%s/api/v1/ngalert/admin_config", grafanaListedAddr)
		resp := getRequest(t, alertsURL, http.StatusNotFound) // nolint
		b, err := ioutil.ReadAll(resp.Body)
		require.NoError(t, err)
		// JSONEq takes the expected document first, then the actual one.
		require.JSONEq(t, "{\"message\": \"no admin configuration available\"}", string(b))
	}

	// Now, let's set the external Alertmanagers.
	{
		ac := apimodels.PostableNGalertConfig{
			Alertmanagers: []string{fakeAM1.URL(), fakeAM2.URL()},
		}
		buf := bytes.Buffer{}
		enc := json.NewEncoder(&buf)
		err := enc.Encode(&ac)
		require.NoError(t, err)

		alertsURL := fmt.Sprintf("http://grafana:password@%s/api/v1/ngalert/admin_config", grafanaListedAddr)
		resp := postRequest(t, alertsURL, buf.String(), http.StatusCreated) // nolint
		b, err := ioutil.ReadAll(resp.Body)
		require.NoError(t, err)
		require.JSONEq(t, "{\"message\": \"admin configuration updated\"}", string(b))
	}

	// If we get the configuration again, it shows us what we've set.
	{
		alertsURL := fmt.Sprintf("http://grafana:password@%s/api/v1/ngalert/admin_config", grafanaListedAddr)
		resp := getRequest(t, alertsURL, http.StatusOK) // nolint
		b, err := ioutil.ReadAll(resp.Body)
		require.NoError(t, err)
		require.JSONEq(t, fmt.Sprintf("{\"alertmanagers\":[\"%s\",\"%s\"]}\n", fakeAM1.URL(), fakeAM2.URL()), string(b))
	}

	// With the configuration set, we should eventually discover the configured Alertmanagers.
	{
		alertsURL := fmt.Sprintf("http://grafana:password@%s/api/v1/ngalert/alertmanagers", grafanaListedAddr)
		require.Eventually(t, func() bool {
			resp := getRequest(t, alertsURL, http.StatusOK) // nolint
			// The condition is polled by require.Eventually, so report a
			// failed attempt by returning false (and let it retry) instead of
			// calling require/FailNow from inside the condition.
			b, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				return false
			}

			var alertmanagers apimodels.GettableAlertmanagers
			if err := json.Unmarshal(b, &alertmanagers); err != nil {
				return false
			}
			return len(alertmanagers.Data.Active) == 2
		}, 80*time.Second, 4*time.Second)
	}

	// Now, let's set an alert that should fire as quickly as possible.
	{
		// create the namespace we'll save our alerts to
		_, err := createFolder(t, s, 0, "default")
		require.NoError(t, err)
		interval, err := model.ParseDuration("10s")
		require.NoError(t, err)

		rules := apimodels.PostableRuleGroupConfig{
			Name:     "arulegroup",
			Interval: interval,
			Rules: []apimodels.PostableExtendedRuleNode{
				{
					ApiRuleNode: &apimodels.ApiRuleNode{
						For:         interval,
						Labels:      map[string]string{"label1": "val1"},
						Annotations: map[string]string{"annotation1": "val1"},
					},
					// this rule does not explicitly set no data and error states
					// therefore it should get the default values
					GrafanaManagedAlert: &apimodels.PostableGrafanaRule{
						Title:     "AlwaysFiring",
						Condition: "A",
						Data: []ngmodels.AlertQuery{
							{
								RefID: "A",
								RelativeTimeRange: ngmodels.RelativeTimeRange{
									From: ngmodels.Duration(time.Duration(5) * time.Hour),
									To:   ngmodels.Duration(time.Duration(3) * time.Hour),
								},
								DatasourceUID: "-100",
								Model: json.RawMessage(`{
"type": "math",
"expression": "2 + 3 > 1"
}`),
							},
						},
					},
				},
			},
		}
		buf := bytes.Buffer{}
		enc := json.NewEncoder(&buf)
		err = enc.Encode(&rules)
		require.NoError(t, err)

		ruleURL := fmt.Sprintf("http://grafana:password@%s/api/ruler/grafana/api/v1/rules/default", grafanaListedAddr)
		// nolint
		_ = postRequest(t, ruleURL, buf.String(), http.StatusAccepted)
	}

	// Eventually, our Alertmanagers should receive the alert.
	{
		require.Eventually(t, func() bool {
			return fakeAM1.AlertsCount() == 1 && fakeAM2.AlertsCount() == 1
		}, 60*time.Second, 5*time.Second)
	}
}

View File

@ -56,10 +56,10 @@ func postRequest(t *testing.T, url string, body string, expStatusCode int) *http
buf := bytes.NewReader([]byte(body))
// nolint:gosec
resp, err := http.Post(url, "application/json", buf)
require.NoError(t, err)
t.Cleanup(func() {
require.NoError(t, resp.Body.Close())
})
require.NoError(t, err)
if expStatusCode != resp.StatusCode {
b, err := ioutil.ReadAll(resp.Body)
require.NoError(t, err)

View File

@ -224,6 +224,13 @@ func CreateGrafDir(t *testing.T, opts ...GrafanaOpts) (string, string) {
_, err = featureSection.NewKey("enable", strings.Join(o.EnableFeatureToggles, " "))
require.NoError(t, err)
}
if o.NGAlertAdminConfigIntervalSeconds != 0 {
	// The settings loader reads admin_config_poll_interval_seconds from the
	// [unified_alerting] section, so the override must be written there to
	// take effect.
	ngalertingSection, err := cfg.NewSection("unified_alerting")
	require.NoError(t, err)
	_, err = ngalertingSection.NewKey("admin_config_poll_interval_seconds", fmt.Sprintf("%d", o.NGAlertAdminConfigIntervalSeconds))
	require.NoError(t, err)
}
if o.AnonymousUserRole != "" {
_, err = anonSect.NewKey("org_role", string(o.AnonymousUserRole))
require.NoError(t, err)
@ -265,12 +272,13 @@ func CreateGrafDir(t *testing.T, opts ...GrafanaOpts) (string, string) {
}
type GrafanaOpts struct {
EnableCSP bool
EnableFeatureToggles []string
AnonymousUserRole models.RoleType
EnableQuota bool
DisableAnonymous bool
CatalogAppEnabled bool
ViewersCanEdit bool
PluginAdminEnabled bool
EnableCSP bool
EnableFeatureToggles []string
NGAlertAdminConfigIntervalSeconds int
AnonymousUserRole models.RoleType
EnableQuota bool
DisableAnonymous bool
CatalogAppEnabled bool
ViewersCanEdit bool
PluginAdminEnabled bool
}