mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting/metrics (#33547)
* moves alerting metrics to their own pkg * adds grafana_alerting_alerts (by state) metric * alerts_received_{total,invalid} * embed alertmanager alerting struct in ng metrics & remove duplicated notification metrics (already embed alertmanager notifier metrics) * use silence metrics from alertmanager lib * fix - manager has metrics * updates ngalert tests * comment lint Signed-off-by: Owen Diehl <ow.diehl@gmail.com> * cleaner prom registry code * removes ngalert global metrics * new registry use in all tests * ngalert metrics impl service, hack testinfra code to prevent duplicate metric registrations * nilmetrics unexported
This commit is contained in:
parent
b45120b999
commit
5e48b54549
@ -3,8 +3,8 @@ package api
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
"github.com/go-macaron/binding"
|
||||
|
||||
@ -24,11 +24,6 @@ import (
|
||||
// timeNow makes it possible to test usage of time
|
||||
var timeNow = time.Now
|
||||
|
||||
// metrics are a globally registered metric suite for alerting.
|
||||
// TODO: refactor testware to allow these to be created without
|
||||
// panicking on duplicate registration, thus enabling non-global vars.
|
||||
var metrics = NewMetrics(prometheus.DefaultRegisterer)
|
||||
|
||||
type Alertmanager interface {
|
||||
// Configuration
|
||||
SaveAndApplyConfig(config *apimodels.PostableUserConfig) error
|
||||
@ -60,7 +55,7 @@ type API struct {
|
||||
}
|
||||
|
||||
// RegisterAPIEndpoints registers API handlers
|
||||
func (api *API) RegisterAPIEndpoints() {
|
||||
func (api *API) RegisterAPIEndpoints(m *metrics.Metrics) {
|
||||
logger := log.New("ngalert.api")
|
||||
proxy := &AlertingProxy{
|
||||
DataProxy: api.DataProxy,
|
||||
@ -71,26 +66,26 @@ func (api *API) RegisterAPIEndpoints() {
|
||||
api.DatasourceCache,
|
||||
NewLotexAM(proxy, logger),
|
||||
AlertmanagerSrv{store: api.AlertingStore, am: api.Alertmanager, log: logger},
|
||||
), metrics)
|
||||
), m)
|
||||
// Register endpoints for proxying to Prometheus-compatible backends.
|
||||
api.RegisterPrometheusApiEndpoints(NewForkedProm(
|
||||
api.DatasourceCache,
|
||||
NewLotexProm(proxy, logger),
|
||||
PrometheusSrv{log: logger, manager: api.StateManager, store: api.RuleStore},
|
||||
), metrics)
|
||||
), m)
|
||||
// Register endpoints for proxying to Cortex Ruler-compatible backends.
|
||||
api.RegisterRulerApiEndpoints(NewForkedRuler(
|
||||
api.DatasourceCache,
|
||||
NewLotexRuler(proxy, logger),
|
||||
RulerSrv{DatasourceCache: api.DatasourceCache, store: api.RuleStore, log: logger},
|
||||
), metrics)
|
||||
), m)
|
||||
api.RegisterTestingApiEndpoints(TestingApiSrv{
|
||||
AlertingProxy: proxy,
|
||||
Cfg: api.Cfg,
|
||||
DataService: api.DataService,
|
||||
DatasourceCache: api.DatasourceCache,
|
||||
log: logger,
|
||||
}, metrics)
|
||||
}, m)
|
||||
|
||||
// Legacy routes; they will be removed in v8
|
||||
api.RouteRegister.Group("/api/alert-definitions", func(alertDefinitions routing.RouteRegister) {
|
||||
|
@ -17,6 +17,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/middleware"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
)
|
||||
|
||||
type AlertmanagerApiService interface {
|
||||
@ -32,99 +33,99 @@ type AlertmanagerApiService interface {
|
||||
RoutePostAlertingConfig(*models.ReqContext, apimodels.PostableUserConfig) response.Response
|
||||
}
|
||||
|
||||
func (api *API) RegisterAlertmanagerApiEndpoints(srv AlertmanagerApiService, metrics *Metrics) {
|
||||
func (api *API) RegisterAlertmanagerApiEndpoints(srv AlertmanagerApiService, m *metrics.Metrics) {
|
||||
api.RouteRegister.Group("", func(group routing.RouteRegister) {
|
||||
group.Post(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/silences"),
|
||||
binding.Bind(apimodels.PostableSilence{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/alertmanager/{Recipient}/api/v2/silences",
|
||||
srv.RouteCreateSilence,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Delete(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/config/api/v1/alerts"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodDelete,
|
||||
"/api/alertmanager/{Recipient}/config/api/v1/alerts",
|
||||
srv.RouteDeleteAlertingConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Delete(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/silence/{SilenceId}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodDelete,
|
||||
"/api/alertmanager/{Recipient}/api/v2/silence/{SilenceId}",
|
||||
srv.RouteDeleteSilence,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/alerts/groups"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/alertmanager/{Recipient}/api/v2/alerts/groups",
|
||||
srv.RouteGetAMAlertGroups,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/alerts"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/alertmanager/{Recipient}/api/v2/alerts",
|
||||
srv.RouteGetAMAlerts,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/config/api/v1/alerts"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/alertmanager/{Recipient}/config/api/v1/alerts",
|
||||
srv.RouteGetAlertingConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/silence/{SilenceId}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/alertmanager/{Recipient}/api/v2/silence/{SilenceId}",
|
||||
srv.RouteGetSilence,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/silences"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/alertmanager/{Recipient}/api/v2/silences",
|
||||
srv.RouteGetSilences,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Post(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/api/v2/alerts"),
|
||||
binding.Bind(apimodels.PostableAlerts{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/alertmanager/{Recipient}/api/v2/alerts",
|
||||
srv.RoutePostAMAlerts,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Post(
|
||||
toMacaronPath("/api/alertmanager/{Recipient}/config/api/v1/alerts"),
|
||||
binding.Bind(apimodels.PostableUserConfig{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/alertmanager/{Recipient}/config/api/v1/alerts",
|
||||
srv.RoutePostAlertingConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
}, middleware.ReqSignedIn)
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/api/routing"
|
||||
"github.com/grafana/grafana/pkg/middleware"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
)
|
||||
|
||||
type PrometheusApiService interface {
|
||||
@ -21,24 +22,24 @@ type PrometheusApiService interface {
|
||||
RouteGetRuleStatuses(*models.ReqContext) response.Response
|
||||
}
|
||||
|
||||
func (api *API) RegisterPrometheusApiEndpoints(srv PrometheusApiService, metrics *Metrics) {
|
||||
func (api *API) RegisterPrometheusApiEndpoints(srv PrometheusApiService, m *metrics.Metrics) {
|
||||
api.RouteRegister.Group("", func(group routing.RouteRegister) {
|
||||
group.Get(
|
||||
toMacaronPath("/api/prometheus/{Recipient}/api/v1/alerts"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/prometheus/{Recipient}/api/v1/alerts",
|
||||
srv.RouteGetAlertStatuses,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/prometheus/{Recipient}/api/v1/rules"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/prometheus/{Recipient}/api/v1/rules",
|
||||
srv.RouteGetRuleStatuses,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
}, middleware.ReqSignedIn)
|
||||
|
@ -17,6 +17,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/middleware"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
)
|
||||
|
||||
type RulerApiService interface {
|
||||
@ -28,61 +29,61 @@ type RulerApiService interface {
|
||||
RoutePostNameRulesConfig(*models.ReqContext, apimodels.PostableRuleGroupConfig) response.Response
|
||||
}
|
||||
|
||||
func (api *API) RegisterRulerApiEndpoints(srv RulerApiService, metrics *Metrics) {
|
||||
func (api *API) RegisterRulerApiEndpoints(srv RulerApiService, m *metrics.Metrics) {
|
||||
api.RouteRegister.Group("", func(group routing.RouteRegister) {
|
||||
group.Delete(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules/{Namespace}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodDelete,
|
||||
"/api/ruler/{Recipient}/api/v1/rules/{Namespace}",
|
||||
srv.RouteDeleteNamespaceRulesConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Delete(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules/{Namespace}/{Groupname}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodDelete,
|
||||
"/api/ruler/{Recipient}/api/v1/rules/{Namespace}/{Groupname}",
|
||||
srv.RouteDeleteRuleGroupConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules/{Namespace}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/ruler/{Recipient}/api/v1/rules/{Namespace}",
|
||||
srv.RouteGetNamespaceRulesConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules/{Namespace}/{Groupname}"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/ruler/{Recipient}/api/v1/rules/{Namespace}/{Groupname}",
|
||||
srv.RouteGetRulegGroupConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Get(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules"),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodGet,
|
||||
"/api/ruler/{Recipient}/api/v1/rules",
|
||||
srv.RouteGetRulesConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Post(
|
||||
toMacaronPath("/api/ruler/{Recipient}/api/v1/rules/{Namespace}"),
|
||||
binding.Bind(apimodels.PostableRuleGroupConfig{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/ruler/{Recipient}/api/v1/rules/{Namespace}",
|
||||
srv.RoutePostNameRulesConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
}, middleware.ReqSignedIn)
|
||||
|
@ -17,6 +17,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/middleware"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
)
|
||||
|
||||
type TestingApiService interface {
|
||||
@ -25,36 +26,36 @@ type TestingApiService interface {
|
||||
RouteTestRuleConfig(*models.ReqContext, apimodels.TestRulePayload) response.Response
|
||||
}
|
||||
|
||||
func (api *API) RegisterTestingApiEndpoints(srv TestingApiService, metrics *Metrics) {
|
||||
func (api *API) RegisterTestingApiEndpoints(srv TestingApiService, m *metrics.Metrics) {
|
||||
api.RouteRegister.Group("", func(group routing.RouteRegister) {
|
||||
group.Post(
|
||||
toMacaronPath("/api/v1/eval"),
|
||||
binding.Bind(apimodels.EvalQueriesPayload{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/v1/eval",
|
||||
srv.RouteEvalQueries,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Post(
|
||||
toMacaronPath("/api/v1/receiver/test/{Recipient}"),
|
||||
binding.Bind(apimodels.ExtendedReceiver{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/v1/receiver/test/{Recipient}",
|
||||
srv.RouteTestReceiverConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
group.Post(
|
||||
toMacaronPath("/api/v1/rule/test/{Recipient}"),
|
||||
binding.Bind(apimodels.TestRulePayload{}),
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.MethodPost,
|
||||
"/api/v1/rule/test/{Recipient}",
|
||||
srv.RouteTestRuleConfig,
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
)
|
||||
}, middleware.ReqSignedIn)
|
||||
|
@ -9,6 +9,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/api/response"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/middleware"
|
||||
)
|
||||
|
||||
@ -16,16 +17,16 @@ type {{classname}}Service interface { {{#operation}}
|
||||
{{nickname}}(*models.ReqContext{{#bodyParams}}, apimodels.{{dataType}}{{/bodyParams}}) response.Response{{/operation}}
|
||||
}
|
||||
|
||||
func (api *API) Register{{classname}}Endpoints(srv {{classname}}Service, metrics *Metrics) {
|
||||
func (api *API) Register{{classname}}Endpoints(srv {{classname}}Service, m *metrics.Metrics) {
|
||||
api.RouteRegister.Group("", func(group routing.RouteRegister){ {{#operations}}{{#operation}}
|
||||
group.{{httpMethod}}(
|
||||
toMacaronPath("{{{path}}}"){{#bodyParams}},
|
||||
binding.Bind(apimodels.{{dataType}}{}){{/bodyParams}},
|
||||
Instrument(
|
||||
metrics.Instrument(
|
||||
http.Method{{httpMethod}},
|
||||
"{{{path}}}",
|
||||
srv.{{nickname}},
|
||||
metrics,
|
||||
m,
|
||||
),
|
||||
){{/operation}}{{/operations}}
|
||||
}, middleware.ReqSignedIn)
|
||||
|
@ -1,4 +1,4 @@
|
||||
package api
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@ -6,9 +6,12 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/alertmanager/api/metrics"
|
||||
|
||||
"github.com/grafana/grafana/pkg/api/response"
|
||||
"github.com/grafana/grafana/pkg/api/routing"
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
"github.com/grafana/grafana/pkg/registry"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
@ -20,57 +23,42 @@ const (
|
||||
ProxyBackend = "proxy"
|
||||
)
|
||||
|
||||
var GlobalMetrics = NewMetrics(prometheus.DefaultRegisterer)
|
||||
|
||||
type Metrics struct {
|
||||
alerts *prometheus.GaugeVec
|
||||
alertsInvalid prometheus.Counter
|
||||
alertsReceived prometheus.Counter
|
||||
notificationLatency prometheus.Histogram
|
||||
notifications *prometheus.CounterVec
|
||||
notificationsFailed *prometheus.CounterVec
|
||||
requestDuration *prometheus.HistogramVec
|
||||
silences *prometheus.GaugeVec
|
||||
*metrics.Alerts
|
||||
AlertState *prometheus.GaugeVec
|
||||
// Registerer is for use by subcomponents which register their own metrics.
|
||||
Registerer prometheus.Registerer
|
||||
RequestDuration *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterService(GlobalMetrics)
|
||||
}
|
||||
|
||||
func (m *Metrics) Init() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SwapRegisterer overwrites the prometheus registerer used by a *Metrics in place.
|
||||
// It's used by tests to prevent duplicate registration errors.
|
||||
func (m *Metrics) SwapRegisterer(r prometheus.Registerer) {
|
||||
next := NewMetrics(r)
|
||||
*m = *next
|
||||
}
|
||||
|
||||
func NewMetrics(r prometheus.Registerer) *Metrics {
|
||||
return &Metrics{
|
||||
alerts: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
|
||||
Alerts: metrics.NewAlerts("v2", r),
|
||||
AlertState: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "alerts",
|
||||
Help: "How many alerts by state.",
|
||||
}, []string{"state"}),
|
||||
alertsInvalid: promauto.With(r).NewCounter(prometheus.CounterOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "alerts_invalid_total",
|
||||
Help: "The total number of invalid received alerts.",
|
||||
}),
|
||||
alertsReceived: promauto.With(r).NewCounter(prometheus.CounterOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "alerts_received_total",
|
||||
Help: "The total number of received alerts.",
|
||||
}),
|
||||
notificationLatency: promauto.With(r).NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "notification_latency_seconds",
|
||||
Help: "Histogram of notification deliveries",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}),
|
||||
notifications: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "notifications_total",
|
||||
Help: "The total number of attempted notfications by integration.",
|
||||
}, []string{"integration"}),
|
||||
notificationsFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "notifications_failed_total",
|
||||
Help: "The total number of failed notfications by integration.",
|
||||
}, []string{"integration"}),
|
||||
requestDuration: promauto.With(r).NewHistogramVec(
|
||||
Registerer: r,
|
||||
RequestDuration: promauto.With(r).NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
@ -80,12 +68,6 @@ func NewMetrics(r prometheus.Registerer) *Metrics {
|
||||
},
|
||||
[]string{"method", "route", "status_code", "backend"},
|
||||
),
|
||||
silences: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "grafana",
|
||||
Subsystem: "alerting",
|
||||
Name: "silences",
|
||||
Help: "The total number of silences by state.",
|
||||
}, []string{"state"}),
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,7 +106,7 @@ func Instrument(
|
||||
"backend": backend,
|
||||
}
|
||||
res.WriteTo(c)
|
||||
metrics.requestDuration.With(ls).Observe(time.Since(start).Seconds())
|
||||
metrics.RequestDuration.With(ls).Observe(time.Since(start).Seconds())
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
@ -45,6 +46,7 @@ type AlertNG struct {
|
||||
DataService *tsdb.Service `inject:""`
|
||||
Alertmanager *notifier.Alertmanager `inject:""`
|
||||
DataProxy *datasourceproxy.DatasourceProxyService `inject:""`
|
||||
Metrics *metrics.Metrics `inject:""`
|
||||
Log log.Logger
|
||||
schedule schedule.ScheduleService
|
||||
stateManager *state.Manager
|
||||
@ -57,7 +59,7 @@ func init() {
|
||||
// Init initializes the AlertingService.
|
||||
func (ng *AlertNG) Init() error {
|
||||
ng.Log = log.New("ngalert")
|
||||
ng.stateManager = state.NewManager(ng.Log)
|
||||
ng.stateManager = state.NewManager(ng.Log, ng.Metrics)
|
||||
baseInterval := baseIntervalSeconds * time.Second
|
||||
|
||||
store := store.DBstore{BaseInterval: baseInterval, DefaultIntervalSeconds: defaultIntervalSeconds, SQLStore: ng.SQLStore}
|
||||
@ -87,7 +89,7 @@ func (ng *AlertNG) Init() error {
|
||||
Alertmanager: ng.Alertmanager,
|
||||
StateManager: ng.stateManager,
|
||||
}
|
||||
api.RegisterAPIEndpoints()
|
||||
api.RegisterAPIEndpoints(ng.Metrics)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -22,7 +22,6 @@ import (
|
||||
"github.com/prometheus/alertmanager/silence"
|
||||
"github.com/prometheus/alertmanager/template"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/grafana/grafana/pkg/components/securejsondata"
|
||||
@ -31,6 +30,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/registry"
|
||||
"github.com/grafana/grafana/pkg/services/alerting"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/notifier/channels"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||
@ -81,6 +81,7 @@ type Alertmanager struct {
|
||||
Settings *setting.Cfg `inject:""`
|
||||
SQLStore *sqlstore.SQLStore `inject:""`
|
||||
Store store.AlertingStore
|
||||
Metrics *metrics.Metrics `inject:""`
|
||||
|
||||
notificationLog *nflog.Log
|
||||
marker types.Marker
|
||||
@ -116,13 +117,19 @@ func (am *Alertmanager) IsDisabled() bool {
|
||||
return !am.Settings.IsNgAlertEnabled()
|
||||
}
|
||||
|
||||
func (am *Alertmanager) Init() (err error) {
|
||||
func (am *Alertmanager) Init() error {
|
||||
return am.InitWithMetrics(am.Metrics)
|
||||
}
|
||||
|
||||
// InitWithMetrics uses the supplied metrics for instantiation and
|
||||
// allows testware to circumvent duplicate registration errors.
|
||||
func (am *Alertmanager) InitWithMetrics(m *metrics.Metrics) (err error) {
|
||||
am.stopc = make(chan struct{})
|
||||
am.logger = log.New("alertmanager")
|
||||
r := prometheus.NewRegistry()
|
||||
am.marker = types.NewMarker(r)
|
||||
am.stageMetrics = notify.NewMetrics(r)
|
||||
am.dispatcherMetrics = dispatch.NewDispatcherMetrics(r)
|
||||
am.marker = types.NewMarker(m.Registerer)
|
||||
am.stageMetrics = notify.NewMetrics(m.Registerer)
|
||||
am.dispatcherMetrics = dispatch.NewDispatcherMetrics(m.Registerer)
|
||||
am.Metrics = m
|
||||
am.Store = store.DBstore{SQLStore: am.SQLStore}
|
||||
|
||||
// Initialize the notification log
|
||||
@ -137,6 +144,7 @@ func (am *Alertmanager) Init() (err error) {
|
||||
}
|
||||
// Initialize silences
|
||||
am.silences, err = silence.New(silence.Options{
|
||||
Metrics: m.Registerer,
|
||||
SnapshotFile: filepath.Join(am.WorkingDirPath(), "silences"),
|
||||
Retention: retentionNotificationsAndSilences,
|
||||
})
|
||||
@ -456,12 +464,19 @@ func (am *Alertmanager) PutAlerts(postableAlerts apimodels.PostableAlerts) error
|
||||
alert.EndsAt = now.Add(defaultResolveTimeout)
|
||||
}
|
||||
|
||||
if alert.EndsAt.After(now) {
|
||||
am.Metrics.Firing().Inc()
|
||||
} else {
|
||||
am.Metrics.Resolved().Inc()
|
||||
}
|
||||
|
||||
if err := alert.Validate(); err != nil {
|
||||
if validationErr == nil {
|
||||
validationErr = &AlertValidationError{}
|
||||
}
|
||||
validationErr.Alerts = append(validationErr.Alerts, a)
|
||||
validationErr.Errors = append(validationErr.Errors, err)
|
||||
am.Metrics.Invalid().Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/sqlstore"
|
||||
"github.com/grafana/grafana/pkg/setting"
|
||||
)
|
||||
@ -27,7 +28,7 @@ func TestAlertmanager_ShouldUseDefaultConfigurationWhenNoConfiguration(t *testin
|
||||
am := &Alertmanager{}
|
||||
am.Settings = &setting.Cfg{}
|
||||
am.SQLStore = sqlstore.InitTestDB(t)
|
||||
require.NoError(t, am.Init())
|
||||
require.NoError(t, am.InitWithMetrics(metrics.NewMetrics(prometheus.NewRegistry())))
|
||||
require.NoError(t, am.SyncAndApplyConfigFromDatabase())
|
||||
require.NotNil(t, am.config)
|
||||
}
|
||||
@ -44,7 +45,7 @@ func TestPutAlert(t *testing.T) {
|
||||
DataPath: dir,
|
||||
}
|
||||
|
||||
require.NoError(t, am.Init())
|
||||
require.NoError(t, am.InitWithMetrics(metrics.NewMetrics(prometheus.NewRegistry())))
|
||||
|
||||
startTime := time.Now()
|
||||
endTime := startTime.Add(2 * time.Hour)
|
||||
|
@ -2,12 +2,14 @@ package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
prometheusModel "github.com/prometheus/common/model"
|
||||
)
|
||||
@ -16,12 +18,14 @@ type cache struct {
|
||||
states map[string]*State
|
||||
mtxStates sync.RWMutex
|
||||
log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func newCache(logger log.Logger) *cache {
|
||||
func newCache(logger log.Logger, metrics *metrics.Metrics) *cache {
|
||||
return &cache{
|
||||
states: make(map[string]*State),
|
||||
log: logger,
|
||||
states: make(map[string]*State),
|
||||
log: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,13 +122,23 @@ func (c *cache) reset() {
|
||||
func (c *cache) trim() {
|
||||
c.mtxStates.Lock()
|
||||
defer c.mtxStates.Unlock()
|
||||
|
||||
ct := make(map[eval.State]int)
|
||||
|
||||
for _, v := range c.states {
|
||||
if len(v.Results) > 100 {
|
||||
newResults := make([]Evaluation, 100)
|
||||
copy(newResults, v.Results[100:])
|
||||
// Keep last 100 results
|
||||
copy(newResults, v.Results[len(v.Results)-100:])
|
||||
v.Results = newResults
|
||||
c.set(v)
|
||||
}
|
||||
|
||||
n := ct[v.State]
|
||||
ct[v.State] = n + 1
|
||||
}
|
||||
|
||||
for k, n := range ct {
|
||||
c.metrics.AlertState.WithLabelValues(strings.ToLower(k.String())).Set(float64(n))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,25 +1,29 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
Log log.Logger
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
Log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func NewManager(logger log.Logger) *Manager {
|
||||
func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
|
||||
manager := &Manager{
|
||||
cache: newCache(logger),
|
||||
quit: make(chan struct{}),
|
||||
Log: logger,
|
||||
cache: newCache(logger, metrics),
|
||||
quit: make(chan struct{}),
|
||||
Log: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
go manager.cleanUp()
|
||||
return manager
|
||||
@ -95,8 +99,11 @@ func (st *Manager) GetStatesByRuleUID() map[string][]*State {
|
||||
}
|
||||
|
||||
func (st *Manager) cleanUp() {
|
||||
ticker := time.NewTicker(time.Duration(60) * time.Minute)
|
||||
st.Log.Debug("starting cleanup process", "intervalMinutes", 60)
|
||||
// TODO: parameterize?
|
||||
// Setting to a reasonable default scrape interval for Prometheus.
|
||||
dur := time.Duration(15) * time.Second
|
||||
ticker := time.NewTicker(dur)
|
||||
st.Log.Debug("starting cleanup process", "dur", fmt.Sprint(dur))
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
@ -16,6 +17,8 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
var nilMetrics = metrics.NewMetrics(nil)
|
||||
|
||||
func TestProcessEvalResults(t *testing.T) {
|
||||
evaluationTime, err := time.Parse("2006-01-02", "2021-03-25")
|
||||
if err != nil {
|
||||
@ -775,7 +778,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
st := state.NewManager(log.New("test_state_manager"))
|
||||
st := state.NewManager(log.New("test_state_manager"), nilMetrics)
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
for _, res := range tc.evalResults {
|
||||
_ = st.ProcessEvalResults(tc.alertRule, res)
|
||||
|
@ -94,7 +94,7 @@ func TestWarmStateCache(t *testing.T) {
|
||||
Store: dbstore,
|
||||
}
|
||||
sched := schedule.NewScheduler(schedCfg, nil)
|
||||
st := state.NewManager(schedCfg.Logger)
|
||||
st := state.NewManager(schedCfg.Logger, nilMetrics)
|
||||
sched.WarmStateCache(st)
|
||||
|
||||
t.Run("instance cache has expected entries", func(t *testing.T) {
|
||||
@ -140,7 +140,7 @@ func TestAlertingTicker(t *testing.T) {
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
st := state.NewManager(schedCfg.Logger)
|
||||
st := state.NewManager(schedCfg.Logger, nilMetrics)
|
||||
go func() {
|
||||
err := sched.Ticker(ctx, st)
|
||||
require.NoError(t, err)
|
||||
|
@ -16,7 +16,9 @@ import (
|
||||
"github.com/grafana/grafana/pkg/models"
|
||||
"github.com/grafana/grafana/pkg/registry"
|
||||
"github.com/grafana/grafana/pkg/server"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/sqlstore"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gopkg.in/ini.v1"
|
||||
@ -26,8 +28,10 @@ import (
|
||||
// The server address is returned.
|
||||
func StartGrafana(t *testing.T, grafDir, cfgPath string, sqlStore *sqlstore.SQLStore) string {
|
||||
t.Helper()
|
||||
|
||||
ctx := context.Background()
|
||||
// Prevent duplicate registration errors between tests by replacing
|
||||
// the registry used.
|
||||
metrics.GlobalMetrics.SwapRegisterer(prometheus.NewRegistry())
|
||||
|
||||
origSQLStore := registry.GetService(sqlstore.ServiceName)
|
||||
t.Cleanup(func() {
|
||||
|
Loading…
Reference in New Issue
Block a user