diff --git a/pkg/infra/usagestats/service/service.go b/pkg/infra/usagestats/service/service.go index dd8fd32d7fd..bce9fea95be 100644 --- a/pkg/infra/usagestats/service/service.go +++ b/pkg/infra/usagestats/service/service.go @@ -11,19 +11,17 @@ import ( "github.com/grafana/grafana/pkg/infra/usagestats" "github.com/grafana/grafana/pkg/login/social" "github.com/grafana/grafana/pkg/plugins" - "github.com/grafana/grafana/pkg/services/alerting" "github.com/grafana/grafana/pkg/services/sqlstore" "github.com/grafana/grafana/pkg/setting" ) type UsageStats struct { - Cfg *setting.Cfg - Bus bus.Bus - SQLStore *sqlstore.SQLStore - AlertingUsageStats alerting.UsageStatsQuerier - PluginManager plugins.Manager - SocialService social.Service - kvStore *kvstore.NamespacedKVStore + Cfg *setting.Cfg + Bus bus.Bus + SQLStore *sqlstore.SQLStore + PluginManager plugins.Manager + SocialService social.Service + kvStore *kvstore.NamespacedKVStore log log.Logger @@ -33,20 +31,17 @@ type UsageStats struct { startTime time.Time } -func ProvideService(cfg *setting.Cfg, bus bus.Bus, sqlStore *sqlstore.SQLStore, - alertingStats alerting.UsageStatsQuerier, pluginManager plugins.Manager, - socialService social.Service, - kvStore kvstore.KVStore) *UsageStats { +func ProvideService(cfg *setting.Cfg, bus bus.Bus, sqlStore *sqlstore.SQLStore, pluginManager plugins.Manager, + socialService social.Service, kvStore kvstore.KVStore) *UsageStats { s := &UsageStats{ - Cfg: cfg, - Bus: bus, - SQLStore: sqlStore, - AlertingUsageStats: alertingStats, - oauthProviders: socialService.GetOAuthProviders(), - PluginManager: pluginManager, - kvStore: kvstore.WithNamespace(kvStore, 0, "infra.usagestats"), - log: log.New("infra.usagestats"), - startTime: time.Now(), + Cfg: cfg, + Bus: bus, + SQLStore: sqlStore, + oauthProviders: socialService.GetOAuthProviders(), + PluginManager: pluginManager, + kvStore: kvstore.WithNamespace(kvStore, 0, "infra.usagestats"), + log: log.New("infra.usagestats"), + startTime: time.Now(), } return s diff --git a/pkg/infra/usagestats/service/usage_stats.go b/pkg/infra/usagestats/service/usage_stats.go index 2175b077655..5bdbaa13184 100644 --- a/pkg/infra/usagestats/service/usage_stats.go +++ b/pkg/infra/usagestats/service/usage_stats.go @@ -150,28 +150,6 @@ func (uss *UsageStats) GetUsageReport(ctx context.Context) (usagestats.Report, e metrics["stats.packaging."+uss.Cfg.Packaging+".count"] = 1 metrics["stats.distributor."+uss.Cfg.ReportingDistributor+".count"] = 1 - // Alerting stats - alertingUsageStats, err := uss.AlertingUsageStats.QueryUsageStats() - if err != nil { - uss.log.Error("Failed to get alerting usage stats", "error", err) - return report, err - } - - var addAlertingUsageStats = func(dsType string, usageCount int) { - metrics[fmt.Sprintf("stats.alerting.ds.%s.count", dsType)] = usageCount - } - - alertingOtherCount := 0 - for dsType, usageCount := range alertingUsageStats.DatasourceUsage { - if uss.ShouldBeReported(dsType) { - addAlertingUsageStats(dsType, usageCount) - } else { - alertingOtherCount += usageCount - } - } - - addAlertingUsageStats("other", alertingOtherCount) - // fetch datasource access stats dsAccessStats := models.GetDataSourceAccessStatsQuery{} if err := uss.Bus.Dispatch(&dsAccessStats); err != nil { diff --git a/pkg/infra/usagestats/service/usage_stats_test.go b/pkg/infra/usagestats/service/usage_stats_test.go index 22eaf3f1fcd..7fc54de8ff0 100644 --- a/pkg/infra/usagestats/service/usage_stats_test.go +++ b/pkg/infra/usagestats/service/usage_stats_test.go @@ -19,7 +19,6 @@ import ( "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/plugins" "github.com/grafana/grafana/pkg/plugins/manager" - "github.com/grafana/grafana/pkg/services/alerting" "github.com/grafana/grafana/pkg/services/sqlstore" "github.com/grafana/grafana/pkg/setting" "github.com/stretchr/testify/assert" @@ -200,7 +199,6 @@ func TestMetrics(t *testing.T) { }) createConcurrentTokens(t, uss.SQLStore) - uss.AlertingUsageStats = &alertingUsageMock{} uss.oauthProviders = map[string]bool{ "github": true, @@ -370,11 +368,6 @@ func TestMetrics(t *testing.T) { assert.Equal(t, 6+7, metrics.Get("stats.ds_access.other.direct.count").MustInt()) assert.Equal(t, 4+8, metrics.Get("stats.ds_access.other.proxy.count").MustInt()) - assert.Equal(t, 1, metrics.Get("stats.alerting.ds.prometheus.count").MustInt()) - assert.Equal(t, 2, metrics.Get("stats.alerting.ds.graphite.count").MustInt()) - assert.Equal(t, 5, metrics.Get("stats.alerting.ds.mysql.count").MustInt()) - assert.Equal(t, 90, metrics.Get("stats.alerting.ds.other.count").MustInt()) - assert.Equal(t, 1, metrics.Get("stats.alert_notifiers.slack.count").MustInt()) assert.Equal(t, 2, metrics.Get("stats.alert_notifiers.webhook.count").MustInt()) @@ -559,19 +552,6 @@ func TestMetrics(t *testing.T) { }) } -type alertingUsageMock struct{} - -func (aum *alertingUsageMock) QueryUsageStats() (*alerting.UsageStats, error) { - return &alerting.UsageStats{ - DatasourceUsage: map[string]int{ - "prometheus": 1, - "graphite": 2, - "mysql": 5, - "unknown-datasource": 90, - }, - }, nil -} - type fakePluginManager struct { manager.PluginManager @@ -640,14 +620,13 @@ func createService(t *testing.T, cfg setting.Cfg) *UsageStats { sqlStore := sqlstore.InitTestDB(t) return &UsageStats{ - Bus: bus.New(), - Cfg: &cfg, - SQLStore: sqlStore, - AlertingUsageStats: &alertingUsageMock{}, - externalMetrics: make([]usagestats.MetricsFunc, 0), - PluginManager: &fakePluginManager{}, - kvStore: kvstore.WithNamespace(kvstore.ProvideService(sqlStore), 0, "infra.usagestats"), - log: log.New("infra.usagestats"), - startTime: time.Now().Add(-1 * time.Minute), + Bus: bus.New(), + Cfg: &cfg, + SQLStore: sqlStore, + externalMetrics: make([]usagestats.MetricsFunc, 0), + PluginManager: &fakePluginManager{}, + kvStore: kvstore.WithNamespace(kvstore.ProvideService(sqlStore), 0, "infra.usagestats"), + log: log.New("infra.usagestats"), + startTime: time.Now().Add(-1 * time.Minute), } } diff --git a/pkg/services/alerting/engine.go b/pkg/services/alerting/engine.go index 2a4ac15543d..1a7478b8bcd 100644 --- a/pkg/services/alerting/engine.go +++ b/pkg/services/alerting/engine.go @@ -9,6 +9,7 @@ import ( "github.com/benbjohnson/clock" "github.com/grafana/grafana/pkg/bus" "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/infra/usagestats" "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/plugins" "github.com/grafana/grafana/pkg/services/rendering" @@ -29,13 +30,14 @@ type AlertEngine struct { DataService plugins.DataRequestHandler Cfg *setting.Cfg - execQueue chan *Job - ticker *Ticker - scheduler scheduler - evalHandler evalHandler - ruleReader ruleReader - log log.Logger - resultHandler resultHandler + execQueue chan *Job + ticker *Ticker + scheduler scheduler + evalHandler evalHandler + ruleReader ruleReader + log log.Logger + resultHandler resultHandler + usageStatsService usagestats.Service } // IsDisabled returns true if the alerting service is disable for this instance. @@ -45,13 +47,14 @@ func (e *AlertEngine) IsDisabled() bool { // ProvideAlertEngine returns a new AlertEngine. func ProvideAlertEngine(renderer rendering.Service, bus bus.Bus, requestValidator models.PluginRequestValidator, - dataService plugins.DataRequestHandler, cfg *setting.Cfg) *AlertEngine { + dataService plugins.DataRequestHandler, usageStatsService usagestats.Service, cfg *setting.Cfg) *AlertEngine { e := &AlertEngine{ - Cfg: cfg, - RenderService: renderer, - Bus: bus, - RequestValidator: requestValidator, - DataService: dataService, + Cfg: cfg, + RenderService: renderer, + Bus: bus, + RequestValidator: requestValidator, + DataService: dataService, + usageStatsService: usageStatsService, } e.ticker = NewTicker(time.Now(), time.Second*0, clock.New(), 1) e.execQueue = make(chan *Job, 1000) @@ -61,6 +64,8 @@ func ProvideAlertEngine(renderer rendering.Service, bus bus.Bus, requestValidato e.log = log.New("alerting.engine") e.resultHandler = newResultHandler(e.RenderService) + e.registerUsageMetrics() + return e } @@ -237,3 +242,27 @@ func (e *AlertEngine) processJob(attemptID int, attemptChan chan int, cancelChan close(attemptChan) }() } + +func (e *AlertEngine) registerUsageMetrics() { + e.usageStatsService.RegisterMetricsFunc(func() (map[string]interface{}, error) { + alertingUsageStats, err := e.QueryUsageStats() + if err != nil { + return nil, err + } + + alertingOtherCount := 0 + metrics := map[string]interface{}{} + + for dsType, usageCount := range alertingUsageStats.DatasourceUsage { + if e.usageStatsService.ShouldBeReported(dsType) { + metrics[fmt.Sprintf("stats.alerting.ds.%s.count", dsType)] = usageCount + } else { + alertingOtherCount += usageCount + } + } + + metrics["stats.alerting.ds.other.count"] = alertingOtherCount + + return metrics, nil + }) +} diff --git a/pkg/services/alerting/engine_integration_test.go b/pkg/services/alerting/engine_integration_test.go index 4db265d548e..83872961321 100644 --- a/pkg/services/alerting/engine_integration_test.go +++ b/pkg/services/alerting/engine_integration_test.go @@ -18,7 +18,8 @@ import ( func TestEngineTimeouts(t *testing.T) { Convey("Alerting engine timeout tests", t, func() { - engine := ProvideAlertEngine(nil, nil, nil, nil, setting.NewCfg()) + usMock := &usageStatsMock{t: t} + engine := ProvideAlertEngine(nil, nil, nil, nil, usMock, setting.NewCfg()) setting.AlertingNotificationTimeout = 30 * time.Second setting.AlertingMaxAttempts = 3 engine.resultHandler = &FakeResultHandler{} diff --git a/pkg/services/alerting/engine_test.go b/pkg/services/alerting/engine_test.go index 3326f690b37..3d57a75a46e 100644 --- a/pkg/services/alerting/engine_test.go +++ b/pkg/services/alerting/engine_test.go @@ -5,11 +5,15 @@ import ( "errors" "math" "testing" - "time" + "github.com/grafana/grafana/pkg/bus" + "github.com/grafana/grafana/pkg/components/simplejson" + "github.com/grafana/grafana/pkg/infra/usagestats" + "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/setting" . "github.com/smartystreets/goconvey/convey" + "github.com/stretchr/testify/require" ) type FakeEvalHandler struct { @@ -37,15 +41,65 @@ func (handler *FakeResultHandler) handle(evalContext *EvalContext) error { return nil } +type usageStatsMock struct { + t *testing.T + metricsFuncs []usagestats.MetricsFunc +} + +func (usm *usageStatsMock) RegisterMetricsFunc(fn usagestats.MetricsFunc) { + usm.metricsFuncs = append(usm.metricsFuncs, fn) +} + +func (usm *usageStatsMock) GetUsageReport(_ context.Context) (usagestats.Report, error) { + all := make(map[string]interface{}) + for _, fn := range usm.metricsFuncs { + fnMetrics, err := fn() + require.NoError(usm.t, err) + + for name, value := range fnMetrics { + all[name] = value + } + } + return usagestats.Report{Metrics: all}, nil +} + +func (usm *usageStatsMock) ShouldBeReported(_ string) bool { + return true +} + func TestEngineProcessJob(t *testing.T) { Convey("Alerting engine job processing", t, func() { - engine := ProvideAlertEngine(nil, nil, nil, nil, setting.NewCfg()) + bus := bus.New() + usMock := &usageStatsMock{t: t} + engine := ProvideAlertEngine(nil, bus, nil, nil, usMock, setting.NewCfg()) setting.AlertingEvaluationTimeout = 30 * time.Second setting.AlertingNotificationTimeout = 30 * time.Second setting.AlertingMaxAttempts = 3 engine.resultHandler = &FakeResultHandler{} job := &Job{running: true, Rule: &Rule{}} + Convey("Should register usage metrics func", func() { + bus.AddHandler(func(q *models.GetAllAlertsQuery) error { + settings, err := simplejson.NewJson([]byte(`{"conditions": [{"query": { "datasourceId": 1}}]}`)) + if err != nil { + return err + } + q.Result = []*models.Alert{{Settings: settings}} + return nil + }) + + bus.AddHandler(func(q *models.GetDataSourceQuery) error { + q.Result = &models.DataSource{Id: 1, Type: models.DS_PROMETHEUS} + return nil + }) + + report, err := usMock.GetUsageReport(context.Background()) + So(err, ShouldBeNil) + + So(report.Metrics["stats.alerting.ds.prometheus.count"], ShouldEqual, 1) + So(report.Metrics["stats.alerting.ds.other.count"], ShouldEqual, 0) + }) + Convey("Should trigger retry if needed", func() { Convey("error + not last attempt -> retry", func() { engine.evalHandler = NewFakeEvalHandler(0)