Usagestats: Add usage stats about what type of datasource is used in alerting. (#23125)

This commit is contained in:
Carl Bergquist 2020-04-02 08:45:04 +02:00 committed by GitHub
parent b017e437d8
commit 15bff3114f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 496 additions and 6 deletions

View File

@ -7,6 +7,7 @@ import (
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/login/social"
"github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/alerting"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/infra/log"
@ -21,16 +22,19 @@ func init() {
}
// UsageStatsService periodically collects and reports anonymous usage
// statistics. Dependencies are wired via the `inject:""` tags.
type UsageStatsService struct {
	// NOTE(review): the next four fields are repeated below — this looks
	// like diff residue (the pre-change field list rendered alongside the
	// new one). Go rejects duplicate struct fields; confirm and drop one copy.
	Cfg      *setting.Cfg       `inject:""`
	Bus      bus.Bus            `inject:""`
	SQLStore *sqlstore.SqlStore `inject:""`
	License  models.Licensing   `inject:""`

	Cfg                *setting.Cfg               `inject:""`
	Bus                bus.Bus                    `inject:""`
	SQLStore           *sqlstore.SqlStore         `inject:""`
	AlertingUsageStats alerting.UsageStatsQuerier `inject:""`
	License            models.Licensing           `inject:""`

	// log is the service-scoped logger, created in Init.
	log Logger
	// oauthProviders caches which OAuth providers are enabled in the
	// configuration; populated in Init.
	oauthProviders map[string]bool
}
// Init initializes the service: it creates the service logger and caches
// the OAuth providers enabled in the configuration. It never fails.
func (uss *UsageStatsService) Init() error {
	uss.log = log.New("infra.usagestats")
	uss.oauthProviders = social.GetOAuthProviders(uss.Cfg)
	return nil
}

View File

@ -94,6 +94,29 @@ func (uss *UsageStatsService) sendUsageStats(oauthProviders map[string]bool) {
metrics["stats.packaging."+setting.Packaging+".count"] = 1
// Alerting stats
alertingUsageStats, err := uss.AlertingUsageStats.QueryUsageStats()
if err != nil {
uss.log.Error("Failed to get alerting usage stats", "error", err)
return
}
var addAlertingUsageStats = func(dsType string, usageCount int) {
metrics[fmt.Sprintf("stats.alerting.ds.%s.count", dsType)] = usageCount
}
alertingOtherCount := 0
for dsType, usageCount := range alertingUsageStats.DatasourceUsage {
if models.IsKnownDataSourcePlugin(dsType) {
addAlertingUsageStats(dsType, usageCount)
} else {
alertingOtherCount += usageCount
}
}
addAlertingUsageStats("other", alertingOtherCount)
// fetch datasource access stats
dsAccessStats := models.GetDataSourceAccessStatsQuery{}
if err := uss.Bus.Dispatch(&dsAccessStats); err != nil {
metricsLogger.Error("Failed to get datasource access stats", "error", err)
@ -123,6 +146,7 @@ func (uss *UsageStatsService) sendUsageStats(oauthProviders map[string]bool) {
metrics["stats.ds_access.other."+access+".count"] = count
}
// get stats about alert notifier usage
anStats := models.GetAlertNotifierUsageStatsQuery{}
if err := uss.Bus.Dispatch(&anStats); err != nil {
metricsLogger.Error("Failed to get alert notification stats", "error", err)
@ -133,6 +157,7 @@ func (uss *UsageStatsService) sendUsageStats(oauthProviders map[string]bool) {
metrics["stats.alert_notifiers."+stats.Type+".count"] = stats.Count
}
// Add stats about auth configuration
authTypes := map[string]bool{}
authTypes["anonymous"] = setting.AnonymousEnabled
authTypes["basic_auth"] = setting.BasicAuthEnabled

View File

@ -2,13 +2,15 @@ package usagestats
import (
"bytes"
"github.com/grafana/grafana/pkg/services/licensing"
"io/ioutil"
"runtime"
"sync"
"testing"
"time"
"github.com/grafana/grafana/pkg/services/alerting"
"github.com/grafana/grafana/pkg/services/licensing"
"net/http"
"net/http/httptest"
@ -143,6 +145,8 @@ func TestMetrics(t *testing.T) {
return nil
})
uss.AlertingUsageStats = &alertingUsageMock{}
var wg sync.WaitGroup
var responseBuffer *bytes.Buffer
var req *http.Request
@ -245,6 +249,11 @@ func TestMetrics(t *testing.T) {
So(metrics.Get("stats.ds_access.other.direct.count").MustInt(), ShouldEqual, 6+7)
So(metrics.Get("stats.ds_access.other.proxy.count").MustInt(), ShouldEqual, 4+8)
So(metrics.Get("stats.alerting.ds.prometheus.count").MustInt(), ShouldEqual, 1)
So(metrics.Get("stats.alerting.ds.graphite.count").MustInt(), ShouldEqual, 2)
So(metrics.Get("stats.alerting.ds.mysql.count").MustInt(), ShouldEqual, 5)
So(metrics.Get("stats.alerting.ds.other.count").MustInt(), ShouldEqual, 90)
So(metrics.Get("stats.alert_notifiers.slack.count").MustInt(), ShouldEqual, 1)
So(metrics.Get("stats.alert_notifiers.webhook.count").MustInt(), ShouldEqual, 2)
@ -326,3 +335,16 @@ func waitTimeout(wg *sync.WaitGroup, timeout time.Duration) bool {
return true // timed out
}
}
// alertingUsageMock is a stub implementation of alerting.UsageStatsQuerier
// used to feed fixed alerting usage numbers into the metrics tests.
type alertingUsageMock struct{}

// QueryUsageStats returns a fixed per-datasource-type alert count,
// including one type ("unknown-datasource") that is not a known plugin.
func (aum *alertingUsageMock) QueryUsageStats() (*alerting.UsageStats, error) {
	usage := map[string]int{
		"prometheus":         1,
		"graphite":           2,
		"mysql":              5,
		"unknown-datasource": 90,
	}

	return &alerting.UsageStats{DatasourceUsage: usage}, nil
}

View File

@ -0,0 +1,113 @@
package alerting
import (
"encoding/json"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/models"
)
// DatasourceAlertUsage is a map where the key is the
// datasource type and the value is how many alert rules
// use datasources of that type.
type DatasourceAlertUsage map[string]int
// UsageStats contains stats about alert rules configured in
// Grafana.
type UsageStats struct {
	// DatasourceUsage maps datasource type to the number of alert
	// rules that use a datasource of that type.
	DatasourceUsage DatasourceAlertUsage
}
// UsageStatsQuerier returns usage stats about alert rules
// configured in Grafana. Implemented by AlertEngine; consumers
// (e.g. the usagestats service) depend on this interface so the
// engine can be mocked in tests.
type UsageStatsQuerier interface {
	QueryUsageStats() (*UsageStats, error)
}
// QueryUsageStats returns usage stats about alert rules
// configured in Grafana. It fetches every alert rule over the bus
// and aggregates its datasource usage per datasource type.
func (ae *AlertEngine) QueryUsageStats() (*UsageStats, error) {
	cmd := &models.GetAllAlertsQuery{}
	if err := ae.Bus.Dispatch(cmd); err != nil {
		return nil, err
	}

	dsUsage, err := ae.mapRulesToUsageStats(cmd.Result)
	if err != nil {
		return nil, err
	}

	return &UsageStats{DatasourceUsage: dsUsage}, nil
}
// mapRulesToUsageStats aggregates the given alert rules into a map of
// datasource type -> number of alert rule conditions using a datasource
// of that type. Rules whose settings cannot be parsed are skipped (with
// a debug log) rather than failing the whole aggregation.
func (ae *AlertEngine) mapRulesToUsageStats(rules []*models.Alert) (DatasourceAlertUsage, error) {
	// map of datasource id and frequency
	typeCount := map[int64]int{}
	for _, a := range rules {
		dss, err := ae.parseAlertRuleModel(a.Settings)
		if err != nil {
			ae.log.Debug("could not parse settings for alert rule", "id", a.Id)
			continue
		}

		for _, d := range dss {
			// aggregated datasource usage based on datasource id
			typeCount[d]++
		}
	}

	// map of datasource types and frequency
	result := map[string]int{}
	for k, v := range typeCount {
		query := &models.GetDataSourceByIdQuery{Id: k}
		err := ae.Bus.Dispatch(query)
		if err != nil {
			// BUG FIX: this previously returned a nil error, silently
			// reporting an empty usage map as success when a datasource
			// lookup failed. Propagate the error to the caller instead.
			return map[string]int{}, err
		}

		// aggregate datasource usages based on datasource type
		result[query.Result.Type] += v
	}

	return result, nil
}
// parseAlertRuleModel extracts the datasource ids referenced by the
// conditions of a single alert rule's JSON settings. It returns an
// empty (non-nil) slice for nil settings or settings without
// conditions; callers rely on the slice never being nil.
func (ae *AlertEngine) parseAlertRuleModel(settings *simplejson.Json) ([]int64, error) {
	datasourceIDs := []int64{}
	model := alertJSONModel{}

	if settings == nil {
		return datasourceIDs, nil
	}

	// Renamed from "bytes" to avoid shadowing the standard library
	// bytes package.
	rawJSON, err := settings.MarshalJSON()
	if err != nil {
		return datasourceIDs, err
	}

	err = json.Unmarshal(rawJSON, &model)
	if err != nil {
		return datasourceIDs, err
	}

	for _, condition := range model.Conditions {
		datasourceIDs = append(datasourceIDs, condition.Query.DatasourceID)
	}

	return datasourceIDs, nil
}
// alertCondition is the subset of an alert rule condition we care
// about when computing usage stats: only its query.
type alertCondition struct {
	Query *conditionQuery `json:"query"`
}

// conditionQuery carries the datasource id a condition queries.
type conditionQuery struct {
	DatasourceID int64 `json:"datasourceId"`
}

// alertJSONModel is the minimal unmarshal target for an alert rule's
// settings JSON; everything except the conditions is ignored.
type alertJSONModel struct {
	Conditions []*alertCondition `json:"conditions"`
}

View File

@ -0,0 +1,125 @@
package alerting
import (
"io/ioutil"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/models"
"github.com/stretchr/testify/require"
)
// TestAlertingUsageStats verifies that QueryUsageStats resolves the
// datasource ids referenced by alert rule conditions into datasource
// types and aggregates the per-type counts.
func TestAlertingUsageStats(t *testing.T) {
	ae := &AlertEngine{
		Bus: bus.New(),
	}

	ae.Bus.AddHandler(func(query *models.GetAllAlertsQuery) error {
		var createFake = func(file string) *simplejson.Json {
			content, err := ioutil.ReadFile(file)
			require.NoError(t, err, "expected to be able to read file")

			j, _ := simplejson.NewJson(content)
			return j
		}

		// NOTE(review): two rules share Id 2 — presumably intentional
		// fixture data (ids are only used for debug logging), but confirm
		// the ids were not meant to be unique.
		query.Result = []*models.Alert{
			{Id: 1, Settings: createFake("testdata/settings/one_condition.json")},
			{Id: 2, Settings: createFake("testdata/settings/two_conditions.json")},
			{Id: 2, Settings: createFake("testdata/settings/three_conditions.json")},
			{Id: 3, Settings: createFake("testdata/settings/empty.json")},
		}
		return nil
	})

	ae.Bus.AddHandler(func(query *models.GetDataSourceByIdQuery) error {
		ds := map[int64]*models.DataSource{
			1: {Type: "influxdb"},
			2: {Type: "graphite"},
			3: {Type: "prometheus"},
			4: {Type: "prometheus"},
		}

		r, exist := ds[query.Id]
		if !exist {
			return models.ErrDataSourceNotFound
		}

		query.Result = r
		return nil
	})

	err := ae.Init()
	require.NoError(t, err, "Init should not return error")

	result, err := ae.QueryUsageStats()
	// Message fixed: it previously referenced a nonexistent
	// "getAlertingUsage" function.
	require.NoError(t, err, "QueryUsageStats should not return error")

	expected := map[string]int{
		"prometheus": 4,
		"graphite":   2,
	}

	for k := range expected {
		if expected[k] != result.DatasourceUsage[k] {
			// "missmatch" typo fixed.
			t.Errorf("result mismatch for %s. got %v expected %v", k, result.DatasourceUsage[k], expected[k])
		}
	}
}
// TestParsingAlertRuleSettings table-tests parseAlertRuleModel against
// fixture settings files: one, two, and zero conditions, plus content
// that is not in the expected format.
func TestParsingAlertRuleSettings(t *testing.T) {
	tcs := []struct {
		name      string
		file      string
		expected  []int64
		shouldErr require.ErrorAssertionFunc
	}{
		{
			// "singel" typo fixed in the test name.
			name:      "can parse single condition",
			file:      "testdata/settings/one_condition.json",
			expected:  []int64{3},
			shouldErr: require.NoError,
		},
		{
			name:      "can parse multiple conditions",
			file:      "testdata/settings/two_conditions.json",
			expected:  []int64{3, 2},
			shouldErr: require.NoError,
		},
		{
			name:      "can parse empty json",
			file:      "testdata/settings/empty.json",
			expected:  []int64{},
			shouldErr: require.NoError,
		},
		{
			name:      "can parse blank content",
			file:      "testdata/settings/invalid_format.json",
			expected:  []int64{},
			shouldErr: require.NoError,
		},
	}

	ae := &AlertEngine{}
	err := ae.Init()
	require.NoError(t, err, "Init should not return an error")

	for _, tc := range tcs {
		t.Run(tc.name, func(t *testing.T) {
			content, err := ioutil.ReadFile(tc.file)
			require.NoError(t, err, "expected to be able to read file")

			j, _ := simplejson.NewJson(content)
			result, err := ae.parseAlertRuleModel(j)

			tc.shouldErr(t, err)
			diff := cmp.Diff(tc.expected, result)
			if diff != "" {
				// "missmatch" typo fixed.
				t.Errorf("result mismatch (-want +got) %s\n", diff)
			}
		})
	}
}

View File

@ -10,6 +10,7 @@ import (
tlog "github.com/opentracing/opentracing-go/log"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/registry"
"github.com/grafana/grafana/pkg/services/rendering"
@ -23,6 +24,7 @@ import (
// are sent.
type AlertEngine struct {
RenderService rendering.Service `inject:""`
Bus bus.Bus `inject:""`
execQueue chan *Job
ticker *Ticker

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,38 @@
{
"conditions": [
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 3,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"enabled": true,
"frequency": "60s",
"handler": 1,
"name": "TestData - Always OK",
"noDataState": "no_data",
"notifications": []
}

View File

@ -0,0 +1,94 @@
{
"conditions": [
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 3,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
},
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 2,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
},
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 4,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"enabled": true,
"frequency": "60s",
"handler": 1,
"name": "TestData - Always OK",
"noDataState": "no_data",
"notifications": []
}

View File

@ -0,0 +1,66 @@
{
"conditions": [
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 3,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
},
{
"evaluator": {
"params": [
60
],
"type": "gt"
},
"query": {
"datasourceId": 2,
"model": {
"refId": "A",
"scenario": "random_walk",
"scenarioId": "csv_metric_values",
"stringInput": "1,20,90,30,5,0",
"target": ""
},
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"enabled": true,
"frequency": "60s",
"handler": 1,
"name": "TestData - Always OK",
"noDataState": "no_data",
"notifications": []
}