mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Add monitoring mixing for Grafana (#28285)
Co-authored-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
parent
febdad4da2
commit
6002df580f
3
grafana-mixin/.gitignore
vendored
Normal file
3
grafana-mixin/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
alerts.yaml
|
||||
rules.yaml
|
||||
dashboards_out
|
21
grafana-mixin/Makefile
Normal file
21
grafana-mixin/Makefile
Normal file
@ -0,0 +1,21 @@
|
||||
JSONNET_FMT := jsonnetfmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
|
||||
|
||||
all: fmt lint build clean
|
||||
|
||||
fmt:
|
||||
find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
|
||||
xargs -n 1 -- $(JSONNET_FMT) -i
|
||||
|
||||
lint:
|
||||
find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
|
||||
while read f; do \
|
||||
$(JSONNET_FMT) "$$f" | diff -u "$$f" -; \
|
||||
done
|
||||
|
||||
mixtool lint mixin.libsonnet
|
||||
|
||||
build:
|
||||
mixtool generate all mixin.libsonnet
|
||||
|
||||
clean:
|
||||
rm -rf dashboards_out alerts.yaml rules.yaml
|
28
grafana-mixin/README.md
Normal file
28
grafana-mixin/README.md
Normal file
@ -0,0 +1,28 @@
|
||||
# Grafana Mixin
|
||||
|
||||
_This is a work in progress. We aim for it to become a good role model for alerts
|
||||
and dashboards eventually, but it is not quite there yet._
|
||||
|
||||
The Grafana Mixin is a set of configurable, reusable, and extensible alerts and
|
||||
dashboards based on the metrics exported by Grafana. The mixin creates
|
||||
recording and alerting rules for Prometheus and suitable dashboard descriptions
|
||||
for Grafana.
|
||||
|
||||
To use them, you need to have `mixtool` and `jsonnetfmt` installed. If you
|
||||
have a working Go development environment, it's easiest to run the following:
|
||||
|
||||
```bash
|
||||
$ go get github.com/monitoring-mixins/mixtool/cmd/mixtool
|
||||
$ go get github.com/google/go-jsonnet/cmd/jsonnetfmt
|
||||
```
|
||||
|
||||
You can then build the Prometheus rules files `alerts.yaml` and
|
||||
`rules.yaml` and a directory `dashboard_out` with the JSON dashboard files
|
||||
for Grafana:
|
||||
|
||||
```bash
|
||||
$ make build
|
||||
```
|
||||
|
||||
For more advanced uses of mixins, see
|
||||
https://github.com/monitoring-mixins/docs.
|
14
grafana-mixin/alerts/alerts.yaml
Normal file
14
grafana-mixin/alerts/alerts.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
groups:
|
||||
- name: GrafanaAlerts
|
||||
rules:
|
||||
- alert: GrafanaRequestsFailing
|
||||
for: 5m
|
||||
expr: |
|
||||
100 * namespace_job_handler_statuscode:http_request_total:rate5m{handler!~"/datasources/proxy/:id.*|/ds/query|/tsdb/query", statuscode=~"5.."}
|
||||
/
|
||||
namespace_job_handler_statuscode:http_request_total:rate5m{handler!~"/datasources/proxy/:id.*|/ds/query|/tsdb/query"}
|
||||
> 0.5
|
||||
labels:
|
||||
severity: 'critical'
|
||||
annotations:
|
||||
message: "'{{ $labels.namespace }}' / '{{ $labels.job }}' / '{{ $labels.handler }}' is experiencing {{ $value | humanize }}% errors"
|
528
grafana-mixin/dashboards/grafana-overview.json
Normal file
528
grafana-mixin/dashboards/grafana-overview.json
Normal file
@ -0,0 +1,528 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 35,
|
||||
"iteration": 1602761142538,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"noValue": "0",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"pluginVersion": "7.0.4",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Firing Alerts",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"pluginVersion": "7.0.4",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Dashboards",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": null
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "7.0.4",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Build Info",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Value": true,
|
||||
"branch": true,
|
||||
"container": true,
|
||||
"goversion": true,
|
||||
"namespace": true,
|
||||
"pod": true,
|
||||
"revision": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time": 7,
|
||||
"Value": 11,
|
||||
"branch": 4,
|
||||
"container": 8,
|
||||
"edition": 2,
|
||||
"goversion": 6,
|
||||
"instance": 1,
|
||||
"job": 0,
|
||||
"namespace": 9,
|
||||
"pod": 10,
|
||||
"revision": 5,
|
||||
"version": 3
|
||||
},
|
||||
"renameByName": {}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"dataLinks": []
|
||||
},
|
||||
"percentage": false,
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (statuscode) (irate(http_request_total{job=~\"$job\", instance=~\"$instance\"}[1m])) ",
|
||||
"interval": "",
|
||||
"legendFormat": "{{statuscode}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "RPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:157",
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:158",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"dataLinks": []
|
||||
},
|
||||
"percentage": false,
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(http_request_duration_milliseconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"})",
|
||||
"interval": "",
|
||||
"legendFormat": "max-99th",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "max(http_request_duration_milliseconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.9\"})",
|
||||
"interval": "",
|
||||
"legendFormat": "max-90th",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(http_request_duration_milliseconds_sum{job=~\"$job\", instance=~\"$instance\"}[$__interval])) / sum(irate(http_request_duration_milliseconds_count{job=~\"$job\", instance=~\"$instance\"}[$__interval])) ",
|
||||
"interval": "",
|
||||
"legendFormat": "avg",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Request Latency",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:210",
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:211",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"schemaVersion": 25,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "prometheus",
|
||||
"value": "prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"queryValue": "",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"tags": [],
|
||||
"text": "All",
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"definition": "label_values(grafana_build_info, job)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [],
|
||||
"query": "label_values(grafana_build_info, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"definition": "label_values(grafana_build_info, instance)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": "label_values(grafana_build_info, instance)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Grafana Overview",
|
||||
"uid": "6be0s85Mk",
|
||||
"version": 4
|
||||
}
|
15
grafana-mixin/mixin.libsonnet
Normal file
15
grafana-mixin/mixin.libsonnet
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
grafanaDashboards: {
|
||||
'grafana-overview.json': (import 'dashboards/grafana-overview.json'),
|
||||
},
|
||||
|
||||
// Helper function to ensure that we don't override other rules, by forcing
|
||||
// the patching of the groups list, and not the overall rules object.
|
||||
local importRules(rules) = {
|
||||
groups+: std.native('parseYaml')(rules)[0].groups,
|
||||
},
|
||||
|
||||
prometheusRules+: importRules(importstr 'rules/rules.yaml'),
|
||||
|
||||
prometheusAlerts+: importRules(importstr 'alerts/alerts.yaml'),
|
||||
}
|
7
grafana-mixin/rules/rules.yaml
Normal file
7
grafana-mixin/rules/rules.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
groups:
|
||||
- name: grafana_rules
|
||||
rules:
|
||||
# Record error rate of http requests excluding dataproxy, /ds/query and /tsdb/query requests
|
||||
- record: namespace_job_handler_statuscode:http_request_total:rate5m
|
||||
expr: |
|
||||
sum by (namespace, job, handler, statuscode) (rate(http_request_total[5m]))
|
@ -224,6 +224,7 @@ def lint_backend_step(edition):
|
||||
'revive -formatter stylish -config scripts/go/configs/revive.toml ./pkg/...',
|
||||
'./scripts/revive-strict',
|
||||
'./scripts/tidy-check.sh',
|
||||
'./scripts/mixin-check.sh,
|
||||
],
|
||||
}
|
||||
|
||||
|
7
scripts/mixin-check.sh
Executable file
7
scripts/mixin-check.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
|
||||
cd grafana-mixin
|
||||
go install github.com/monitoring-mixins/mixtool/cmd/mixtool
|
||||
go install github.com/google/go-jsonnet/cmd/jsonnetfmt
|
||||
make lint build
|
Loading…
Reference in New Issue
Block a user