mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
grafana-mixin: Fix GrafanaRequestsFailing alert (#43116)
Due to PromQL's label matching, both sides of the division will have same series; that means that, whenever there's a 5xx error, both sides will have the same value and the division will be `1`. I believe the idea was to get the ratio of 5xx compared will all status code, and to do that, we need to aggregate the `status_code` dimension away.
This commit is contained in:
31
grafana-mixin/alerts/alerts.libsonnet
Normal file
31
grafana-mixin/alerts/alerts.libsonnet
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
_config+:: {
|
||||
grafanaRequestsFailingThresholdPercent: 50,
|
||||
},
|
||||
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'GrafanaAlerts',
|
||||
rules: [
|
||||
{
|
||||
alert: 'GrafanaRequestsFailing',
|
||||
expr: |||
|
||||
100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
|
||||
/ ignoring (status_code)
|
||||
sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
|
||||
> %(grafanaRequestsFailingThresholdPercent)s
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors',
|
||||
},
|
||||
'for': '5m',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
groups:
|
||||
- name: GrafanaAlerts
|
||||
rules:
|
||||
- alert: GrafanaRequestsFailing
|
||||
for: 5m
|
||||
expr: |
|
||||
100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
|
||||
/
|
||||
namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"}
|
||||
> 0.5
|
||||
labels:
|
||||
severity: 'warning'
|
||||
annotations:
|
||||
message: "'{{ $labels.namespace }}' / '{{ $labels.job }}' / '{{ $labels.handler }}' is experiencing {{ $value | humanize }}% errors"
|
||||
Reference in New Issue
Block a user