Prometheus: Fix recording rules expansion (#24977)

* First pass solution

* Refactor solution

* Add test coverage, update tests

* Fix behaviour for multiple labels, add test for this

* Add recordin rules to devenv prometheus

* Update devenv/prometheus2 instead of devenv/prometheus

* Add newlines

* Fix label matching if labels include comma, add test coverage

* Refactor

* Refactor, simplify
This commit is contained in:
Ivana Huckova
2020-05-22 15:16:01 +02:00
committed by GitHub
parent 4f1bbdfc0a
commit 4d18bda2e1
7 changed files with 103 additions and 21 deletions

View File

@@ -1,3 +1,4 @@
FROM prom/prometheus:v2.7.2
ADD prometheus.yml /etc/prometheus/
ADD alert.rules /etc/prometheus/
ADD recording.yml /etc/prometheus/
ADD alert.yml /etc/prometheus/

View File

@@ -1,10 +0,0 @@
# Alert Rules
ALERT AppCrash
IF process_open_fds > 0
FOR 15s
LABELS { severity="critical" }
ANNOTATIONS {
summary = "Number of open fds > 0",
description = "Just testing"
}

View File

@@ -0,0 +1,11 @@
groups:
- name: ALERT
rules:
- alert: AppCrash
expr: process_open_fds > 0
for: 15s
labels:
severity: critical
annotations:
summary: Number of open fds > 0
description: Just testing

View File

@@ -5,17 +5,17 @@ global:
# scrape_timeout is set to the global default (10s).
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
#rule_files:
# - "alert.rules"
# - "first.rules"
rule_files:
- "alert.yml"
- "recording.yml"
# - "second.rules"
# alerting:
# alertmanagers:
# - scheme: http
# static_configs:
# - targets:
# - "127.0.0.1:9093"
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- "alertmanager:9093"
scrape_configs:
- job_name: 'prometheus'

View File

@@ -0,0 +1,16 @@
groups:
- name: RECORDING_RULES
rules:
- record: instance_path:requests:rate5m
expr: rate(prometheus_http_requests_total{job="prometheus"}[5m])
- record: path:requests:rate5m
expr: sum without (instance)(instance_path:requests:rate5m{job="prometheus"})
- record: instance_path:reloads_failures:rate5m
expr: rate(prometheus_tsdb_reloads_failures_total{job="prometheus"}[5m])
- record: instance_path:reloads:rate5m
expr: rate(prometheus_tsdb_reloads_total{job="prometheus"}[5m])
- record: instance_path:request_failures_per_requests:ratio_rate5m
expr: |2
instance_path:reloads_failures:rate5m{job="prometheus"}
/
instance_path:reloads:rate5m{job="prometheus"}