grafana/alerting_model.json
2016-06-09 12:14:50 +02:00

168 lines
4.3 KiB
JSON

{
"alert": {
"name": "Majority servers down",
"frequency": 60,
"notify": ["group1", "group2"],
"expressions": [
{
"left": [
{
"type": "query",
"refId": "A",
"timeRange": {"from": "5m", "to": "now-1m"},
},
{
"type": "function",
"name": "max"
}
],
"operator": ">",
"right": [
{
"type": "constant",
"value": 100
}
],
"level": 2,
}
]
},
"alert": {
"name": "Majority servers down take2",
"frequency": 60,
"notify": ["group1", "group2"],
"expressions": [
{
"left": [
{
"type": "query",
"refId": "A",
"timeRange": {"from": "5m", "to": "now-1m"},
},
{
"type": "function",
"name": "max"
}
],
"operator": ">",
"right": [
{
"type": "query",
"refId": "A",
"timeRange": {"from": "now-1d-5m", "to": "now-1d"},
},
{
"type": "function",
"name": "max"
}
],
"level": 2,
}
]
},
"alert": {
"name": "CPU usage last 5min above 90%",
"frequency": 60,
"expressions": [
{
"expr": "query(#A, 5m, now, avg)",
"operator": ">",
"critLevel": 90,
}
]
},
"alert": {
"name": "Series count above 10",
"frequency": "1m",
"expressions": [
{
"expr": "query(#A, 5m, now, avg) | countSeries()",
"operator": ">",
"critLevel": 10,
}
]
},
"alert": {
"name": "Disk Free Zero in 3 days",
"frequency": "1d",
"expressions": [
{
"expr": "query(#A, 1d, now, trend(3d))",
"operator": ">",
"critLevel": 0,
}
]
},
"alert": {
"name": "Server requests is zero for more than 10min",
"frequency": "1d",
"expressions": [
{
"expr": "query(#A, 10m, now, sum)",
"operator": "=",
"critLevel": 0,
}
]
},
"alert": {
"name": "Timeouts should not be more than 0.1% of requests",
"frequency": "1d",
"expressions": [
{
"expr": "query(#A, 10m, now, sum) | subtract | query(#B, 10m, now, sum)",
"operator": ">",
"critLevel": 0,
}
]
},
"alert": {
"name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
"frequency": "1m",
"value": "query(#A, 5m, now, avg)",
"operator": "percent change",
"threshold": "query(#A, 1d, now, avg)",
},
"alert": {
"name": "CPU higher than 90%",
"frequency": "1m",
"valueExpr": "query(#A, 5m, now, avg)",
"evalType": "greater than",
"critLevel": 20,
"warnLevel": 10,
},
"alert": {
"name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
"frequency": "1m",
"expr": "query(#A, 5m, now, avg) percentGreaterThan()",
"evalType": "percentscre change",
"evalExpr": "query(#A, 1d, now, avg)",
"critLevel": 20,
"warnLevel": 10,
},
"alert": {
"name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
"frequency": "1m",
"valueQuery": "query(#A, 5m, now, avg) ",
"evalType": "simple", "// other options are: percent change, trend"
"evalQuery": "query(#A, 1d, now, avg)",
"comparison": "greater than",
"critLevel": 20,
"warnLevel": 10,
},
"alert": {
"name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
"frequency": "1m",
"valueQuery": "query(#A, 5m, now, avg) | Evaluate Against: Static Threshold | >200 Warn | >300 Critical",
"valueQuery": "query(#A, 5m, now, avg) | Evaluate Against: Percent Change Compared To | query(#B, 5m, now, avg) | >200 Warn | >300 Critical",
"valueQuery": "query(#A, 5m, now, trend) | Evaluate Against: Forcast | 7days | >200 Warn | >300 Critical",
"evalType": "simple", "// other options are: percent change, trend"
"evalQuery": "query(#A, 1d, now, avg)",
"comparison": "greater than",
"critLevel": 20,
"warnLevel": 10,
},
}