Alerting: QoL improvements to the unified alerting multi-replica devenv (#64907)

This commit is contained in:
gotjosh 2023-03-17 07:14:31 +00:00 committed by GitHub
parent e22e12455d
commit 406431df4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 258 additions and 35 deletions

2
.gitignore vendored
View File

@ -79,6 +79,8 @@ public/css/*.min.css
/devenv/docker-compose.override.yaml
/devenv/.env
/devenv/docker/blocks/tempo/tempo-data/
/devenv/docker/ha-test-unified-alerting/logs/webhook/dumps/
/devenv/docker/ha-test-unified-alerting/logs/webhook/webhook-listener.log
conf/custom.ini
/conf/provisioning/**/custom.yaml

View File

@ -0,0 +1,12 @@
# Image for the webhook listener used by the unified alerting multi-replica devenv.
FROM golang:1.19
# COPY instead of ADD: the source is a plain local file, so none of ADD's
# archive-extraction / remote-URL behaviour is wanted.
COPY webhook-listener.go /go/src/webhook/webhook-listener.go
WORKDIR /go/src/webhook
# Directory the listener writes its log file and payload dumps under.
RUN mkdir /tmp/logs
# -o points at a directory, so the binary is written as /bin/webhook-listener.
RUN go build -o /bin webhook-listener.go
# NOTE(review): webhook-listener.go binds 0.0.0.0:8080 directly and does not
# read PORT; the variable is informational only.
ENV PORT=8080
ENTRYPOINT [ "/bin/webhook-listener" ]

View File

@ -32,7 +32,7 @@ services:
environment:
- VIRTUAL_HOST=prometheus.loc
ports:
- 909
- 9090
nginx-proxy:
image: jwilder/nginx-proxy
ports:
@ -43,48 +43,53 @@ services:
db:
condition: service_healthy
grafana1:
image: grafana/grafana:dev
volumes:
- ./grafana/provisioning/:/etc/grafana/provisioning/
environment:
- VIRTUAL_HOST=grafana.loc
- GF_FEATURE_TOGGLES_ENABLE=ngalert
- GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094
- GF_SERVER_ROOT_URL=http://grafana.loc
- GF_DATABASE_NAME=grafana
- GF_DATABASE_USER=grafana
- GF_DATABASE_PASSWORD=password
- GF_DATABASE_TYPE=mysql
- GF_DATABASE_HOST=db:3306
- GF_DATABASE_MAX_OPEN_CONN=300
- GF_SESSION_PROVIDER=mysql
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true
extends:
file: ./grafana-service.yml
service: grafana
ports:
- 3010:3000
depends_on:
db:
condition: service_healthy
grafana2:
image: grafana/grafana:dev
volumes:
- ./grafana/provisioning/:/etc/grafana/provisioning/
environment:
- VIRTUAL_HOST=grafana.loc
- GF_FEATURE_TOGGLES_ENABLE=ngalert
- GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094
- GF_SERVER_ROOT_URL=http://grafana.loc
- GF_DATABASE_NAME=grafana
- GF_DATABASE_USER=grafana
- GF_DATABASE_PASSWORD=password
- GF_DATABASE_TYPE=mysql
- GF_DATABASE_HOST=db:3306
- GF_DATABASE_MAX_OPEN_CONN=300
- GF_SESSION_PROVIDER=mysql
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true
extends:
file: ./grafana-service.yml
service: grafana
ports:
- 3020:3000
depends_on:
db:
condition: service_healthy
grafana1:
condition: service_healthy
grafana3:
extends:
file: ./grafana-service.yml
service: grafana
ports:
- 3030:3000
depends_on:
db:
condition: service_healthy
grafana2:
condition: service_healthy
grafana4:
extends:
file: ./grafana-service.yml
service: grafana
ports:
- 3040:3000
depends_on:
db:
condition: service_healthy
grafana3:
condition: service_healthy
webhook:
image: webhook-receiver
build:
context: .
dockerfile: Dockerfile
ports:
- "18081:8080"
volumes:
- "./logs/webhook:/tmp/logs:rw"

View File

@ -0,0 +1,23 @@
# Shared Grafana service definition. The grafana1..grafana4 services of the
# multi-replica devenv pull it in through `extends`, so every replica runs
# with the same image, provisioning volume, environment and healthcheck.
services:
grafana:
# NOTE(review): the tag looks like a commit SHA of a grafana-dev build - confirm before bumping.
image: grafana/grafana-dev:3a22eba17f23b18faa27436ab2f9c3ea977b550b
volumes:
- ./grafana/provisioning/:/etc/grafana/provisioning/
environment:
- VIRTUAL_HOST=grafana.loc
- GF_FEATURE_TOGGLES_ENABLE=ngalert
# Peer list naming the four replica containers (grafana1..grafana4), each on port 9094.
- GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting-grafana2-1:9094,ha-test-unified-alerting-grafana1-1:9094,ha-test-unified-alerting-grafana3-1:9094,ha-test-unified-alerting-grafana4-1:9094
- GF_SERVER_ROOT_URL=http://grafana.loc
# All replicas point at the same MySQL database (host db:3306).
- GF_DATABASE_NAME=grafana
- GF_DATABASE_USER=grafana
- GF_DATABASE_PASSWORD=password
- GF_DATABASE_TYPE=mysql
- GF_DATABASE_HOST=db:3306
- GF_DATABASE_MAX_OPEN_CONN=300
- GF_SESSION_PROVIDER=mysql
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true
# Healthy once a TCP connection to 127.0.0.1:3000 can be opened (bash /dev/tcp
# redirection, bounded by `timeout 10s`); any failure is reported as exit code 1.
healthcheck:
test: timeout 10s bash -c ':> /dev/tcp/127.0.0.1/3000' || exit 1
interval: 5s
timeout: 15s
retries: 3

View File

@ -0,0 +1,181 @@
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"sync"
"time"
)
// Package-level state shared by the HTTP handlers and helpers.
var (
	// fingerprints is the in-memory table of received alerts. Every access
	// in this file happens while holding mu.
	fingerprints = Fingerprints{}
	mu           sync.Mutex

	// Values populated from the command line by parseFlags.
	waitSeconds int
	logFile     bool

	// Output locations, both rooted in the OS temporary directory.
	logFileName = filepath.Join(os.TempDir(), "/logs/webhook-listener.log")
	dumpDir     = filepath.Join(os.TempDir(), "/logs/dumps")
)
type (
	// Alert holds the per-alert fields decoded from a webhook submission.
	Alert struct {
		Fingerprint string    `json:"fingerprint"`
		StartsAt    time.Time `json:"startsAt"`
		Status      string    `json:"status"`
	}

	// Data is the top-level JSON document posted to the "/listen" endpoint.
	Data struct {
		Receiver string  `json:"receiver"`
		Status   string  `json:"status"`
		Alerts   []Alert `json:"alerts"`
	}

	// Fingerprints keeps track of the number of alerts received
	// by fingerprint and StartsAt time.
	Fingerprints map[string]map[time.Time]tracker

	// tracker counts the updates seen for one (fingerprint, StartsAt) pair
	// and remembers the status carried by each of them, in arrival order.
	tracker struct {
		Updates  int      `json:"updates"`
		Statuses []string `json:"statuses"`
	}
)
// updateFingerprints folds every alert in v into the package-level
// fingerprints table: the tracker stored under the alert's fingerprint and
// StartsAt time gets its update counter incremented and the alert's status
// appended. The whole update runs while holding mu.
func updateFingerprints(v Data) {
	mu.Lock()
	defer mu.Unlock()

	for idx := range v.Alerts {
		current := v.Alerts[idx]

		byStart, known := fingerprints[current.Fingerprint]
		if !known {
			byStart = map[time.Time]tracker{}
		}

		// A missing entry yields the zero tracker, which is a valid starting
		// point: the counter is 0 and append works on a nil slice.
		entry := byStart[current.StartsAt]
		entry.Updates++
		entry.Statuses = append(entry.Statuses, current.Status)

		byStart[current.StartsAt] = entry
		fingerprints[current.Fingerprint] = byStart
	}
}
func parseFlags() {
flag.BoolVar(&logFile, "log-file", true, "Whether to log to file")
flag.IntVar(&waitSeconds, "wait-seconds", 0, "The number of seconds to wait before sending an HTTP response")
flag.Parse()
}
// saveDump writes data to a new JSON file inside dumpDir.
//
// Nothing is written when file logging is disabled or data is empty. The
// file is named after the current Unix time in nanoseconds; if that name is
// taken, numbered suffixes _0001 .. _1000 are tried in turn. A write failure
// is logged and otherwise ignored.
func saveDump(data []byte) {
	switch {
	case !logFile:
		return
	case len(data) == 0:
		fmt.Println("empty dump - not saving")
		return
	}

	stamp := time.Now().UnixNano()
	target := path.Join(dumpDir, fmt.Sprintf("%d.json", stamp))
	for attempt := 1; attempt <= 1000; attempt++ {
		_, statErr := os.Stat(target)
		if os.IsNotExist(statErr) {
			// The candidate name is free.
			break
		}
		target = path.Join(dumpDir, fmt.Sprintf("%d_%04d.json", stamp, attempt))
	}

	log.Printf("saving dump to %s", target)
	if writeErr := os.WriteFile(target, data, os.ModePerm); writeErr != nil {
		log.Printf("cannot save to file %s: %s\n", target, writeErr)
	}
}
// main wires up and runs the webhook listener.
//
// It parses the command-line flags, makes sure the dump directory exists,
// optionally redirects the standard logger to the log file, registers the
// HTTP endpoints and then serves on 0.0.0.0:8080 until the server fails:
//
//	/             static landing page
//	/listen       accepts a webhook payload, dumps it and updates the fingerprints table
//	/fingerprints returns the fingerprints table as JSON
func main() {
	parseFlags()

	// MkdirAll succeeds when the directory already exists, so no prior Stat
	// is needed; any other failure is reported together with its cause.
	if err := os.MkdirAll(dumpDir, os.ModePerm); err != nil {
		log.Panicf("can't create directory '%s': %s", dumpDir, err)
	}

	if logFile {
		// Append to the log file, creating it on first use.
		f, err := os.OpenFile(logFileName, os.O_WRONLY|os.O_CREATE|os.O_APPEND, os.ModePerm)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		log.SetOutput(f)
	}

	waitDuration := time.Duration(waitSeconds) * time.Second

	http.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		if _, err := w.Write([]byte(landingPage)); err != nil {
			log.Println(err)
		}
	})

	http.HandleFunc("/listen", func(w http.ResponseWriter, r *http.Request) {
		log.Printf("got submission from: %s\n", r.RemoteAddr)

		b, err := ioutil.ReadAll(r.Body)
		if err != nil {
			log.Println(err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}

		// The raw body is dumped before decoding, so malformed payloads are
		// kept on disk as well.
		saveDump(b)

		v := Data{}
		if err := json.Unmarshal(b, &v); err != nil {
			log.Println(err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}

		fmt.Printf("receiver: %s, status: %s\n", v.Receiver, v.Status)
		updateFingerprints(v)

		// Hold back the (implicit 200) response for the configured delay.
		time.Sleep(waitDuration)
	})

	http.HandleFunc("/fingerprints", func(w http.ResponseWriter, r *http.Request) {
		// Marshal under the lock so the table cannot change mid-encoding.
		b, err := func() ([]byte, error) {
			mu.Lock()
			defer mu.Unlock()
			return json.Marshal(fingerprints)
		}()
		if err != nil {
			log.Println(err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}

		w.Header().Add("Content-Type", "application/json")
		if _, err := w.Write(b); err != nil {
			log.Println(err)
		}
	})

	log.Println("Listening")
	log.Printf("Wait Duration %v\n", waitDuration)

	// ListenAndServe only returns on failure (for example when the port is
	// already taken). Report it and exit non-zero instead of terminating
	// silently. log.Fatal skips the deferred Close above; the log file is
	// released when the process exits.
	log.Fatal(http.ListenAndServe("0.0.0.0:8080", nil))
}
// landingPage is the static HTML document served by the "/" handler. It
// points users at the "/listen" and "/fingerprints" endpoints.
const landingPage = `
<!doctype html>
<html>
<head>
<title>Webhook listener</title>
</head>
<body>
<h1>Webhook Listener</h1>
<p> For setup, please point your webhook configuration to the "/listen" endpoint. </p>
<p> For debugging, please use the "/fingerprints" endpoint. </p>
</body>
</html>
`