From 406431df4ed7dccac8f383733936e6e1f6db4104 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Fri, 17 Mar 2023 07:14:31 +0000 Subject: [PATCH] Alerting: QoL improvements to the unified alerting multi-replica devenv (#64907) --- .gitignore | 2 + .../ha-test-unified-alerting/Dockerfile | 12 ++ .../docker-compose.yaml | 75 ++++---- .../grafana-service.yml | 23 +++ .../logs/webhook/.gitkeep | 0 .../webhook-listener.go | 181 ++++++++++++++++++ 6 files changed, 258 insertions(+), 35 deletions(-) create mode 100644 devenv/docker/ha-test-unified-alerting/Dockerfile create mode 100644 devenv/docker/ha-test-unified-alerting/grafana-service.yml create mode 100644 devenv/docker/ha-test-unified-alerting/logs/webhook/.gitkeep create mode 100644 devenv/docker/ha-test-unified-alerting/webhook-listener.go diff --git a/.gitignore b/.gitignore index 9762833b380..39b7a1e2ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -79,6 +79,8 @@ public/css/*.min.css /devenv/docker-compose.override.yaml /devenv/.env /devenv/docker/blocks/tempo/tempo-data/ +/devenv/docker/ha-test-unified-alerting/logs/webhook/dumps/ +/devenv/docker/ha-test-unified-alerting/logs/webhook/webhook-listener.log conf/custom.ini /conf/provisioning/**/custom.yaml diff --git a/devenv/docker/ha-test-unified-alerting/Dockerfile b/devenv/docker/ha-test-unified-alerting/Dockerfile new file mode 100644 index 00000000000..cc9b14e2e4e --- /dev/null +++ b/devenv/docker/ha-test-unified-alerting/Dockerfile @@ -0,0 +1,12 @@ +FROM golang:1.19 + +ADD webhook-listener.go /go/src/webhook/webhook-listener.go + +WORKDIR /go/src/webhook + +RUN mkdir /tmp/logs +RUN go build -o /bin webhook-listener.go + +ENV PORT 8080 + +ENTRYPOINT [ "/bin/webhook-listener" ] diff --git a/devenv/docker/ha-test-unified-alerting/docker-compose.yaml b/devenv/docker/ha-test-unified-alerting/docker-compose.yaml index 2424f1f0376..12ec330b1f2 100644 --- a/devenv/docker/ha-test-unified-alerting/docker-compose.yaml +++ b/devenv/docker/ha-test-unified-alerting/docker-compose.yaml @@ -32,7 +32,7 @@ services: environment: - VIRTUAL_HOST=prometheus.loc ports: - - 909 + - 9090 nginx-proxy: image: jwilder/nginx-proxy ports: @@ -43,48 +43,53 @@ services: db: condition: service_healthy grafana1: - image: grafana/grafana:dev - volumes: - - ./grafana/provisioning/:/etc/grafana/provisioning/ - environment: - - VIRTUAL_HOST=grafana.loc - - GF_FEATURE_TOGGLES_ENABLE=ngalert - - GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094 - - GF_SERVER_ROOT_URL=http://grafana.loc - - GF_DATABASE_NAME=grafana - - GF_DATABASE_USER=grafana - - GF_DATABASE_PASSWORD=password - - GF_DATABASE_TYPE=mysql - - GF_DATABASE_HOST=db:3306 - - GF_DATABASE_MAX_OPEN_CONN=300 - - GF_SESSION_PROVIDER=mysql - - GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true + extends: + file: ./grafana-service.yml + service: grafana ports: - 3010:3000 depends_on: db: condition: service_healthy - grafana2: - image: grafana/grafana:dev - volumes: - - ./grafana/provisioning/:/etc/grafana/provisioning/ - environment: - - VIRTUAL_HOST=grafana.loc - - GF_FEATURE_TOGGLES_ENABLE=ngalert - - GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094 - - GF_SERVER_ROOT_URL=http://grafana.loc - - GF_DATABASE_NAME=grafana - - GF_DATABASE_USER=grafana - - GF_DATABASE_PASSWORD=password - - GF_DATABASE_TYPE=mysql - - GF_DATABASE_HOST=db:3306 - - GF_DATABASE_MAX_OPEN_CONN=300 - - GF_SESSION_PROVIDER=mysql - - GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true + extends: + file: ./grafana-service.yml + service: grafana ports: - 3020:3000 depends_on: db: condition: service_healthy - + grafana1: + condition: service_healthy + grafana3: + extends: + file: ./grafana-service.yml + service: grafana + ports: + - 3030:3000 + depends_on: + db: + condition: service_healthy + grafana2: + condition: service_healthy + grafana4: + extends: + file: ./grafana-service.yml + service: grafana + ports: + - 3040:3000 + depends_on: + db: + condition: service_healthy + grafana3: + condition: service_healthy + webhook: + image: webhook-receiver + build: + context: . + dockerfile: Dockerfile + ports: + - "18081:8080" + volumes: + - "./logs/webhook:/tmp/logs:rw" diff --git a/devenv/docker/ha-test-unified-alerting/grafana-service.yml b/devenv/docker/ha-test-unified-alerting/grafana-service.yml new file mode 100644 index 00000000000..d5729041d36 --- /dev/null +++ b/devenv/docker/ha-test-unified-alerting/grafana-service.yml @@ -0,0 +1,23 @@ +services: + grafana: + image: grafana/grafana-dev:3a22eba17f23b18faa27436ab2f9c3ea977b550b + volumes: + - ./grafana/provisioning/:/etc/grafana/provisioning/ + environment: + - VIRTUAL_HOST=grafana.loc + - GF_FEATURE_TOGGLES_ENABLE=ngalert + - GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting-grafana2-1:9094,ha-test-unified-alerting-grafana1-1:9094,ha-test-unified-alerting-grafana3-1:9094,ha-test-unified-alerting-grafana4-1:9094 + - GF_SERVER_ROOT_URL=http://grafana.loc + - GF_DATABASE_NAME=grafana + - GF_DATABASE_USER=grafana + - GF_DATABASE_PASSWORD=password + - GF_DATABASE_TYPE=mysql + - GF_DATABASE_HOST=db:3306 + - GF_DATABASE_MAX_OPEN_CONN=300 + - GF_SESSION_PROVIDER=mysql + - GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true + healthcheck: + test: timeout 10s bash -c ':> /dev/tcp/127.0.0.1/3000' || exit 1 + interval: 5s + timeout: 15s + retries: 3 diff --git a/devenv/docker/ha-test-unified-alerting/logs/webhook/.gitkeep b/devenv/docker/ha-test-unified-alerting/logs/webhook/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/devenv/docker/ha-test-unified-alerting/webhook-listener.go b/devenv/docker/ha-test-unified-alerting/webhook-listener.go new file mode 100644 index 00000000000..18724ec2a84 --- /dev/null +++ b/devenv/docker/ha-test-unified-alerting/webhook-listener.go @@ -0,0 +1,181 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "path" + "path/filepath" + "sync" + "time" +) + +var ( + fingerprints = make(Fingerprints) + mu sync.Mutex + waitSeconds int + logFile bool + logFileName = filepath.Join(os.TempDir(), "/logs/webhook-listener.log") + dumpDir = filepath.Join(os.TempDir(), "/logs/dumps") +) + +type Alert struct { + Fingerprint string `json:"fingerprint"` + StartsAt time.Time `json:"startsAt"` + Status string `json:"status"` +} + +type Data struct { + Receiver string `json:"receiver"` + Status string `json:"status"` + Alerts []Alert `json:"alerts"` +} + +// Fingerprints keeps track of the number of alerts received +// by fingerprint and StartsAt time. +type Fingerprints map[string]map[time.Time]tracker + +type tracker struct { + Updates int `json:"updates"` + Statuses []string `json:"statuses"` +} + +func updateFingerprints(v Data) { + mu.Lock() + defer mu.Unlock() + for _, alert := range v.Alerts { + m, ok := fingerprints[alert.Fingerprint] + if !ok { + m = make(map[time.Time]tracker) + } + + t, ok := m[alert.StartsAt] + if !ok { + t = tracker{ + Updates: 0, + Statuses: []string{}, + } + } + + t.Updates += 1 + t.Statuses = append(t.Statuses, alert.Status) + + m[alert.StartsAt] = t + fingerprints[alert.Fingerprint] = m + } +} + +func parseFlags() { + flag.BoolVar(&logFile, "log-file", true, "Whether to log to file") + flag.IntVar(&waitSeconds, "wait-seconds", 0, "The number of seconds to wait before sending an HTTP response") + flag.Parse() +} + +func saveDump(data []byte) { + if !logFile { + return + } + + if len(data) == 0 { + fmt.Println("empty dump - not saving") + return + } + ts := time.Now().UnixNano() + name := path.Join(dumpDir, fmt.Sprintf("%d.json", ts)) + for i := 1; i <= 1000; i++ { + if _, err := os.Stat(name); os.IsNotExist(err) { + break + } + name = path.Join(dumpDir, fmt.Sprintf("%d_%04d.json", ts, i)) + } + log.Printf("saving dump to %s", name) + err := os.WriteFile(name, data, os.ModePerm) + if err != nil { + log.Printf("cannot save to file %s: %s\n", name, err) + } +} + +func main() { + parseFlags() + + _, err := os.Stat(dumpDir) + if os.IsNotExist(err) { + err = os.MkdirAll(dumpDir, os.ModePerm) + if err != nil { + log.Panicf("can't create directory '%s'", dumpDir) + } + } + + if logFile { + //create your file with desired read/write permissions + f, err := os.OpenFile(logFileName, os.O_WRONLY|os.O_CREATE|os.O_APPEND, os.ModePerm) + if err != nil { + log.Fatal(err) + } + defer f.Close() + log.SetOutput(f) + } + + waitDuration := time.Duration(waitSeconds) * time.Second + http.HandleFunc("/", func(writer http.ResponseWriter, request *http.Request) { + + writer.WriteHeader(http.StatusOK) + writer.Write([]byte(landingPage)) + }) + + http.HandleFunc("/listen", func(w http.ResponseWriter, r *http.Request) { + log.Printf("got submission from: %s\n", r.RemoteAddr) + b, err := ioutil.ReadAll(r.Body) + if err != nil { + log.Println(err) + w.WriteHeader(http.StatusBadRequest) + return + } + saveDump(b) + v := Data{} + if err := json.Unmarshal(b, &v); err != nil { + log.Println(err) + w.WriteHeader(http.StatusBadRequest) + return + } + fmt.Printf("receiver: %s, status: %s\n", v.Receiver, v.Status) + updateFingerprints(v) + <-time.After(waitDuration) + }) + http.HandleFunc("/fingerprints", func(w http.ResponseWriter, r *http.Request) { + b, err := func() ([]byte, error) { + mu.Lock() + defer mu.Unlock() + return json.Marshal(fingerprints) + }() + if err != nil { + log.Println(err) + w.WriteHeader(http.StatusInternalServerError) + return + } + w.Header().Add("Content-Type", "application/json") + w.Write(b) + }) + log.Println("Listening") + log.Printf("Wait Duration %v\n", waitDuration) + http.ListenAndServe("0.0.0.0:8080", nil) +} + +const landingPage = ` + + + +Webhook listener + + +

Webhook Listener

+ +

For setup, please point your webhook configuration to the "/listen" endpoint.

+

For debugging, please use the "/fingerprints" endpoint.

+ + +`