mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Merge pull request #14883 from sharkpc0813/14701-fix-alert-context
fix that alert context and result handle context do not use the same derived context.
This commit is contained in:
commit
711f9bc034
@ -106,6 +106,7 @@ var (
|
|||||||
unfinishedWorkTimeout = time.Second * 5
|
unfinishedWorkTimeout = time.Second * 5
|
||||||
// TODO: Make alertTimeout and alertMaxAttempts configurable in the config file.
|
// TODO: Make alertTimeout and alertMaxAttempts configurable in the config file.
|
||||||
alertTimeout = time.Second * 30
|
alertTimeout = time.Second * 30
|
||||||
|
resultHandleTimeout = time.Second * 30
|
||||||
alertMaxAttempts = 3
|
alertMaxAttempts = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -116,7 +117,7 @@ func (e *AlertingService) processJobWithRetry(grafanaCtx context.Context, job *J
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
cancelChan := make(chan context.CancelFunc, alertMaxAttempts)
|
cancelChan := make(chan context.CancelFunc, alertMaxAttempts*2)
|
||||||
attemptChan := make(chan int, 1)
|
attemptChan := make(chan int, 1)
|
||||||
|
|
||||||
// Initialize with first attemptID=1
|
// Initialize with first attemptID=1
|
||||||
@ -204,6 +205,15 @@ func (e *AlertingService) processJob(attemptID int, attemptChan chan int, cancel
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// create new context with timeout for notifications
|
||||||
|
resultHandleCtx, resultHandleCancelFn := context.WithTimeout(context.Background(), resultHandleTimeout)
|
||||||
|
cancelChan <- resultHandleCancelFn
|
||||||
|
|
||||||
|
// override the context used for evaluation with a new context for notifications.
|
||||||
|
// This makes it possible for notifiers to execute when datasources
|
||||||
|
// dont respond within the timeout limit. We should rewrite this so notifications
|
||||||
|
// dont reuse the evalContext and get its own context.
|
||||||
|
evalContext.Ctx = resultHandleCtx
|
||||||
evalContext.Rule.State = evalContext.GetNewState()
|
evalContext.Rule.State = evalContext.GetNewState()
|
||||||
e.resultHandler.Handle(evalContext)
|
e.resultHandler.Handle(evalContext)
|
||||||
span.Finish()
|
span.Finish()
|
||||||
|
148
pkg/services/alerting/engine_integration_test.go
Normal file
148
pkg/services/alerting/engine_integration_test.go
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
// +build integration
|
||||||
|
|
||||||
|
package alerting
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestEngineTimeouts(t *testing.T) {
|
||||||
|
Convey("Alerting engine timeout tests", t, func() {
|
||||||
|
engine := NewEngine()
|
||||||
|
engine.resultHandler = &FakeResultHandler{}
|
||||||
|
job := &Job{Running: true, Rule: &Rule{}}
|
||||||
|
|
||||||
|
Convey("Should trigger as many retries as needed", func() {
|
||||||
|
Convey("pended alert for datasource -> result handler should be worked", func() {
|
||||||
|
// reduce alert timeout to test quickly
|
||||||
|
originAlertTimeout := alertTimeout
|
||||||
|
alertTimeout = 2 * time.Second
|
||||||
|
transportTimeoutInterval := 2 * time.Second
|
||||||
|
serverBusySleepDuration := 1 * time.Second
|
||||||
|
|
||||||
|
evalHandler := NewFakeCommonTimeoutHandler(transportTimeoutInterval, serverBusySleepDuration)
|
||||||
|
resultHandler := NewFakeCommonTimeoutHandler(transportTimeoutInterval, serverBusySleepDuration)
|
||||||
|
engine.evalHandler = evalHandler
|
||||||
|
engine.resultHandler = resultHandler
|
||||||
|
|
||||||
|
engine.processJobWithRetry(context.TODO(), job)
|
||||||
|
|
||||||
|
So(evalHandler.EvalSucceed, ShouldEqual, true)
|
||||||
|
So(resultHandler.ResultHandleSucceed, ShouldEqual, true)
|
||||||
|
|
||||||
|
// initialize for other tests.
|
||||||
|
alertTimeout = originAlertTimeout
|
||||||
|
engine.resultHandler = &FakeResultHandler{}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type FakeCommonTimeoutHandler struct {
|
||||||
|
TransportTimeoutDuration time.Duration
|
||||||
|
ServerBusySleepDuration time.Duration
|
||||||
|
EvalSucceed bool
|
||||||
|
ResultHandleSucceed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFakeCommonTimeoutHandler(transportTimeoutDuration time.Duration, serverBusySleepDuration time.Duration) *FakeCommonTimeoutHandler {
|
||||||
|
return &FakeCommonTimeoutHandler{
|
||||||
|
TransportTimeoutDuration: transportTimeoutDuration,
|
||||||
|
ServerBusySleepDuration: serverBusySleepDuration,
|
||||||
|
EvalSucceed: false,
|
||||||
|
ResultHandleSucceed: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (handler *FakeCommonTimeoutHandler) Eval(evalContext *EvalContext) {
|
||||||
|
// 1. prepare mock server
|
||||||
|
path := "/evaltimeout"
|
||||||
|
srv := runBusyServer(path, handler.ServerBusySleepDuration)
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
// 2. send requests
|
||||||
|
url := srv.URL + path
|
||||||
|
res, err := sendRequest(evalContext.Ctx, url, handler.TransportTimeoutDuration)
|
||||||
|
if res != nil {
|
||||||
|
defer res.Body.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
evalContext.Error = errors.New("Fake evaluation timeout test failure")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.StatusCode == 200 {
|
||||||
|
handler.EvalSucceed = true
|
||||||
|
}
|
||||||
|
|
||||||
|
evalContext.Error = errors.New("Fake evaluation timeout test failure; wrong response")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (handler *FakeCommonTimeoutHandler) Handle(evalContext *EvalContext) error {
|
||||||
|
// 1. prepare mock server
|
||||||
|
path := "/resulthandle"
|
||||||
|
srv := runBusyServer(path, handler.ServerBusySleepDuration)
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
// 2. send requests
|
||||||
|
url := srv.URL + path
|
||||||
|
res, err := sendRequest(evalContext.Ctx, url, handler.TransportTimeoutDuration)
|
||||||
|
if res != nil {
|
||||||
|
defer res.Body.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
evalContext.Error = errors.New("Fake result handle timeout test failure")
|
||||||
|
return evalContext.Error
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.StatusCode == 200 {
|
||||||
|
handler.ResultHandleSucceed = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
evalContext.Error = errors.New("Fake result handle timeout test failure; wrong response")
|
||||||
|
|
||||||
|
return evalContext.Error
|
||||||
|
}
|
||||||
|
|
||||||
|
func runBusyServer(path string, serverBusySleepDuration time.Duration) *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
server := httptest.NewServer(mux)
|
||||||
|
|
||||||
|
mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
time.Sleep(serverBusySleepDuration)
|
||||||
|
})
|
||||||
|
|
||||||
|
return server
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendRequest(context context.Context, url string, transportTimeoutInterval time.Duration) (resp *http.Response, err error) {
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req = req.WithContext(context)
|
||||||
|
|
||||||
|
transport := http.Transport{
|
||||||
|
Dial: (&net.Dialer{
|
||||||
|
Timeout: transportTimeoutInterval,
|
||||||
|
KeepAlive: transportTimeoutInterval,
|
||||||
|
}).Dial,
|
||||||
|
}
|
||||||
|
client := http.Client{
|
||||||
|
Transport: &transport,
|
||||||
|
}
|
||||||
|
|
||||||
|
return client.Do(req)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user