mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
CloudWatch Logs: Add retry strategy for hitting max concurrent queries (#39290)
* Add error passing and retry strategy * Change generic error to specific one * Make the error more generic * Refactor retry strategy * Add retry that handles multiple queries * Rollback some backend changes * Remove simple retry strategy * Add comments * Add tests * Small test fixes * Add log timeout config * Fix tests * Fix tests * Add validation * Remove commented code and add comment * Fix snapshots * Remove unnecessary cast
This commit is contained in:
@@ -14,9 +14,22 @@ import (
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||
"github.com/grafana/grafana/pkg/util/errutil"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
var LimitExceededException = "LimitExceededException"
|
||||
|
||||
type AWSError struct {
|
||||
Code string
|
||||
Message string
|
||||
Payload map[string]string
|
||||
}
|
||||
|
||||
func (e *AWSError) Error() string {
|
||||
return fmt.Sprintf("%s: %s", e.Code, e.Message)
|
||||
}
|
||||
|
||||
func (e *cloudWatchExecutor) executeLogActions(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
|
||||
resp := backend.NewQueryDataResponse()
|
||||
|
||||
@@ -33,6 +46,13 @@ func (e *cloudWatchExecutor) executeLogActions(ctx context.Context, req *backend
|
||||
eg.Go(func() error {
|
||||
dataframe, err := e.executeLogAction(ectx, model, query, req.PluginContext)
|
||||
if err != nil {
|
||||
var AWSError *AWSError
|
||||
if errors.As(err, &AWSError) {
|
||||
resultChan <- backend.Responses{
|
||||
query.RefID: backend.DataResponse{Frames: data.Frames{}, Error: AWSError},
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -56,6 +76,7 @@ func (e *cloudWatchExecutor) executeLogActions(ctx context.Context, req *backend
|
||||
for refID, response := range result {
|
||||
respD := resp.Responses[refID]
|
||||
respD.Frames = response.Frames
|
||||
respD.Error = response.Error
|
||||
resp.Responses[refID] = respD
|
||||
}
|
||||
}
|
||||
@@ -96,7 +117,7 @@ func (e *cloudWatchExecutor) executeLogAction(ctx context.Context, model *simple
|
||||
data, err = e.handleGetLogEvents(ctx, logsClient, model)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, errutil.Wrapf(err, "failed to execute log action with subtype: %s", subType)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
@@ -224,6 +245,11 @@ func (e *cloudWatchExecutor) handleStartQuery(ctx context.Context, logsClient cl
|
||||
model *simplejson.Json, timeRange backend.TimeRange, refID string) (*data.Frame, error) {
|
||||
startQueryResponse, err := e.executeStartQuery(ctx, logsClient, model, timeRange)
|
||||
if err != nil {
|
||||
var awsErr awserr.Error
|
||||
if errors.As(err, &awsErr) && awsErr.Code() == "LimitExceededException" {
|
||||
plog.Debug("executeStartQuery limit exceeded", "err", awsErr)
|
||||
return nil, &AWSError{Code: LimitExceededException, Message: err.Error()}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user