mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
CloudWatch: Logs queries should retry on throttling errors (#92535)
CloudWatch: logs queries should retry on throttling errors
This commit is contained in:
parent
d6f871490e
commit
2d10068714
@ -25,6 +25,7 @@ import (
|
||||
|
||||
const (
|
||||
limitExceededException = "LimitExceededException"
|
||||
throttlingException = "ThrottlingException"
|
||||
defaultEventLimit = int64(10)
|
||||
defaultLogGroupLimit = int64(50)
|
||||
logIdentifierInternal = "__log__grafana_internal__"
|
||||
@ -233,6 +234,9 @@ func (e *cloudWatchExecutor) executeStartQuery(ctx context.Context, logsClient c
|
||||
if errors.As(err, &awsErr) && awsErr.Code() == "LimitExceededException" {
|
||||
e.logger.FromContext(ctx).Debug("ExecuteStartQuery limit exceeded", "err", awsErr)
|
||||
err = &AWSError{Code: limitExceededException, Message: err.Error()}
|
||||
} else if errors.As(err, &awsErr) && awsErr.Code() == "ThrottlingException" {
|
||||
e.logger.FromContext(ctx).Debug("ExecuteStartQuery rate exceeded", "err", awsErr)
|
||||
err = &AWSError{Code: throttlingException, Message: err.Error()}
|
||||
}
|
||||
err = errorsource.DownstreamError(err, false)
|
||||
}
|
||||
|
@ -208,6 +208,53 @@ describe('CloudWatchLogsQueryRunner', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Verifies that handleLogQueries retries StartQuery after a throttling error:
// the mocked queryFn answers the first call with the throttling error stub,
// the second with a successful StartQuery response and the third with a
// successful GetQueryResults response. The test expects exactly three calls
// and a final result with an empty `errors` array.
it('should call getQueryResults until the query returns even if it the startQuery gets a throttling error from aws', async () => {
|
||||
const { runner } = setupMockedLogsQueryRunner();
|
||||
|
||||
const options: DataQueryRequest<CloudWatchLogsQuery> = {
|
||||
...LogsRequestMock,
|
||||
targets: rawLogQueriesStub,
|
||||
};
|
||||
|
||||
// Each mockReturnValueOnce is consumed in order, one per queryFn invocation.
const queryFn = jest
|
||||
.fn()
|
||||
.mockReturnValueOnce(of(startQueryErrorWhenThrottlingResponseStub))
|
||||
.mockReturnValueOnce(of(startQuerySuccessResponseStub))
|
||||
.mockReturnValueOnce(of(getQuerySuccessResponseStub));
|
||||
|
||||
const response = runner.handleLogQueries(rawLogQueriesStub, options, queryFn);
|
||||
const results = await lastValueFrom(response);
|
||||
expect(queryFn).toHaveBeenCalledTimes(3);
|
||||
|
||||
// first call: a StartQuery request, answered by the mock with the throttling error stub
|
||||
expect(queryFn).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
targets: expect.arrayContaining([expect.objectContaining({ subtype: 'StartQuery' })]),
|
||||
})
|
||||
);
|
||||
// second call: StartQuery again, because the first call failed with the throttling error
|
||||
expect(queryFn).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
targets: expect.arrayContaining([expect.objectContaining({ subtype: 'StartQuery' })]),
|
||||
})
|
||||
);
|
||||
// third call: GetQueryResults, issued because the second StartQuery call was successful
|
||||
expect(queryFn).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
expect.objectContaining({
|
||||
targets: expect.arrayContaining([expect.objectContaining({ subtype: 'GetQueryResults' })]),
|
||||
})
|
||||
);
|
||||
|
||||
// The throttling error from the first attempt must not appear in the final result.
expect(results).toEqual({
|
||||
...getQuerySuccessResponseStub,
|
||||
errors: [],
|
||||
key: 'test-key',
|
||||
});
|
||||
});
|
||||
|
||||
it('should return an error if it timesout before the start queries can get past a rate limiting error', async () => {
|
||||
const { runner } = setupMockedLogsQueryRunner();
|
||||
// first time timeout is called it will not be timed out, second time it will be timed out
|
||||
@ -469,6 +516,18 @@ const startQueryErrorWhenRateLimitedResponseStub = {
|
||||
],
|
||||
};
|
||||
|
||||
// Stubbed response for a StartQuery request that failed with a throttling
// error: no data frames and a single error for refId 'A' with status 500.
// The message contains the 'ThrottlingException' substring, which the retry
// test feeds to the query runner as the first queryFn result.
const startQueryErrorWhenThrottlingResponseStub = {
|
||||
data: [],
|
||||
errors: [
|
||||
{
|
||||
refId: 'A',
|
||||
message:
|
||||
'failed to execute log action with subtype: StartQuery: ThrottlingException: ThrottlingException: Rate exceeded',
|
||||
status: 500,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const startQueryErrorWhenBadSyntaxResponseStub = {
|
||||
data: [],
|
||||
state: 'Error',
|
||||
|
@ -94,7 +94,10 @@ function splitErrorsData(errors: DataQueryError[]) {
|
||||
const refIdsForRequestsToRetry: string[] = [];
|
||||
const errorsNotToRetry: DataQueryError[] = [];
|
||||
errors.map((err) => {
|
||||
if (err?.message?.includes('LimitExceededException') && err.refId) {
|
||||
if (
|
||||
err?.refId &&
|
||||
(err.message?.includes('LimitExceededException') || err.message?.includes('ThrottlingException'))
|
||||
) {
|
||||
refIdsForRequestsToRetry.push(err.refId);
|
||||
} else {
|
||||
errorsNotToRetry.push(err);
|
||||
|
Loading…
Reference in New Issue
Block a user