mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
CloudWatch: Use single timeout for log queries (#44848)
This commit is contained in:
parent
3d0cff5410
commit
16e001e762
@ -81,8 +81,6 @@ const displayAlert = (datasourceName: string, region: string) =>
|
||||
const displayCustomError = (title: string, message: string) =>
|
||||
store.dispatch(notifyApp(createErrorNotification(title, message)));
|
||||
|
||||
export const MAX_ATTEMPTS = 5;
|
||||
|
||||
export class CloudWatchDatasource
|
||||
extends DataSourceWithBackend<CloudWatchQuery, CloudWatchJsonData>
|
||||
implements DataSourceWithLogsContextSupport
|
||||
@ -180,6 +178,11 @@ export class CloudWatchDatasource
|
||||
region: this.replace(this.getActualRegion(target.region), options.scopedVars, true, 'region'),
|
||||
}));
|
||||
|
||||
const startTime = new Date();
|
||||
const timeoutFunc = () => {
|
||||
return Date.now() >= startTime.valueOf() + rangeUtil.intervalToMs(this.logsTimeout);
|
||||
};
|
||||
|
||||
return runWithRetry(
|
||||
(targets: StartQueryRequest[]) => {
|
||||
return this.makeLogActionRequest('StartQuery', targets, {
|
||||
@ -189,9 +192,7 @@ export class CloudWatchDatasource
|
||||
});
|
||||
},
|
||||
queryParams,
|
||||
{
|
||||
timeout: rangeUtil.intervalToMs(this.logsTimeout),
|
||||
}
|
||||
timeoutFunc
|
||||
).pipe(
|
||||
mergeMap(({ frames, error }: { frames: DataFrame[]; error?: DataQueryError }) =>
|
||||
// This queries for the results
|
||||
@ -202,7 +203,8 @@ export class CloudWatchDatasource
|
||||
refId: dataFrame.refId!,
|
||||
statsGroups: (logQueries.find((target) => target.refId === dataFrame.refId)! as CloudWatchLogsQuery)
|
||||
.statsGroups,
|
||||
}))
|
||||
})),
|
||||
timeoutFunc
|
||||
).pipe(
|
||||
map((response: DataQueryResponse) => {
|
||||
if (!response.error && error) {
|
||||
@ -310,7 +312,8 @@ export class CloudWatchDatasource
|
||||
limit?: number;
|
||||
region: string;
|
||||
statsGroups?: string[];
|
||||
}>
|
||||
}>,
|
||||
timeoutFunc: () => boolean
|
||||
): Observable<DataQueryResponse> {
|
||||
this.logQueries = {};
|
||||
queryParams.forEach((param) => {
|
||||
@ -363,7 +366,7 @@ export class CloudWatchDatasource
|
||||
}
|
||||
}),
|
||||
map(([dataFrames, failedAttempts]) => {
|
||||
if (failedAttempts >= MAX_ATTEMPTS) {
|
||||
if (timeoutFunc()) {
|
||||
for (const frame of dataFrames) {
|
||||
set(frame, 'meta.custom.Status', CloudWatchLogsQueryStatus.Cancelled);
|
||||
}
|
||||
@ -381,13 +384,12 @@ export class CloudWatchDatasource
|
||||
)
|
||||
? LoadingState.Done
|
||||
: LoadingState.Loading,
|
||||
error:
|
||||
failedAttempts >= MAX_ATTEMPTS
|
||||
? {
|
||||
message: `error: query timed out after ${MAX_ATTEMPTS} attempts`,
|
||||
type: DataQueryErrorType.Timeout,
|
||||
}
|
||||
: undefined,
|
||||
error: timeoutFunc()
|
||||
? {
|
||||
message: `error: query timed out after ${failedAttempts} attempts`,
|
||||
type: DataQueryErrorType.Timeout,
|
||||
}
|
||||
: undefined,
|
||||
};
|
||||
}),
|
||||
takeWhile(({ state }) => state !== LoadingState.Error && state !== LoadingState.Done, true)
|
||||
|
@ -8,7 +8,7 @@ import {
|
||||
} from '@grafana/data';
|
||||
|
||||
import * as redux from 'app/store/store';
|
||||
import { CloudWatchDatasource, MAX_ATTEMPTS } from '../datasource';
|
||||
import { CloudWatchDatasource } from '../datasource';
|
||||
import { TemplateSrv } from 'app/features/templating/template_srv';
|
||||
import {
|
||||
MetricEditorMode,
|
||||
@ -177,7 +177,7 @@ describe('CloudWatchDatasource', () => {
|
||||
jest.spyOn(rxjsUtils, 'increasingInterval').mockImplementation(() => interval(100));
|
||||
});
|
||||
|
||||
it('should stop querying when no more data received a number of times in a row', async () => {
|
||||
it('should stop querying when timed out', async () => {
|
||||
const { ds } = getTestContext();
|
||||
const fakeFrames = genMockFrames(20);
|
||||
const initialRecordsMatched = fakeFrames[0].meta!.stats!.find((stat) => stat.displayName === 'Records scanned')!
|
||||
@ -213,8 +213,13 @@ describe('CloudWatchDatasource', () => {
|
||||
}
|
||||
});
|
||||
|
||||
const iterations = 15;
|
||||
// Times out after 15 passes for consistent testing
|
||||
const timeoutFunc = () => {
|
||||
return i >= iterations;
|
||||
};
|
||||
const myResponse = await lastValueFrom(
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }])
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }], timeoutFunc)
|
||||
);
|
||||
|
||||
const expectedData = [
|
||||
@ -235,10 +240,10 @@ describe('CloudWatchDatasource', () => {
|
||||
state: 'Done',
|
||||
error: {
|
||||
type: DataQueryErrorType.Timeout,
|
||||
message: `error: query timed out after ${MAX_ATTEMPTS} attempts`,
|
||||
message: `error: query timed out after 5 attempts`,
|
||||
},
|
||||
});
|
||||
expect(i).toBe(15);
|
||||
expect(i).toBe(iterations);
|
||||
});
|
||||
|
||||
it('should continue querying as long as new data is being received', async () => {
|
||||
@ -256,8 +261,12 @@ describe('CloudWatchDatasource', () => {
|
||||
}
|
||||
});
|
||||
|
||||
const startTime = new Date();
|
||||
const timeoutFunc = () => {
|
||||
return Date.now() >= startTime.valueOf() + 6000;
|
||||
};
|
||||
const myResponse = await lastValueFrom(
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }])
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }], timeoutFunc)
|
||||
);
|
||||
expect(myResponse).toEqual({
|
||||
data: [fakeFrames[fakeFrames.length - 1]],
|
||||
@ -281,8 +290,12 @@ describe('CloudWatchDatasource', () => {
|
||||
}
|
||||
});
|
||||
|
||||
const startTime = new Date();
|
||||
const timeoutFunc = () => {
|
||||
return Date.now() >= startTime.valueOf() + 6000;
|
||||
};
|
||||
const myResponse = await lastValueFrom(
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }])
|
||||
ds.logsQuery([{ queryId: 'fake-query-id', region: 'default', refId: 'A' }], timeoutFunc)
|
||||
);
|
||||
|
||||
expect(myResponse).toEqual({
|
||||
|
@ -6,11 +6,13 @@ import { DataResponse, FetchError } from '@grafana/runtime';
|
||||
import { StartQueryRequest } from '../types';
|
||||
|
||||
describe('runWithRetry', () => {
|
||||
const timeoutPass = () => false;
|
||||
const timeoutFail = () => true;
|
||||
it('returns results if no retry is needed', async () => {
|
||||
const queryFunc = jest.fn();
|
||||
queryFunc.mockReturnValueOnce(of([createResponseFrame('A')]));
|
||||
const targets = [targetA];
|
||||
const values = await lastValueFrom(runWithRetry(queryFunc, targets).pipe(toArray()));
|
||||
const values = await lastValueFrom(runWithRetry(queryFunc, targets, timeoutPass).pipe(toArray()));
|
||||
expect(queryFunc).toBeCalledTimes(1);
|
||||
expect(queryFunc).toBeCalledWith(targets);
|
||||
expect(values).toEqual([{ frames: [createResponseFrame('A')] }]);
|
||||
@ -23,7 +25,7 @@ describe('runWithRetry', () => {
|
||||
queryFunc.mockReturnValueOnce(throwError(() => createErrorResponse(targets)));
|
||||
queryFunc.mockReturnValueOnce(of([createResponseFrame('A')]));
|
||||
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets).pipe(toArray()));
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, timeoutPass).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
const values = await valuesPromise;
|
||||
|
||||
@ -33,14 +35,14 @@ describe('runWithRetry', () => {
|
||||
expect(values).toEqual([{ frames: [createResponseFrame('A')] }]);
|
||||
});
|
||||
|
||||
it('fails if reaching timoeut and no data was retrieved', async () => {
|
||||
it('fails if reaching timeout and no data was retrieved', async () => {
|
||||
jest.useFakeTimers();
|
||||
const targets = [targetA];
|
||||
const queryFunc = jest.fn();
|
||||
queryFunc.mockReturnValueOnce(throwError(() => createErrorResponse(targets)));
|
||||
queryFunc.mockReturnValueOnce(of([createResponseFrame('A')]));
|
||||
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, { timeout: 0 }).pipe(toArray()));
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, timeoutFail).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
let error;
|
||||
try {
|
||||
@ -60,7 +62,7 @@ describe('runWithRetry', () => {
|
||||
const queryFunc = jest.fn();
|
||||
queryFunc.mockReturnValueOnce(throwError(() => 'random error'));
|
||||
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets).pipe(toArray()));
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, timeoutPass).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
let error;
|
||||
try {
|
||||
@ -79,7 +81,7 @@ describe('runWithRetry', () => {
|
||||
const queryFunc = jest.fn();
|
||||
queryFunc.mockReturnValueOnce(of([createResponseFrame('A'), createResponseFrame('B')]));
|
||||
|
||||
const values = await lastValueFrom(runWithRetry(queryFunc, targets).pipe(toArray()));
|
||||
const values = await lastValueFrom(runWithRetry(queryFunc, targets, timeoutPass).pipe(toArray()));
|
||||
|
||||
expect(queryFunc).toBeCalledTimes(1);
|
||||
expect(queryFunc).nthCalledWith(1, targets);
|
||||
@ -101,7 +103,7 @@ describe('runWithRetry', () => {
|
||||
|
||||
queryFunc.mockReturnValueOnce(of([createResponseFrame('B')]));
|
||||
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets).pipe(toArray()));
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, timeoutPass).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
const values = await valuesPromise;
|
||||
|
||||
@ -129,7 +131,7 @@ describe('runWithRetry', () => {
|
||||
)
|
||||
);
|
||||
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, { timeout: 0 }).pipe(toArray()));
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, timeoutFail).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
const values = await valuesPromise;
|
||||
|
||||
@ -172,9 +174,7 @@ describe('runWithRetry', () => {
|
||||
)
|
||||
);
|
||||
|
||||
const valuesPromise = lastValueFrom(
|
||||
runWithRetry(queryFunc, targets, { timeoutFunc: (retry) => retry >= 2 }).pipe(toArray())
|
||||
);
|
||||
const valuesPromise = lastValueFrom(runWithRetry(queryFunc, targets, (retry) => retry >= 2).pipe(toArray()));
|
||||
jest.runAllTimers();
|
||||
const values = await valuesPromise;
|
||||
|
||||
|
@ -5,8 +5,6 @@ import { DataFrame, DataFrameJSON, DataQueryError } from '@grafana/data';
|
||||
|
||||
type Result = { frames: DataFrameJSON[]; error?: string };
|
||||
|
||||
const defaultTimeout = 30_000;
|
||||
|
||||
/**
|
||||
* A retry strategy specifically for cloud watch logs queries. A cloud watch logs query first needs to be started
|
||||
* and then polled for its results. The start query can fail because of the concurrent queries rate limit,
|
||||
@ -23,11 +21,7 @@ const defaultTimeout = 30_000;
|
||||
export function runWithRetry(
|
||||
queryFun: (targets: StartQueryRequest[]) => Observable<DataFrame[]>,
|
||||
targets: StartQueryRequest[],
|
||||
options: {
|
||||
timeout?: number;
|
||||
timeoutFunc?: (retry: number, startTime: number) => boolean;
|
||||
retryWaitFunc?: (retry: number) => number;
|
||||
} = {}
|
||||
timeoutFunc: (retry: number, startTime: number) => boolean
|
||||
): Observable<{ frames: DataFrame[]; error?: DataQueryError }> {
|
||||
const startTime = new Date();
|
||||
let retries = 0;
|
||||
@ -35,17 +29,9 @@ export function runWithRetry(
|
||||
let subscription: Subscription;
|
||||
let collected = {};
|
||||
|
||||
const timeoutFunction = options.timeoutFunc
|
||||
? options.timeoutFunc
|
||||
: (retry: number, startTime: number) => {
|
||||
return Date.now() >= startTime + (options.timeout === undefined ? defaultTimeout : options.timeout);
|
||||
};
|
||||
|
||||
const retryWaitFunction = options.retryWaitFunc
|
||||
? options.retryWaitFunc
|
||||
: (retry: number) => {
|
||||
return Math.pow(2, retry) * 1000 + Math.random() * 100;
|
||||
};
|
||||
const retryWaitFunction = (retry: number) => {
|
||||
return Math.pow(2, retry) * 1000 + Math.random() * 100;
|
||||
};
|
||||
|
||||
return new Observable((observer) => {
|
||||
// Run function is where the logic takes place. We have it in a function so we can call it recursively.
|
||||
@ -82,7 +68,7 @@ export function runWithRetry(
|
||||
return;
|
||||
}
|
||||
|
||||
if (timeoutFunction(retries, startTime.valueOf())) {
|
||||
if (timeoutFunc(retries, startTime.valueOf())) {
|
||||
// We timed out but we could have started some queries
|
||||
if (Object.keys(collected).length || Object.keys(errorData.good).length) {
|
||||
const dataResponse = toDataQueryResponse({
|
||||
|
Loading…
Reference in New Issue
Block a user