CloudWatch: Refactor query batching (#78581)

Co-authored-by: Fiona Liao <fiona.y.liao@gmail.com>
This commit is contained in:
Shirley 2023-11-30 05:43:34 +08:00 committed by GitHub
parent 3c89e68fc1
commit 86311e3a33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 46 deletions

View File

@ -13,72 +13,79 @@ var nonWordRegex = regexp.MustCompile(`\W+`)
// getMetricQueryBatches separates queries into batches if necessary. Metric Insight queries cannot run together, and math expressions must be run
// with all the queries they reference.
//
// NOTE(review): this span comes from a rendered diff (hunk header "-13,72 +13,79") with the +/- markers
// stripped, so statements from the pre-change and post-change implementations appear interleaved below and do
// not form a single compilable body. Reading the lines that use hasMultipleMetricInsights, idToQuery and
// getConnectedQueries as the post-change version: all queries are returned as one batch unless more than one
// Metric Insights query is present; otherwise one batch is emitted per query that no math expression
// references, containing that query plus every query reachable from it through math expression references.
func getMetricQueryBatches(queries []*models.CloudWatchQuery, logger log.Logger) [][]*models.CloudWatchQuery {
metricInsightIndices := []int{}
mathIndices := []int{}
for i, query := range queries {
switch query.GetGetMetricDataAPIMode() {
case models.GMDApiModeSQLExpression:
metricInsightIndices = append(metricInsightIndices, i)
case models.GMDApiModeMathExpression:
mathIndices = append(mathIndices, i)
default:
}
}
// We only need multiple batches if there are multiple metrics insight queries
if len(metricInsightIndices) <= 1 {
if !hasMultipleMetricInsights(queries) {
return [][]*models.CloudWatchQuery{queries}
}
logger.Debug("Separating queries into batches")
// Map ids to their queries
idToIndex := map[string]int{}
for i, query := range queries {
if query.Id != "" {
idToIndex[query.Id] = i
// set up list of math expression queries since below we loop over them to get what query IDs they reference
var mathQueries []*models.CloudWatchQuery
for _, query := range queries {
if query.GetGetMetricDataAPIMode() == models.GMDApiModeMathExpression {
mathQueries = append(mathQueries, query)
}
}
// Find and track which queries are referenced by math queries
queryReferences := make([][]int, len(queries))
isReferenced := make([]bool, len(queries))
for _, idx := range mathIndices {
tokens := nonWordRegex.Split(queries[idx].Expression, -1)
references := []int{}
for _, token := range tokens {
ref, found := idToIndex[token]
// put queries into a set in order to facilitate lookup below
idToQuery := make(map[string]*models.CloudWatchQuery, len(queries))
for _, q := range queries {
// NOTE(review): the idToIndex loop above skips queries whose Id is "", but this assignment stores them.
// nonWordRegex.Split can produce empty tokens (for example the trailing piece of "PERIOD(m98)"), so a
// query with an empty Id would be looked up successfully below and marked as referenced. Confirm that
// every query has a non-empty Id by the time this runs, or restore the guard.
idToQuery[q.Id] = q
}
// gets query IDs which are referenced in math expressions
mathQueryIdToReferences := make(map[string][]*models.CloudWatchQuery)
// we will use this set of referenced queries to determine the root queries below
referencedQueries := make(map[string]bool)
for _, mathQuery := range mathQueries {
// Split the expression on runs of non-word characters; each remaining piece is tried as a query id.
substrings := nonWordRegex.Split(mathQuery.Expression, -1)
for _, id := range substrings {
query, found := idToQuery[id]
if found {
references = append(references, ref)
isReferenced[ref] = true
mathQueryIdToReferences[mathQuery.Id] = append(mathQueryIdToReferences[mathQuery.Id], query)
referencedQueries[query.Id] = true
}
}
queryReferences[idx] = references
}
// Create a new batch for every query not used in another query
batches := [][]*models.CloudWatchQuery{}
for i, used := range isReferenced {
if !used {
batches = append(batches, getReferencedQueries(queries, queryReferences, i))
for _, query := range queries {
// if a query is not referenced, then it is a "root" query
// NOTE(review): batches are only started from unreferenced queries, so a group of math queries that
// reference each other in a cycle with no outside root is never added to any batch. Confirm intended.
if _, ok := referencedQueries[query.Id]; !ok {
batches = append(batches, getConnectedQueries(query, mathQueryIdToReferences))
}
}
return batches
}
// NOTE(review): this span comes from a rendered diff with the +/- markers stripped; the index-based
// getReferencedQueries and the pointer-based getConnectedQueries are interleaved and share closing braces,
// so the text below is not a single compilable definition.
// getReferencedQueries gets all the queries referenced by startQuery and its referenced queries
func getReferencedQueries(queries []*models.CloudWatchQuery, queryReferences [][]int, startQuery int) []*models.CloudWatchQuery {
usedQueries := make([]bool, len(queries))
batch := []*models.CloudWatchQuery{}
// getConnectedQueries does a breadth-first search to find all the queries connected to root by references.
// queryReferences maps a query id to the queries that query references. The root query itself is the first
// element of the returned slice, and each reachable query appears once.
func getConnectedQueries(root *models.CloudWatchQuery, queryReferences map[string][]*models.CloudWatchQuery) []*models.CloudWatchQuery {
// The root is marked visited up front so a reference back to it is not enqueued again.
visited := map[string]bool{root.Id: true}
queriesToReturn := []*models.CloudWatchQuery{}
queriesToAdd := []int{startQuery}
usedQueries[startQuery] = true
for i := 0; i < len(queriesToAdd); i++ {
batch = append(batch, queries[queriesToAdd[i]])
for _, queryIdx := range queryReferences[queriesToAdd[i]] {
if !usedQueries[queryIdx] {
usedQueries[queryIdx] = true
queriesToAdd = append(queriesToAdd, queryIdx)
// queriesToVisit acts as the search queue: it grows while the index loop walks it, so the loop ends once
// no unvisited references remain.
queriesToVisit := []*models.CloudWatchQuery{root}
for i := 0; i < len(queriesToVisit); i++ {
currentQuery := queriesToVisit[i]
queriesToReturn = append(queriesToReturn, currentQuery)
for _, queryRef := range queryReferences[currentQuery.Id] {
// Marking on enqueue (not on dequeue) keeps circular references from being queued twice.
if !visited[queryRef.Id] {
visited[queryRef.Id] = true
queriesToVisit = append(queriesToVisit, queryRef)
}
}
}
return batch
return queriesToReturn
}
// hasMultipleMetricInsights reports whether more than one of the given queries
// has the GetMetricData API mode models.GMDApiModeSQLExpression. It stops
// scanning as soon as the second such query is found.
func hasMultipleMetricInsights(queries []*models.CloudWatchQuery) bool {
	foundOne := false
	for _, q := range queries {
		if q.GetGetMetricDataAPIMode() != models.GMDApiModeSQLExpression {
			continue
		}
		// A second match is enough to answer; no need to look further.
		if foundOne {
			return true
		}
		foundOne = true
	}
	return false
}

View File

@ -22,6 +22,7 @@ func TestGetMetricQueryBatches(t *testing.T) {
MetricQueryType: models.MetricQueryTypeQuery,
Id: "i3",
}
metricStat := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeBuilder,
@ -33,6 +34,24 @@ func TestGetMetricQueryBatches(t *testing.T) {
Expression: "PERIOD(i1)",
Id: "m1",
}
m99_ref_m98 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m98)",
Id: "m99",
}
m98_ref_m88 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m88)",
Id: "m98",
}
m88_ref_m98 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m98)",
Id: "m88",
}
m2_ref_i1 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
@ -58,6 +77,22 @@ func TestGetMetricQueryBatches(t *testing.T) {
Id: "m5",
}
t.Run("m99 ref m98 which ref m88 which ref m98, with 2 insights", func(t *testing.T) {
batch := []*models.CloudWatchQuery{
&insight1,
&insight2,
&m99_ref_m98,
&m98_ref_m88,
&m88_ref_m98,
}
result := getMetricQueryBatches(batch, logger)
assert.Len(t, result, 3)
assert.ElementsMatch(t, []*models.CloudWatchQuery{&insight1}, result[0])
assert.ElementsMatch(t, []*models.CloudWatchQuery{&insight2}, result[1])
assert.ElementsMatch(t, []*models.CloudWatchQuery{&m99_ref_m98, &m98_ref_m88, &m88_ref_m98}, result[2])
})
t.Run("zero insight queries should not separate into batches", func(t *testing.T) {
batch := []*models.CloudWatchQuery{
&metricStat,