mirror of
https://github.com/grafana/grafana.git
synced 2024-11-27 11:20:27 -06:00
611 lines
16 KiB
Go
611 lines
16 KiB
Go
package cloudwatch
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/grafana/grafana/pkg/log"
|
|
"github.com/grafana/grafana/pkg/models"
|
|
"github.com/grafana/grafana/pkg/setting"
|
|
"github.com/grafana/grafana/pkg/tsdb"
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
"github.com/aws/aws-sdk-go/aws/awserr"
|
|
"github.com/aws/aws-sdk-go/aws/request"
|
|
"github.com/aws/aws-sdk-go/service/cloudwatch"
|
|
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
|
|
"github.com/grafana/grafana/pkg/components/null"
|
|
"github.com/grafana/grafana/pkg/components/simplejson"
|
|
"github.com/grafana/grafana/pkg/metrics"
|
|
)
|
|
|
|
type CloudWatchExecutor struct {
|
|
*models.DataSource
|
|
ec2Svc ec2iface.EC2API
|
|
}
|
|
|
|
type DatasourceInfo struct {
|
|
Profile string
|
|
Region string
|
|
AuthType string
|
|
AssumeRoleArn string
|
|
Namespace string
|
|
|
|
AccessKey string
|
|
SecretKey string
|
|
}
|
|
|
|
func NewCloudWatchExecutor(dsInfo *models.DataSource) (tsdb.TsdbQueryEndpoint, error) {
|
|
return &CloudWatchExecutor{}, nil
|
|
}
|
|
|
|
var (
|
|
plog log.Logger
|
|
standardStatistics map[string]bool
|
|
aliasFormat *regexp.Regexp
|
|
)
|
|
|
|
func init() {
|
|
plog = log.New("tsdb.cloudwatch")
|
|
tsdb.RegisterTsdbQueryEndpoint("cloudwatch", NewCloudWatchExecutor)
|
|
standardStatistics = map[string]bool{
|
|
"Average": true,
|
|
"Maximum": true,
|
|
"Minimum": true,
|
|
"Sum": true,
|
|
"SampleCount": true,
|
|
}
|
|
aliasFormat = regexp.MustCompile(`\{\{\s*(.+?)\s*\}\}`)
|
|
}
|
|
|
|
func (e *CloudWatchExecutor) Query(ctx context.Context, dsInfo *models.DataSource, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
|
|
var result *tsdb.Response
|
|
e.DataSource = dsInfo
|
|
queryType := queryContext.Queries[0].Model.Get("type").MustString("")
|
|
var err error
|
|
|
|
switch queryType {
|
|
case "metricFindQuery":
|
|
result, err = e.executeMetricFindQuery(ctx, queryContext)
|
|
case "annotationQuery":
|
|
result, err = e.executeAnnotationQuery(ctx, queryContext)
|
|
case "timeSeriesQuery":
|
|
fallthrough
|
|
default:
|
|
result, err = e.executeTimeSeriesQuery(ctx, queryContext)
|
|
}
|
|
|
|
return result, err
|
|
}
|
|
|
|
func (e *CloudWatchExecutor) executeTimeSeriesQuery(ctx context.Context, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
|
|
results := &tsdb.Response{
|
|
Results: make(map[string]*tsdb.QueryResult),
|
|
}
|
|
resultChan := make(chan *tsdb.QueryResult, len(queryContext.Queries))
|
|
|
|
eg, ectx := errgroup.WithContext(ctx)
|
|
|
|
getMetricDataQueries := make(map[string]map[string]*CloudWatchQuery)
|
|
for i, model := range queryContext.Queries {
|
|
queryType := model.Model.Get("type").MustString()
|
|
if queryType != "timeSeriesQuery" && queryType != "" {
|
|
continue
|
|
}
|
|
|
|
RefId := queryContext.Queries[i].RefId
|
|
query, err := parseQuery(queryContext.Queries[i].Model)
|
|
if err != nil {
|
|
results.Results[RefId] = &tsdb.QueryResult{
|
|
Error: err,
|
|
}
|
|
return results, nil
|
|
}
|
|
query.RefId = RefId
|
|
|
|
if query.Id != "" {
|
|
if _, ok := getMetricDataQueries[query.Region]; !ok {
|
|
getMetricDataQueries[query.Region] = make(map[string]*CloudWatchQuery)
|
|
}
|
|
getMetricDataQueries[query.Region][query.Id] = query
|
|
continue
|
|
}
|
|
|
|
if query.Id == "" && query.Expression != "" {
|
|
results.Results[query.RefId] = &tsdb.QueryResult{
|
|
Error: fmt.Errorf("Invalid query: id should be set if using expression"),
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
eg.Go(func() error {
|
|
defer func() {
|
|
if err := recover(); err != nil {
|
|
plog.Error("Execute Query Panic", "error", err, "stack", log.Stack(1))
|
|
if theErr, ok := err.(error); ok {
|
|
resultChan <- &tsdb.QueryResult{
|
|
RefId: query.RefId,
|
|
Error: theErr,
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
queryRes, err := e.executeQuery(ectx, query, queryContext)
|
|
if ae, ok := err.(awserr.Error); ok && ae.Code() == "500" {
|
|
return err
|
|
}
|
|
if err != nil {
|
|
resultChan <- &tsdb.QueryResult{
|
|
RefId: query.RefId,
|
|
Error: err,
|
|
}
|
|
return nil
|
|
}
|
|
resultChan <- queryRes
|
|
return nil
|
|
})
|
|
}
|
|
|
|
if len(getMetricDataQueries) > 0 {
|
|
for region, getMetricDataQuery := range getMetricDataQueries {
|
|
q := getMetricDataQuery
|
|
eg.Go(func() error {
|
|
defer func() {
|
|
if err := recover(); err != nil {
|
|
plog.Error("Execute Get Metric Data Query Panic", "error", err, "stack", log.Stack(1))
|
|
if theErr, ok := err.(error); ok {
|
|
resultChan <- &tsdb.QueryResult{
|
|
Error: theErr,
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
queryResponses, err := e.executeGetMetricDataQuery(ectx, region, q, queryContext)
|
|
if ae, ok := err.(awserr.Error); ok && ae.Code() == "500" {
|
|
return err
|
|
}
|
|
for _, queryRes := range queryResponses {
|
|
if err != nil {
|
|
queryRes.Error = err
|
|
}
|
|
resultChan <- queryRes
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
}
|
|
|
|
if err := eg.Wait(); err != nil {
|
|
return nil, err
|
|
}
|
|
close(resultChan)
|
|
for result := range resultChan {
|
|
results.Results[result.RefId] = result
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (e *CloudWatchExecutor) executeQuery(ctx context.Context, query *CloudWatchQuery, queryContext *tsdb.TsdbQuery) (*tsdb.QueryResult, error) {
|
|
client, err := e.getClient(query.Region)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
startTime, err := queryContext.TimeRange.ParseFrom()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
endTime, err := queryContext.TimeRange.ParseTo()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !startTime.Before(endTime) {
|
|
return nil, fmt.Errorf("Invalid time range: Start time must be before end time")
|
|
}
|
|
|
|
params := &cloudwatch.GetMetricStatisticsInput{
|
|
Namespace: aws.String(query.Namespace),
|
|
MetricName: aws.String(query.MetricName),
|
|
Dimensions: query.Dimensions,
|
|
Period: aws.Int64(int64(query.Period)),
|
|
}
|
|
if len(query.Statistics) > 0 {
|
|
params.Statistics = query.Statistics
|
|
}
|
|
if len(query.ExtendedStatistics) > 0 {
|
|
params.ExtendedStatistics = query.ExtendedStatistics
|
|
}
|
|
|
|
// 1 minutes resolution metrics is stored for 15 days, 15 * 24 * 60 = 21600
|
|
if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
|
|
return nil, errors.New("too long query period")
|
|
}
|
|
var resp *cloudwatch.GetMetricStatisticsOutput
|
|
for startTime.Before(endTime) {
|
|
params.StartTime = aws.Time(startTime)
|
|
if query.HighResolution {
|
|
startTime = startTime.Add(time.Duration(1440*query.Period) * time.Second)
|
|
} else {
|
|
startTime = endTime
|
|
}
|
|
params.EndTime = aws.Time(startTime)
|
|
|
|
if setting.Env == setting.DEV {
|
|
plog.Debug("CloudWatch query", "raw query", params)
|
|
}
|
|
|
|
partResp, err := client.GetMetricStatisticsWithContext(ctx, params, request.WithResponseReadTimeout(10*time.Second))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if resp != nil {
|
|
resp.Datapoints = append(resp.Datapoints, partResp.Datapoints...)
|
|
} else {
|
|
resp = partResp
|
|
|
|
}
|
|
metrics.M_Aws_CloudWatch_GetMetricStatistics.Inc()
|
|
}
|
|
|
|
queryRes, err := parseResponse(resp, query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return queryRes, nil
|
|
}
|
|
|
|
func (e *CloudWatchExecutor) executeGetMetricDataQuery(ctx context.Context, region string, queries map[string]*CloudWatchQuery, queryContext *tsdb.TsdbQuery) ([]*tsdb.QueryResult, error) {
|
|
queryResponses := make([]*tsdb.QueryResult, 0)
|
|
|
|
// validate query
|
|
for _, query := range queries {
|
|
if !(len(query.Statistics) == 1 && len(query.ExtendedStatistics) == 0) &&
|
|
!(len(query.Statistics) == 0 && len(query.ExtendedStatistics) == 1) {
|
|
return queryResponses, errors.New("Statistics count should be 1")
|
|
}
|
|
}
|
|
|
|
client, err := e.getClient(region)
|
|
if err != nil {
|
|
return queryResponses, err
|
|
}
|
|
|
|
startTime, err := queryContext.TimeRange.ParseFrom()
|
|
if err != nil {
|
|
return queryResponses, err
|
|
}
|
|
|
|
endTime, err := queryContext.TimeRange.ParseTo()
|
|
if err != nil {
|
|
return queryResponses, err
|
|
}
|
|
|
|
params := &cloudwatch.GetMetricDataInput{
|
|
StartTime: aws.Time(startTime),
|
|
EndTime: aws.Time(endTime),
|
|
ScanBy: aws.String("TimestampAscending"),
|
|
}
|
|
for _, query := range queries {
|
|
// 1 minutes resolution metrics is stored for 15 days, 15 * 24 * 60 = 21600
|
|
if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
|
|
return queryResponses, errors.New("too long query period")
|
|
}
|
|
|
|
mdq := &cloudwatch.MetricDataQuery{
|
|
Id: aws.String(query.Id),
|
|
ReturnData: aws.Bool(query.ReturnData),
|
|
}
|
|
if query.Expression != "" {
|
|
mdq.Expression = aws.String(query.Expression)
|
|
} else {
|
|
mdq.MetricStat = &cloudwatch.MetricStat{
|
|
Metric: &cloudwatch.Metric{
|
|
Namespace: aws.String(query.Namespace),
|
|
MetricName: aws.String(query.MetricName),
|
|
},
|
|
Period: aws.Int64(int64(query.Period)),
|
|
}
|
|
for _, d := range query.Dimensions {
|
|
mdq.MetricStat.Metric.Dimensions = append(mdq.MetricStat.Metric.Dimensions,
|
|
&cloudwatch.Dimension{
|
|
Name: d.Name,
|
|
Value: d.Value,
|
|
})
|
|
}
|
|
if len(query.Statistics) == 1 {
|
|
mdq.MetricStat.Stat = query.Statistics[0]
|
|
} else {
|
|
mdq.MetricStat.Stat = query.ExtendedStatistics[0]
|
|
}
|
|
}
|
|
params.MetricDataQueries = append(params.MetricDataQueries, mdq)
|
|
}
|
|
|
|
nextToken := ""
|
|
mdr := make(map[string]*cloudwatch.MetricDataResult)
|
|
for {
|
|
if nextToken != "" {
|
|
params.NextToken = aws.String(nextToken)
|
|
}
|
|
resp, err := client.GetMetricDataWithContext(ctx, params)
|
|
if err != nil {
|
|
return queryResponses, err
|
|
}
|
|
metrics.M_Aws_CloudWatch_GetMetricData.Add(float64(len(params.MetricDataQueries)))
|
|
|
|
for _, r := range resp.MetricDataResults {
|
|
if _, ok := mdr[*r.Id]; !ok {
|
|
mdr[*r.Id] = r
|
|
} else {
|
|
mdr[*r.Id].Timestamps = append(mdr[*r.Id].Timestamps, r.Timestamps...)
|
|
mdr[*r.Id].Values = append(mdr[*r.Id].Values, r.Values...)
|
|
}
|
|
}
|
|
|
|
if resp.NextToken == nil || *resp.NextToken == "" {
|
|
break
|
|
}
|
|
nextToken = *resp.NextToken
|
|
}
|
|
|
|
for i, r := range mdr {
|
|
if *r.StatusCode != "Complete" {
|
|
return queryResponses, fmt.Errorf("Part of query is failed: %s", *r.StatusCode)
|
|
}
|
|
|
|
queryRes := tsdb.NewQueryResult()
|
|
queryRes.RefId = queries[i].RefId
|
|
query := queries[*r.Id]
|
|
|
|
series := tsdb.TimeSeries{
|
|
Tags: map[string]string{},
|
|
Points: make([]tsdb.TimePoint, 0),
|
|
}
|
|
for _, d := range query.Dimensions {
|
|
series.Tags[*d.Name] = *d.Value
|
|
}
|
|
s := ""
|
|
if len(query.Statistics) == 1 {
|
|
s = *query.Statistics[0]
|
|
} else {
|
|
s = *query.ExtendedStatistics[0]
|
|
}
|
|
series.Name = formatAlias(query, s, series.Tags)
|
|
|
|
for j, t := range r.Timestamps {
|
|
expectedTimestamp := r.Timestamps[j].Add(time.Duration(query.Period) * time.Second)
|
|
if j > 0 && expectedTimestamp.Before(*t) {
|
|
series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(expectedTimestamp.Unix()*1000)))
|
|
}
|
|
series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(*r.Values[j]), float64((*t).Unix())*1000))
|
|
}
|
|
|
|
queryRes.Series = append(queryRes.Series, &series)
|
|
queryRes.Meta = simplejson.New()
|
|
queryResponses = append(queryResponses, queryRes)
|
|
}
|
|
|
|
return queryResponses, nil
|
|
}
|
|
|
|
func parseDimensions(model *simplejson.Json) ([]*cloudwatch.Dimension, error) {
|
|
var result []*cloudwatch.Dimension
|
|
|
|
for k, v := range model.Get("dimensions").MustMap() {
|
|
kk := k
|
|
if vv, ok := v.(string); ok {
|
|
result = append(result, &cloudwatch.Dimension{
|
|
Name: &kk,
|
|
Value: &vv,
|
|
})
|
|
} else {
|
|
return nil, errors.New("failed to parse")
|
|
}
|
|
}
|
|
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return *result[i].Name < *result[j].Name
|
|
})
|
|
return result, nil
|
|
}
|
|
|
|
func parseStatistics(model *simplejson.Json) ([]string, []string, error) {
|
|
var statistics []string
|
|
var extendedStatistics []string
|
|
|
|
for _, s := range model.Get("statistics").MustArray() {
|
|
if ss, ok := s.(string); ok {
|
|
if _, isStandard := standardStatistics[ss]; isStandard {
|
|
statistics = append(statistics, ss)
|
|
} else {
|
|
extendedStatistics = append(extendedStatistics, ss)
|
|
}
|
|
} else {
|
|
return nil, nil, errors.New("failed to parse")
|
|
}
|
|
}
|
|
|
|
return statistics, extendedStatistics, nil
|
|
}
|
|
|
|
func parseQuery(model *simplejson.Json) (*CloudWatchQuery, error) {
|
|
region, err := model.Get("region").String()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
namespace, err := model.Get("namespace").String()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
metricName, err := model.Get("metricName").String()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
id := model.Get("id").MustString("")
|
|
expression := model.Get("expression").MustString("")
|
|
|
|
dimensions, err := parseDimensions(model)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
statistics, extendedStatistics, err := parseStatistics(model)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
p := model.Get("period").MustString("")
|
|
if p == "" {
|
|
if namespace == "AWS/EC2" {
|
|
p = "300"
|
|
} else {
|
|
p = "60"
|
|
}
|
|
}
|
|
|
|
var period int
|
|
if regexp.MustCompile(`^\d+$`).Match([]byte(p)) {
|
|
period, err = strconv.Atoi(p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
d, err := time.ParseDuration(p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
period = int(d.Seconds())
|
|
}
|
|
|
|
alias := model.Get("alias").MustString()
|
|
if alias == "" {
|
|
alias = "{{metric}}_{{stat}}"
|
|
}
|
|
|
|
returnData := model.Get("returnData").MustBool(false)
|
|
highResolution := model.Get("highResolution").MustBool(false)
|
|
|
|
return &CloudWatchQuery{
|
|
Region: region,
|
|
Namespace: namespace,
|
|
MetricName: metricName,
|
|
Dimensions: dimensions,
|
|
Statistics: aws.StringSlice(statistics),
|
|
ExtendedStatistics: aws.StringSlice(extendedStatistics),
|
|
Period: period,
|
|
Alias: alias,
|
|
Id: id,
|
|
Expression: expression,
|
|
ReturnData: returnData,
|
|
HighResolution: highResolution,
|
|
}, nil
|
|
}
|
|
|
|
func formatAlias(query *CloudWatchQuery, stat string, dimensions map[string]string) string {
|
|
if len(query.Id) > 0 && len(query.Expression) > 0 {
|
|
return query.Id
|
|
}
|
|
|
|
data := map[string]string{}
|
|
data["region"] = query.Region
|
|
data["namespace"] = query.Namespace
|
|
data["metric"] = query.MetricName
|
|
data["stat"] = stat
|
|
data["period"] = strconv.Itoa(query.Period)
|
|
for k, v := range dimensions {
|
|
data[k] = v
|
|
}
|
|
|
|
result := aliasFormat.ReplaceAllFunc([]byte(query.Alias), func(in []byte) []byte {
|
|
labelName := strings.Replace(string(in), "{{", "", 1)
|
|
labelName = strings.Replace(labelName, "}}", "", 1)
|
|
labelName = strings.TrimSpace(labelName)
|
|
if val, exists := data[labelName]; exists {
|
|
return []byte(val)
|
|
}
|
|
|
|
return in
|
|
})
|
|
|
|
return string(result)
|
|
}
|
|
|
|
func parseResponse(resp *cloudwatch.GetMetricStatisticsOutput, query *CloudWatchQuery) (*tsdb.QueryResult, error) {
|
|
queryRes := tsdb.NewQueryResult()
|
|
|
|
queryRes.RefId = query.RefId
|
|
var value float64
|
|
for _, s := range append(query.Statistics, query.ExtendedStatistics...) {
|
|
series := tsdb.TimeSeries{
|
|
Tags: map[string]string{},
|
|
Points: make([]tsdb.TimePoint, 0),
|
|
}
|
|
for _, d := range query.Dimensions {
|
|
series.Tags[*d.Name] = *d.Value
|
|
}
|
|
series.Name = formatAlias(query, *s, series.Tags)
|
|
|
|
lastTimestamp := make(map[string]time.Time)
|
|
sort.Slice(resp.Datapoints, func(i, j int) bool {
|
|
return (*resp.Datapoints[i].Timestamp).Before(*resp.Datapoints[j].Timestamp)
|
|
})
|
|
for _, v := range resp.Datapoints {
|
|
switch *s {
|
|
case "Average":
|
|
value = *v.Average
|
|
case "Maximum":
|
|
value = *v.Maximum
|
|
case "Minimum":
|
|
value = *v.Minimum
|
|
case "Sum":
|
|
value = *v.Sum
|
|
case "SampleCount":
|
|
value = *v.SampleCount
|
|
default:
|
|
if strings.Index(*s, "p") == 0 && v.ExtendedStatistics[*s] != nil {
|
|
value = *v.ExtendedStatistics[*s]
|
|
}
|
|
}
|
|
|
|
// terminate gap of data points
|
|
timestamp := *v.Timestamp
|
|
if _, ok := lastTimestamp[*s]; ok {
|
|
nextTimestampFromLast := lastTimestamp[*s].Add(time.Duration(query.Period) * time.Second)
|
|
for timestamp.After(nextTimestampFromLast) {
|
|
series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(nextTimestampFromLast.Unix()*1000)))
|
|
nextTimestampFromLast = nextTimestampFromLast.Add(time.Duration(query.Period) * time.Second)
|
|
}
|
|
}
|
|
lastTimestamp[*s] = timestamp
|
|
|
|
series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(value), float64(timestamp.Unix()*1000)))
|
|
}
|
|
|
|
queryRes.Series = append(queryRes.Series, &series)
|
|
queryRes.Meta = simplejson.New()
|
|
if len(resp.Datapoints) > 0 && resp.Datapoints[0].Unit != nil {
|
|
if unit, ok := cloudwatchUnitMappings[*resp.Datapoints[0].Unit]; ok {
|
|
queryRes.Meta.Set("unit", unit)
|
|
}
|
|
}
|
|
}
|
|
|
|
return queryRes, nil
|
|
}
|