mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add max limit for Loki query size in state history API (#89646)
* add setting for query limit * update BuildLogQuery to return error if limit is exceeded * move tests for BuildLogQuery to separate suite
This commit is contained in:
parent
9a3477dd11
commit
4a5aab54a5
@ -1387,6 +1387,12 @@ loki_basic_auth_password =
|
||||
# Optional max query length for queries sent to Loki. Default is 721h which matches the default Loki value.
|
||||
loki_max_query_length = 721h
|
||||
|
||||
# For "loki" only.
|
||||
# Maximum size in bytes for queries sent to Loki. This limit is applied to user provided filters as well as system defined ones, e.g. applied by access control.
|
||||
# If filter exceeds the limit, API returns error with code "alerting.state-history.loki.requestTooLong".
|
||||
# Default is 64kb
|
||||
loki_max_query_size = 65536
|
||||
|
||||
[unified_alerting.state_history.external_labels]
|
||||
# Optional extra labels to attach to outbound state history records or log streams.
|
||||
# Any number of label key-value-pairs can be provided.
|
||||
|
@ -1379,6 +1379,12 @@ disabled_labels =
|
||||
# Optional max query length for queries sent to Loki. Default is 721h which matches the default Loki value.
|
||||
; loki_max_query_length = 360h
|
||||
|
||||
# For "loki" only.
|
||||
# Maximum size in bytes for queries sent to Loki. This limit is applied to user provided filters as well as system defined ones, e.g. applied by access control.
|
||||
# If filter exceeds the limit, API returns error with code "alerting.state-history.loki.requestTooLong".
|
||||
# Default is 64kb
|
||||
;loki_max_query_size = 65536
|
||||
|
||||
[unified_alerting.state_history.external_labels]
|
||||
# Optional extra labels to attach to outbound state history records or log streams.
|
||||
# Any number of label key-value-pairs can be provided.
|
||||
|
@ -45,6 +45,7 @@ var (
|
||||
|
||||
type lokiQueryClient interface {
|
||||
RangeQuery(ctx context.Context, query string, start, end, limit int64) (historian.QueryRes, error)
|
||||
MaxQuerySize() int
|
||||
}
|
||||
|
||||
// LokiHistorianStore is a read store that queries Loki for alert state history.
|
||||
@ -98,8 +99,12 @@ func (r *LokiHistorianStore) Get(ctx context.Context, query *annotations.ItemQue
|
||||
}
|
||||
}
|
||||
|
||||
logQL, err := historian.BuildLogQuery(buildHistoryQuery(query, accessResources.Dashboards, rule.UID))
|
||||
logQL, err := historian.BuildLogQuery(buildHistoryQuery(query, accessResources.Dashboards, rule.UID), r.client.MaxQuerySize())
|
||||
if err != nil {
|
||||
grafanaErr := errutil.Error{}
|
||||
if errors.As(err, &grafanaErr) {
|
||||
return make([]*annotations.ItemDTO, 0), err
|
||||
}
|
||||
return make([]*annotations.ItemDTO, 0), ErrLokiStoreInternal.Errorf("failed to build loki query: %w", err)
|
||||
}
|
||||
|
||||
|
@ -778,6 +778,7 @@ func NewFakeLokiClient() *FakeLokiClient {
|
||||
ReadPathURL: url,
|
||||
Encoder: historian.JsonEncoder{},
|
||||
MaxQueryLength: 721 * time.Hour,
|
||||
MaxQuerySize: 65536,
|
||||
},
|
||||
metrics: metrics,
|
||||
log: log.New("ngalert.state.historian", "backend", "loki"),
|
||||
@ -812,6 +813,10 @@ func (c *FakeLokiClient) RangeQuery(ctx context.Context, query string, from, to,
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *FakeLokiClient) MaxQuerySize() int {
|
||||
return c.cfg.MaxQuerySize
|
||||
}
|
||||
|
||||
func TestUseStore(t *testing.T) {
|
||||
t.Run("false if state history disabled", func(t *testing.T) {
|
||||
cfg := setting.UnifiedAlertingStateHistorySettings{
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
|
||||
"github.com/grafana/grafana/pkg/apimachinery/errutil"
|
||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||
@ -41,10 +42,30 @@ const (
|
||||
|
||||
const defaultQueryRange = 6 * time.Hour
|
||||
|
||||
var (
|
||||
ErrLokiQueryTooLong = errutil.BadRequest("loki.requestTooLong").MustTemplate(
|
||||
"Request to Loki exceeded ({{.Public.QuerySize}} bytes) configured maximum size of {{.Public.MaxLimit}} bytes",
|
||||
errutil.WithPublic("Query for Loki exceeded the configured limit of {{.Public.MaxLimit}} bytes. Remove some filters and try again."),
|
||||
)
|
||||
)
|
||||
|
||||
func NewErrLokiQueryTooLong(query string, maxLimit int) error {
|
||||
return ErrLokiQueryTooLong.Build(errutil.TemplateData{
|
||||
Private: map[string]any{
|
||||
"query": query,
|
||||
},
|
||||
Public: map[string]any{
|
||||
"MaxLimit": maxLimit,
|
||||
"QuerySize": len(query),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
type remoteLokiClient interface {
|
||||
Ping(context.Context) error
|
||||
Push(context.Context, []Stream) error
|
||||
RangeQuery(ctx context.Context, logQL string, start, end, limit int64) (QueryRes, error)
|
||||
MaxQuerySize() int
|
||||
}
|
||||
|
||||
// RemoteLokibackend is a state.Historian that records state history to an external Loki instance.
|
||||
@ -112,7 +133,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM
|
||||
|
||||
// Query retrieves state history entries from an external Loki instance and formats the results into a dataframe.
|
||||
func (h *RemoteLokiBackend) Query(ctx context.Context, query models.HistoryQuery) (*data.Frame, error) {
|
||||
logQL, err := BuildLogQuery(query)
|
||||
logQL, err := BuildLogQuery(query, h.client.MaxQuerySize())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -366,7 +387,9 @@ func isValidOperator(op string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func BuildLogQuery(query models.HistoryQuery) (string, error) {
|
||||
// BuildLogQuery converts models.HistoryQuery and a list of folder UIDs to a Loki query.
|
||||
// Returns a Loki query or error if log query cannot be constructed. If user-defined query exceeds maximum allowed size returns ErrLokiQueryTooLong
|
||||
func BuildLogQuery(query models.HistoryQuery, maxQuerySize int) (string, error) {
|
||||
selectors, err := buildSelectors(query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to build the provided selectors: %w", err)
|
||||
@ -400,6 +423,10 @@ func BuildLogQuery(query models.HistoryQuery) (string, error) {
|
||||
}
|
||||
logQL += labelFilters
|
||||
|
||||
if len(logQL) > maxQuerySize {
|
||||
// if the query is too long even without filter by folders, then fail
|
||||
return "", NewErrLokiQueryTooLong(logQL, maxQuerySize)
|
||||
}
|
||||
return logQL, nil
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@ type LokiConfig struct {
|
||||
ExternalLabels map[string]string
|
||||
Encoder encoder
|
||||
MaxQueryLength time.Duration
|
||||
MaxQuerySize int
|
||||
}
|
||||
|
||||
func NewLokiConfig(cfg setting.UnifiedAlertingStateHistorySettings) (LokiConfig, error) {
|
||||
@ -77,6 +78,7 @@ func NewLokiConfig(cfg setting.UnifiedAlertingStateHistorySettings) (LokiConfig,
|
||||
TenantID: cfg.LokiTenantID,
|
||||
ExternalLabels: cfg.ExternalLabels,
|
||||
MaxQueryLength: cfg.LokiMaxQueryLength,
|
||||
MaxQuerySize: cfg.LokiMaxQuerySize,
|
||||
// Snappy-compressed protobuf is the default, same goes for Promtail.
|
||||
Encoder: SnappyProtoEncoder{},
|
||||
}, nil
|
||||
@ -281,6 +283,10 @@ func (c *HttpLokiClient) RangeQuery(ctx context.Context, logQL string, start, en
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *HttpLokiClient) MaxQuerySize() int {
|
||||
return c.cfg.MaxQuerySize
|
||||
}
|
||||
|
||||
type QueryRes struct {
|
||||
Data QueryData `json:"data"`
|
||||
}
|
||||
|
@ -8,10 +8,15 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/client"
|
||||
@ -20,9 +25,6 @@ import (
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestRemoteLokiBackend(t *testing.T) {
|
||||
@ -214,81 +216,110 @@ func TestRemoteLokiBackend(t *testing.T) {
|
||||
selector, err = NewSelector("label", "invalid", "value")
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("buildLogQuery", func(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
query models.HistoryQuery
|
||||
exp string
|
||||
}{
|
||||
{
|
||||
name: "default includes state history label and orgID label",
|
||||
query: models.HistoryQuery{},
|
||||
exp: `{orgID="0",from="state-history"}`,
|
||||
func TestBuildLogQuery(t *testing.T) {
|
||||
maxQuerySize := 110
|
||||
cases := []struct {
|
||||
name string
|
||||
query models.HistoryQuery
|
||||
exp string
|
||||
expErr error
|
||||
}{
|
||||
{
|
||||
name: "default includes state history label and orgID label",
|
||||
query: models.HistoryQuery{},
|
||||
exp: `{orgID="0",from="state-history"}`,
|
||||
},
|
||||
{
|
||||
name: "adds stream label filter for orgID",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
},
|
||||
{
|
||||
name: "adds stream label filter for orgID",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
exp: `{orgID="123",from="state-history"}`,
|
||||
},
|
||||
{
|
||||
name: "filters ruleUID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | ruleUID="rule-uid"`,
|
||||
},
|
||||
{
|
||||
name: "filters dashboardUID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
DashboardUID: "dash-uid",
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | dashboardUID="dash-uid"`,
|
||||
},
|
||||
{
|
||||
name: "filters panelID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
PanelID: 456,
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | panelID=456`,
|
||||
},
|
||||
{
|
||||
name: "filters instance labels in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
Labels: map[string]string{
|
||||
"customlabel": "customvalue",
|
||||
"labeltwo": "labelvaluetwo",
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"}`,
|
||||
},
|
||||
{
|
||||
name: "filters ruleUID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
exp: `{orgID="123",from="state-history"} | json | labels_customlabel="customvalue" | labels_labeltwo="labelvaluetwo"`,
|
||||
},
|
||||
{
|
||||
name: "filters both instance labels + ruleUID",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
Labels: map[string]string{
|
||||
"customlabel": "customvalue",
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | ruleUID="rule-uid"`,
|
||||
},
|
||||
{
|
||||
name: "filters dashboardUID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
DashboardUID: "dash-uid",
|
||||
exp: `{orgID="123",from="state-history"} | json | ruleUID="rule-uid" | labels_customlabel="customvalue"`,
|
||||
},
|
||||
{
|
||||
name: "should return if query does not exceed max limit",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
Labels: map[string]string{
|
||||
"customlabel": strings.Repeat("!", 24),
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | dashboardUID="dash-uid"`,
|
||||
},
|
||||
{
|
||||
name: "filters panelID in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
PanelID: 456,
|
||||
exp: `{orgID="123",from="state-history"} | json | ruleUID="rule-uid" | labels_customlabel="!!!!!!!!!!!!!!!!!!!!!!!!"`,
|
||||
},
|
||||
{
|
||||
name: "should return error if query is too long",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
Labels: map[string]string{
|
||||
"customlabel": strings.Repeat("!", 25),
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | panelID=456`,
|
||||
},
|
||||
{
|
||||
name: "filters instance labels in log line",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
Labels: map[string]string{
|
||||
"customlabel": "customvalue",
|
||||
"labeltwo": "labelvaluetwo",
|
||||
},
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | labels_customlabel="customvalue" | labels_labeltwo="labelvaluetwo"`,
|
||||
},
|
||||
{
|
||||
name: "filters both instance labels + ruleUID",
|
||||
query: models.HistoryQuery{
|
||||
OrgID: 123,
|
||||
RuleUID: "rule-uid",
|
||||
Labels: map[string]string{
|
||||
"customlabel": "customvalue",
|
||||
},
|
||||
},
|
||||
exp: `{orgID="123",from="state-history"} | json | ruleUID="rule-uid" | labels_customlabel="customvalue"`,
|
||||
},
|
||||
}
|
||||
expErr: ErrLokiQueryTooLong,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
res, err := BuildLogQuery(tc.query)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.exp, res)
|
||||
})
|
||||
}
|
||||
})
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
res, err := BuildLogQuery(tc.query, maxQuerySize)
|
||||
if tc.expErr != nil {
|
||||
require.ErrorIs(t, err, tc.expErr)
|
||||
return
|
||||
}
|
||||
require.LessOrEqual(t, len(res), maxQuerySize)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.exp, res)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
|
@ -64,6 +64,7 @@ const (
|
||||
stateHistoryDefaultEnabled = true
|
||||
lokiDefaultMaxQueryLength = 721 * time.Hour // 30d1h, matches the default value in Loki
|
||||
defaultRecordingRequestTimeout = 10 * time.Second
|
||||
lokiDefaultMaxQuerySize = 65536 // 64kb
|
||||
)
|
||||
|
||||
type UnifiedAlertingSettings struct {
|
||||
@ -160,6 +161,7 @@ type UnifiedAlertingStateHistorySettings struct {
|
||||
LokiBasicAuthPassword string
|
||||
LokiBasicAuthUsername string
|
||||
LokiMaxQueryLength time.Duration
|
||||
LokiMaxQuerySize int
|
||||
MultiPrimary string
|
||||
MultiSecondaries []string
|
||||
ExternalLabels map[string]string
|
||||
@ -404,6 +406,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
|
||||
LokiBasicAuthUsername: stateHistory.Key("loki_basic_auth_username").MustString(""),
|
||||
LokiBasicAuthPassword: stateHistory.Key("loki_basic_auth_password").MustString(""),
|
||||
LokiMaxQueryLength: stateHistory.Key("loki_max_query_length").MustDuration(lokiDefaultMaxQueryLength),
|
||||
LokiMaxQuerySize: stateHistory.Key("loki_max_query_size").MustInt(lokiDefaultMaxQuerySize),
|
||||
MultiPrimary: stateHistory.Key("primary").MustString(""),
|
||||
MultiSecondaries: splitTrim(stateHistory.Key("secondaries").MustString(""), ","),
|
||||
ExternalLabels: stateHistoryLabels.KeysHash(),
|
||||
|
Loading…
Reference in New Issue
Block a user