diff --git a/pkg/services/ngalert/writer/prom.go b/pkg/services/ngalert/writer/prom.go index 5bfa688c10b..15f768e7c67 100644 --- a/pkg/services/ngalert/writer/prom.go +++ b/pkg/services/ngalert/writer/prom.go @@ -11,10 +11,11 @@ import ( "github.com/benbjohnson/clock" "github.com/grafana/dataplane/sdata/numeric" + "github.com/m3db/prometheus_remote_client_golang/promremote" + "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/setting" - "github.com/m3db/prometheus_remote_client_golang/promremote" "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient" "github.com/grafana/grafana-plugin-sdk-go/data" @@ -26,6 +27,7 @@ const ( // Fixed error messages MimirDuplicateTimestampError = "err-mimir-sample-duplicate-timestamp" MimirInvalidLabelError = "err-mimir-label-invalid" + MimirMaxSeriesPerUserError = "err-mimir-max-series-per-user" // Best effort error messages PrometheusDuplicateTimestampError = "duplicate sample for timestamp" @@ -268,6 +270,11 @@ func checkWriteError(writeErr promremote.WriteError) (err error, ignored bool) { return errors.Join(ErrRejectedWrite, writeErr), false } + // This can happen when the user has exceeded the configured per-user maximum number of series. + if strings.Contains(msg, MimirMaxSeriesPerUserError) { + return errors.Join(ErrRejectedWrite, writeErr), false + } + // For now, all 400s that are not previously known are considered unexpected. // TODO: Consider blanket-converting all 400s to be known errors. This should only be done once we are confident this is not a problem with this client. 
return errors.Join(ErrUnexpectedWriteFailure, writeErr), false diff --git a/pkg/services/ngalert/writer/prom_test.go b/pkg/services/ngalert/writer/prom_test.go index 43297948354..c2e61a6df39 100644 --- a/pkg/services/ngalert/writer/prom_test.go +++ b/pkg/services/ngalert/writer/prom_test.go @@ -12,13 +12,14 @@ import ( "github.com/benbjohnson/clock" "github.com/grafana/grafana-plugin-sdk-go/data" - "github.com/grafana/grafana/pkg/infra/log" - "github.com/grafana/grafana/pkg/setting" "github.com/m3db/prometheus_remote_client_golang/promremote" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/require" + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/setting" + "github.com/grafana/grafana/pkg/services/ngalert/metrics" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" ) @@ -221,6 +222,22 @@ func TestPrometheusWriter_Write(t *testing.T) { require.Error(t, err) require.ErrorIs(t, err, ErrRejectedWrite) }) + + t.Run("max series limit fit under the client error category ", func(t *testing.T) { + msg := "send data to ingesters: failed pushing to ingester ingester-1: user=1: per-user series limit of 10 exceeded (err-mimir-max-series-per-user). To adjust the related per-tenant limit, configure -ingester.max-global-series-per-user, or contact your service administrator." + clientErr := testClientWriteError{ + statusCode: http.StatusBadRequest, + msg: &msg, + } + client.writeSeriesFunc = func(ctx context.Context, ts promremote.TSList, opts promremote.WriteOptions) (promremote.WriteResult, promremote.WriteError) { + return promremote.WriteResult{}, clientErr + } + + err := writer.Write(ctx, "test", now, frames, 1, map[string]string{"extra": "label"}) + + require.Error(t, err) + require.ErrorIs(t, err, ErrRejectedWrite) + }) } func extractValue(t *testing.T, frames data.Frames, labels map[string]string, frameType data.FrameType) float64 {