Alerting: Prom writer to handle err-mimir-max-series-per-user as user error (#98334)

handle err-mimir-max-series-per-user as user error
This commit is contained in:
Yuri Tseretyan
2025-01-06 10:54:56 -05:00
committed by GitHub
parent 742157b828
commit bfa56bcf08
2 changed files with 27 additions and 3 deletions

View File

@@ -11,10 +11,11 @@ import (
"github.com/benbjohnson/clock"
"github.com/grafana/dataplane/sdata/numeric"
"github.com/m3db/prometheus_remote_client_golang/promremote"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/setting"
"github.com/m3db/prometheus_remote_client_golang/promremote"
"github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana-plugin-sdk-go/data"
@@ -26,6 +27,7 @@ const (
// Fixed error messages
MimirDuplicateTimestampError = "err-mimir-sample-duplicate-timestamp"
MimirInvalidLabelError = "err-mimir-label-invalid"
MimirMaxSeriesPerUserError = "err-mimir-max-series-per-user"
// Best effort error messages
PrometheusDuplicateTimestampError = "duplicate sample for timestamp"
@@ -268,6 +270,11 @@ func checkWriteError(writeErr promremote.WriteError) (err error, ignored bool) {
return errors.Join(ErrRejectedWrite, writeErr), false
}
// This can happen when the user has exceeded the configured maximum number of series (Mimir's per-user series limit).
if strings.Contains(msg, MimirMaxSeriesPerUserError) {
return errors.Join(ErrRejectedWrite, writeErr), false
}
// For now, all 400s that are not previously known are considered unexpected.
// TODO: Consider blanket-converting all 400s to be known errors. This should only be done once we are confident this is not a problem with this client.
return errors.Join(ErrUnexpectedWriteFailure, writeErr), false

View File

@@ -12,13 +12,14 @@ import (
"github.com/benbjohnson/clock"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/setting"
"github.com/m3db/prometheus_remote_client_golang/promremote"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/prompb"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
@@ -221,6 +222,22 @@ func TestPrometheusWriter_Write(t *testing.T) {
require.Error(t, err)
require.ErrorIs(t, err, ErrRejectedWrite)
})
// Subtest: the remote-write client returns a 400 Bad Request whose message
// contains the Mimir error ID err-mimir-max-series-per-user. Write is expected
// to return an error that matches ErrRejectedWrite.
t.Run("max series limit fit under the client error category ", func(t *testing.T) {
// Sample rejection message for an exceeded per-user series limit; it embeds the
// "err-mimir-max-series-per-user" identifier.
msg := "send data to ingesters: failed pushing to ingester ingester-1: user=1: per-user series limit of 10 exceeded (err-mimir-max-series-per-user). To adjust the related per-tenant limit, configure -ingester.max-global-series-per-user, or contact your service administrator."
clientErr := testClientWriteError{
statusCode: http.StatusBadRequest,
msg: &msg,
}
// Stub the client's write function so every write fails with the error above.
client.writeSeriesFunc = func(ctx context.Context, ts promremote.TSList, opts promremote.WriteOptions) (promremote.WriteResult, promremote.WriteError) {
return promremote.WriteResult{}, clientErr
}
err := writer.Write(ctx, "test", now, frames, 1, map[string]string{"extra": "label"})
// Write must return an error, and that error must match ErrRejectedWrite.
require.Error(t, err)
require.ErrorIs(t, err, ErrRejectedWrite)
})
}
func extractValue(t *testing.T, frames data.Frames, labels map[string]string, frameType data.FrameType) float64 {