mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Tracing: Support remote, rate-limited, and probabilistic sampling in tracing.opentelemetry config section (#73587)
* tracing: Support remote sampling server Signed-off-by: Dave Henderson <dave.henderson@grafana.com> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com> * Update docs/sources/setup-grafana/configure-grafana/_index.md * Update docs/sources/setup-grafana/configure-grafana/_index.md * Update docs/sources/setup-grafana/configure-grafana/_index.md * Satisfying the doc-validator check * satisfy prettier Signed-off-by: Dave Henderson <dave.henderson@grafana.com> * back out unnecessary change Signed-off-by: Dave Henderson <dave.henderson@grafana.com> --------- Signed-off-by: Dave Henderson <dave.henderson@grafana.com> Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>
This commit is contained in:
parent
951876b465
commit
ce1169f8b7
@ -1374,6 +1374,18 @@ disable_shared_zipkin_spans = false
|
||||
|
||||
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
|
||||
custom_attributes =
|
||||
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
|
||||
sampler_type =
|
||||
# Sampler configuration parameter
|
||||
# for "const" sampler, 0 or 1 for always false/true respectively
|
||||
# for "probabilistic" sampler, a probability between 0.0 and 1.0
|
||||
# for "rateLimiting" sampler, the number of spans per second
|
||||
# for "remote" sampler, param is the same as for "probabilistic"
|
||||
# and indicates the initial sampling rate before the actual one
|
||||
# is received from the sampling server (set at sampling_server_url)
|
||||
sampler_param =
|
||||
# specifies the URL of the sampling server when sampler_type is remote
|
||||
sampling_server_url =
|
||||
|
||||
[tracing.opentelemetry.jaeger]
|
||||
# jaeger destination (ex http://localhost:14268/api/traces)
|
||||
@ -1668,4 +1680,4 @@ update_controller_url =
|
||||
hidden_toggles =
|
||||
|
||||
# Disables updating specific feature toggles in the feature management page
|
||||
read_only_toggles =
|
||||
read_only_toggles =
|
||||
|
@ -1272,6 +1272,18 @@
|
||||
[tracing.opentelemetry]
|
||||
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
|
||||
;custom_attributes = key1:value1,key2:value2
|
||||
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
|
||||
; sampler_type = remote
|
||||
# Sampler configuration parameter
|
||||
# for "const" sampler, 0 or 1 for always false/true respectively
|
||||
# for "probabilistic" sampler, a probability between 0.0 and 1.0
|
||||
# for "rateLimiting" sampler, the number of spans per second
|
||||
# for "remote" sampler, param is the same as for "probabilistic"
|
||||
# and indicates the initial sampling rate before the actual one
|
||||
# is received from the sampling server (set at sampling_server_url)
|
||||
; sampler_param = 0.5
|
||||
# specifies the URL of the sampling server when sampler_type is remote
|
||||
; sampling_server_url = http://localhost:5778/sampling
|
||||
|
||||
[tracing.opentelemetry.jaeger]
|
||||
# jaeger destination (ex http://localhost:14268/api/traces)
|
||||
@ -1535,4 +1547,4 @@
|
||||
# Hide specific feature toggles from the feature management page
|
||||
;hidden_toggles =
|
||||
# Disable updating specific feature toggles in the feature management page
|
||||
;read_only_toggles =
|
||||
;read_only_toggles =
|
||||
|
@ -1801,6 +1801,8 @@ Refer to https://www.jaegertracing.io/docs/1.16/sampling/#client-sampling-config
|
||||
|
||||
Can be set with the environment variable `JAEGER_SAMPLER_TYPE`.
|
||||
|
||||
_To override this setting, enter `sampler_type` in the `tracing.opentelemetry` section._
|
||||
|
||||
### sampler_param
|
||||
|
||||
Default value is `1`.
|
||||
@ -1816,10 +1818,14 @@ This is the sampler configuration parameter. Depending on the value of `sampler_
|
||||
|
||||
May be set with the environment variable `JAEGER_SAMPLER_PARAM`.
|
||||
|
||||
_Setting `sampler_param` in the `tracing.opentelemetry` section will override this setting._
|
||||
|
||||
### sampling_server_url
|
||||
|
||||
`sampling_server_url` is the URL of a sampling manager that provides a sampling strategy.
|
||||
|
||||
_Setting `sampling_server_url` in the `tracing.opentelemetry` section will override this setting._
|
||||
|
||||
### zipkin_propagation
|
||||
|
||||
Default value is `false`.
|
||||
@ -1846,6 +1852,31 @@ Comma-separated list of attributes to include in all new spans, such as `key1:va
|
||||
|
||||
Can be set with the environment variable `OTEL_RESOURCE_ATTRIBUTES` (use `=` instead of `:` with the environment variable).
|
||||
|
||||
### sampler_type
|
||||
|
||||
Default value is `const`.
|
||||
|
||||
Specifies the type of sampler: `const`, `probabilistic`, `rateLimiting`, or `remote`.
|
||||
|
||||
### sampler_param
|
||||
|
||||
Default value is `1`.
|
||||
|
||||
Depending on the value of `sampler_type`, the sampler configuration parameter can be `0`, `1`, a decimal value between `0` and `1`, or a number of spans per second.
|
||||
|
||||
- For the `const` sampler, use `0` to never sample or `1` to always sample
|
||||
- For the `probabilistic` sampler, you can use a decimal value between `0.0` and `1.0`
|
||||
- For the `rateLimiting` sampler, enter the number of spans per second
|
||||
- For the `remote` sampler, use a decimal value between `0.0` and `1.0`
|
||||
to specify the initial sampling rate used before the first update
|
||||
is received from the sampling server
|
||||
|
||||
### sampling_server_url
|
||||
|
||||
When `sampler_type` is `remote`, this specifies the URL of the sampling server. This can be used by all tracing providers.
|
||||
|
||||
Use a sampling server that supports the Jaeger remote sampling API, such as jaeger-agent, jaeger-collector, opentelemetry-collector-contrib, or [Grafana Agent](/oss/agent/).
|
||||
|
||||
<hr>
|
||||
|
||||
## [tracing.opentelemetry.jaeger]
|
||||
|
@ -7,13 +7,14 @@ import (
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
tracesdk "go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/sdk/trace/tracetest"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
func InitializeTracerForTest() Tracer {
|
||||
exp := tracetest.NewInMemoryExporter()
|
||||
tp, _ := initTracerProvider(exp, "testing")
|
||||
tp, _ := initTracerProvider(exp, "testing", tracesdk.AlwaysSample())
|
||||
otel.SetTracerProvider(tp)
|
||||
|
||||
ots := &Opentelemetry{Propagation: "jaeger,w3c", tracerProvider: tp}
|
||||
|
@ -224,6 +224,22 @@ func (ots *Opentelemetry) parseSettings() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// if sampler_type is set in tracing.opentelemetry, we ignore the config in tracing.jaeger
|
||||
sampler := section.Key("sampler_type").MustString("")
|
||||
if sampler != "" {
|
||||
ots.sampler = sampler
|
||||
}
|
||||
|
||||
samplerParam := section.Key("sampler_param").MustFloat64(0)
|
||||
if samplerParam != 0 {
|
||||
ots.samplerParam = samplerParam
|
||||
}
|
||||
|
||||
samplerRemoteURL := section.Key("sampling_server_url").MustString("")
|
||||
if samplerRemoteURL != "" {
|
||||
ots.samplerRemoteURL = samplerRemoteURL
|
||||
}
|
||||
|
||||
section = ots.Cfg.Raw.Section("tracing.opentelemetry.jaeger")
|
||||
ots.enabled = noopExporter
|
||||
|
||||
@ -295,16 +311,9 @@ func (ots *Opentelemetry) initJaegerTracerProvider() (*tracesdk.TracerProvider,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sampler := tracesdk.AlwaysSample()
|
||||
if ots.sampler == "const" || ots.sampler == "probabilistic" {
|
||||
sampler = tracesdk.TraceIDRatioBased(ots.samplerParam)
|
||||
} else if ots.sampler == "rateLimiting" {
|
||||
sampler = newRateLimiter(ots.samplerParam)
|
||||
} else if ots.sampler == "remote" {
|
||||
sampler = jaegerremote.New("grafana", jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
|
||||
jaegerremote.WithInitialSampler(tracesdk.TraceIDRatioBased(ots.samplerParam)))
|
||||
} else if ots.sampler != "" {
|
||||
return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
|
||||
sampler, err := ots.initSampler()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tp := tracesdk.NewTracerProvider(
|
||||
@ -323,10 +332,39 @@ func (ots *Opentelemetry) initOTLPTracerProvider() (*tracesdk.TracerProvider, er
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return initTracerProvider(exp, ots.Cfg.BuildVersion, ots.customAttribs...)
|
||||
sampler, err := ots.initSampler()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return initTracerProvider(exp, ots.Cfg.BuildVersion, sampler, ots.customAttribs...)
|
||||
}
|
||||
|
||||
func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
|
||||
func (ots *Opentelemetry) initSampler() (tracesdk.Sampler, error) {
|
||||
switch ots.sampler {
|
||||
case "const", "":
|
||||
if ots.samplerParam >= 1 {
|
||||
return tracesdk.AlwaysSample(), nil
|
||||
} else if ots.samplerParam <= 0 {
|
||||
return tracesdk.NeverSample(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid param for const sampler - must be 0 or 1: %f", ots.samplerParam)
|
||||
case "probabilistic":
|
||||
return tracesdk.TraceIDRatioBased(ots.samplerParam), nil
|
||||
case "rateLimiting":
|
||||
return newRateLimiter(ots.samplerParam), nil
|
||||
case "remote":
|
||||
return jaegerremote.New("grafana",
|
||||
jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
|
||||
jaegerremote.WithInitialSampler(tracesdk.TraceIDRatioBased(ots.samplerParam)),
|
||||
), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
|
||||
}
|
||||
}
|
||||
|
||||
func initTracerProvider(exp tracesdk.SpanExporter, version string, sampler tracesdk.Sampler, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
|
||||
res, err := resource.New(
|
||||
context.Background(),
|
||||
resource.WithAttributes(
|
||||
@ -343,9 +381,7 @@ func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs
|
||||
|
||||
tp := tracesdk.NewTracerProvider(
|
||||
tracesdk.WithBatcher(exp),
|
||||
tracesdk.WithSampler(tracesdk.ParentBased(
|
||||
tracesdk.AlwaysSample(),
|
||||
)),
|
||||
tracesdk.WithSampler(tracesdk.ParentBased(sampler)),
|
||||
tracesdk.WithResource(res),
|
||||
)
|
||||
return tp, nil
|
||||
@ -501,21 +537,23 @@ func (s OpentelemetrySpan) ContextWithSpan(ctx context.Context) context.Context
|
||||
|
||||
type rateLimiter struct {
|
||||
sync.Mutex
|
||||
rps float64
|
||||
balance float64
|
||||
maxBalance float64
|
||||
lastTick time.Time
|
||||
description string
|
||||
rps float64
|
||||
balance float64
|
||||
maxBalance float64
|
||||
lastTick time.Time
|
||||
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
func newRateLimiter(rps float64) *rateLimiter {
|
||||
return &rateLimiter{
|
||||
rps: rps,
|
||||
balance: math.Max(rps, 1),
|
||||
maxBalance: math.Max(rps, 1),
|
||||
lastTick: time.Now(),
|
||||
now: time.Now,
|
||||
rps: rps,
|
||||
description: fmt.Sprintf("RateLimitingSampler{%g}", rps),
|
||||
balance: math.Max(rps, 1),
|
||||
maxBalance: math.Max(rps, 1),
|
||||
lastTick: time.Now(),
|
||||
now: time.Now,
|
||||
}
|
||||
}
|
||||
|
||||
@ -538,4 +576,4 @@ func (rl *rateLimiter) ShouldSample(p tracesdk.SamplingParameters) tracesdk.Samp
|
||||
return tracesdk.SamplingResult{Decision: tracesdk.Drop, Tracestate: psc.TraceState()}
|
||||
}
|
||||
|
||||
func (rl *rateLimiter) Description() string { return "RateLimitingSampler" }
|
||||
func (rl *rateLimiter) Description() string { return rl.description }
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
|
||||
"github.com/grafana/grafana/pkg/setting"
|
||||
@ -63,6 +64,10 @@ func TestTracingConfig(t *testing.T) {
|
||||
ExpectedAddress string
|
||||
ExpectedPropagator string
|
||||
ExpectedAttrs []attribute.KeyValue
|
||||
|
||||
ExpectedSampler string
|
||||
ExpectedSamplerParam float64
|
||||
ExpectedSamplingServerURL string
|
||||
}{
|
||||
{
|
||||
Name: "default config uses noop exporter",
|
||||
@ -126,14 +131,34 @@ func TestTracingConfig(t *testing.T) {
|
||||
[tracing.jaeger]
|
||||
address = foo.com:6831
|
||||
custom_tags = a:b
|
||||
sampler_param = 0
|
||||
[tracing.opentelemetry]
|
||||
custom_attributes = c:d
|
||||
sampler_param = 1
|
||||
[tracing.opentelemetry.jaeger]
|
||||
address = bar.com:6831
|
||||
`,
|
||||
ExpectedExporter: jaegerExporter,
|
||||
ExpectedAddress: "bar.com:6831",
|
||||
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
|
||||
ExpectedExporter: jaegerExporter,
|
||||
ExpectedAddress: "bar.com:6831",
|
||||
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
|
||||
ExpectedSamplerParam: 1.0,
|
||||
},
|
||||
{
|
||||
Name: "remote sampler config is parsed from otel config",
|
||||
Cfg: `
|
||||
[tracing.opentelemetry]
|
||||
sampler_type = remote
|
||||
sampler_param = 0.5
|
||||
sampling_server_url = http://example.com:5778/sampling
|
||||
[tracing.opentelemetry.otlp]
|
||||
address = otlp.example.com:4317
|
||||
`,
|
||||
ExpectedExporter: otlpExporter,
|
||||
ExpectedAddress: "otlp.example.com:4317",
|
||||
ExpectedAttrs: []attribute.KeyValue{},
|
||||
ExpectedSampler: "remote",
|
||||
ExpectedSamplerParam: 0.5,
|
||||
ExpectedSamplingServerURL: "http://example.com:5778/sampling",
|
||||
},
|
||||
} {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
@ -156,6 +181,46 @@ func TestTracingConfig(t *testing.T) {
|
||||
assert.Equal(t, test.ExpectedAddress, otel.Address)
|
||||
assert.Equal(t, test.ExpectedPropagator, otel.Propagation)
|
||||
assert.Equal(t, test.ExpectedAttrs, otel.customAttribs)
|
||||
|
||||
if test.ExpectedSampler != "" {
|
||||
assert.Equal(t, test.ExpectedSampler, otel.sampler)
|
||||
assert.Equal(t, test.ExpectedSamplerParam, otel.samplerParam)
|
||||
assert.Equal(t, test.ExpectedSamplingServerURL, otel.samplerRemoteURL)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitSampler(t *testing.T) {
|
||||
otel := &Opentelemetry{}
|
||||
sampler, err := otel.initSampler()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "AlwaysOffSampler", sampler.Description())
|
||||
|
||||
otel.sampler = "bogus"
|
||||
_, err = otel.initSampler()
|
||||
require.Error(t, err)
|
||||
|
||||
otel.sampler = "const"
|
||||
otel.samplerParam = 0.5
|
||||
_, err = otel.initSampler()
|
||||
require.Error(t, err)
|
||||
|
||||
otel.sampler = "const"
|
||||
otel.samplerParam = 1.0
|
||||
sampler, err = otel.initSampler()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "AlwaysOnSampler", sampler.Description())
|
||||
|
||||
otel.sampler = "probabilistic"
|
||||
otel.samplerParam = 0.5
|
||||
sampler, err = otel.initSampler()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "TraceIDRatioBased{0.5}", sampler.Description())
|
||||
|
||||
otel.sampler = "rateLimiting"
|
||||
otel.samplerParam = 100.25
|
||||
sampler, err = otel.initSampler()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "RateLimitingSampler{100.25}", sampler.Description())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user