Tracing: Support remote, rate-limited, and probabilistic sampling in tracing.opentelemetry config section (#73587)

* tracing: Support remote sampling server

Signed-off-by: Dave Henderson <dave.henderson@grafana.com>

* Update docs/sources/setup-grafana/configure-grafana/_index.md

Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>

* Update docs/sources/setup-grafana/configure-grafana/_index.md

Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>

* Update docs/sources/setup-grafana/configure-grafana/_index.md

Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>

* Update docs/sources/setup-grafana/configure-grafana/_index.md

Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>

* Update docs/sources/setup-grafana/configure-grafana/_index.md

* Update docs/sources/setup-grafana/configure-grafana/_index.md

* Update docs/sources/setup-grafana/configure-grafana/_index.md

* Satisfying the doc-validator check

* satisfy prettier

Signed-off-by: Dave Henderson <dave.henderson@grafana.com>

* back out unnecessary change

Signed-off-by: Dave Henderson <dave.henderson@grafana.com>

---------

Signed-off-by: Dave Henderson <dave.henderson@grafana.com>
Co-authored-by: Christopher Moyer <35463610+chri2547@users.noreply.github.com>
This commit is contained in:
Dave Henderson 2023-09-11 12:13:29 -04:00 committed by GitHub
parent 951876b465
commit ce1169f8b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 190 additions and 31 deletions

View File

@ -1374,6 +1374,18 @@ disable_shared_zipkin_spans = false
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
custom_attributes =
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
sampler_type =
# Sampler configuration parameter
# for "const" sampler, 0 or 1 for always false/true respectively
# for "probabilistic" sampler, a probability between 0.0 and 1.0
# for "rateLimiting" sampler, the number of spans per second
# for "remote" sampler, param is the same as for "probabilistic"
# and indicates the initial sampling rate before the actual one
# is received from the sampling server (set at sampling_server_url)
sampler_param =
# specifies the URL of the sampling server when sampler_type is remote
sampling_server_url =
[tracing.opentelemetry.jaeger]
# jaeger destination (ex http://localhost:14268/api/traces)
@ -1668,4 +1680,4 @@ update_controller_url =
hidden_toggles =
# Disables updating specific feature toggles in the feature management page
read_only_toggles =
read_only_toggles =

View File

@ -1272,6 +1272,18 @@
[tracing.opentelemetry]
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
;custom_attributes = key1:value1,key2:value2
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
; sampler_type = remote
# Sampler configuration parameter
# for "const" sampler, 0 or 1 for always false/true respectively
# for "probabilistic" sampler, a probability between 0.0 and 1.0
# for "rateLimiting" sampler, the number of spans per second
# for "remote" sampler, param is the same as for "probabilistic"
# and indicates the initial sampling rate before the actual one
# is received from the sampling server (set at sampling_server_url)
; sampler_param = 0.5
# specifies the URL of the sampling server when sampler_type is remote
; sampling_server_url = http://localhost:5778/sampling
[tracing.opentelemetry.jaeger]
# jaeger destination (ex http://localhost:14268/api/traces)
@ -1535,4 +1547,4 @@
# Hide specific feature toggles from the feature management page
;hidden_toggles =
# Disable updating specific feature toggles in the feature management page
;read_only_toggles =
;read_only_toggles =

View File

@ -1801,6 +1801,8 @@ Refer to https://www.jaegertracing.io/docs/1.16/sampling/#client-sampling-config
Can be set with the environment variable `JAEGER_SAMPLER_TYPE`.
_To override this setting, enter `sampler_type` in the `tracing.opentelemetry` section._
### sampler_param
Default value is `1`.
@ -1816,10 +1818,14 @@ This is the sampler configuration parameter. Depending on the value of `sampler_
May be set with the environment variable `JAEGER_SAMPLER_PARAM`.
_Setting `sampler_param` in the `tracing.opentelemetry` section will override this setting._
### sampling_server_url
sampling_server_url is the URL of a sampling manager providing a sampling strategy.
_Setting `sampling_server_url` in the `tracing.opentelemetry` section will override this setting._
### zipkin_propagation
Default value is `false`.
@ -1846,6 +1852,31 @@ Comma-separated list of attributes to include in all new spans, such as `key1:va
Can be set with the environment variable `OTEL_RESOURCE_ATTRIBUTES` (use `=` instead of `:` with the environment variable).
### sampler_type
Default value is `const`.
Specifies the type of sampler: `const`, `probabilistic`, `rateLimiting`, or `remote`.
### sampler_param
Default value is `1`.
Depending on the value of `sampler_type`, the sampler configuration parameter can be `0`, `1`, a decimal value between `0` and `1`, or a number of spans per second.
- For the `const` sampler, use `0` to never sample or `1` to always sample
- For the `probabilistic` sampler, you can use a decimal value between `0.0` and `1.0`
- For the `rateLimiting` sampler, enter the number of spans per second
- For the `remote` sampler, use a decimal value between `0.0` and `1.0`
to specify the initial sampling rate used before the first update
is received from the sampling server
### sampling_server_url
When `sampler_type` is `remote`, this specifies the URL of the sampling server. This can be used by all tracing providers.
Use a sampling server that supports the Jaeger remote sampling API, such as jaeger-agent, jaeger-collector, opentelemetry-collector-contrib, or [Grafana Agent](/oss/agent/).
<hr>
## [tracing.opentelemetry.jaeger]

View File

@ -7,13 +7,14 @@ import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
tracesdk "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
"go.opentelemetry.io/otel/trace"
)
func InitializeTracerForTest() Tracer {
exp := tracetest.NewInMemoryExporter()
tp, _ := initTracerProvider(exp, "testing")
tp, _ := initTracerProvider(exp, "testing", tracesdk.AlwaysSample())
otel.SetTracerProvider(tp)
ots := &Opentelemetry{Propagation: "jaeger,w3c", tracerProvider: tp}

View File

@ -224,6 +224,22 @@ func (ots *Opentelemetry) parseSettings() error {
return err
}
// if sampler_type is set in tracing.opentelemetry, we ignore the config in tracing.jaeger
sampler := section.Key("sampler_type").MustString("")
if sampler != "" {
ots.sampler = sampler
}
samplerParam := section.Key("sampler_param").MustFloat64(0)
if samplerParam != 0 {
ots.samplerParam = samplerParam
}
samplerRemoteURL := section.Key("sampling_server_url").MustString("")
if samplerRemoteURL != "" {
ots.samplerRemoteURL = samplerRemoteURL
}
section = ots.Cfg.Raw.Section("tracing.opentelemetry.jaeger")
ots.enabled = noopExporter
@ -295,16 +311,9 @@ func (ots *Opentelemetry) initJaegerTracerProvider() (*tracesdk.TracerProvider,
return nil, err
}
sampler := tracesdk.AlwaysSample()
if ots.sampler == "const" || ots.sampler == "probabilistic" {
sampler = tracesdk.TraceIDRatioBased(ots.samplerParam)
} else if ots.sampler == "rateLimiting" {
sampler = newRateLimiter(ots.samplerParam)
} else if ots.sampler == "remote" {
sampler = jaegerremote.New("grafana", jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
jaegerremote.WithInitialSampler(tracesdk.TraceIDRatioBased(ots.samplerParam)))
} else if ots.sampler != "" {
return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
sampler, err := ots.initSampler()
if err != nil {
return nil, err
}
tp := tracesdk.NewTracerProvider(
@ -323,10 +332,39 @@ func (ots *Opentelemetry) initOTLPTracerProvider() (*tracesdk.TracerProvider, er
return nil, err
}
return initTracerProvider(exp, ots.Cfg.BuildVersion, ots.customAttribs...)
sampler, err := ots.initSampler()
if err != nil {
return nil, err
}
return initTracerProvider(exp, ots.Cfg.BuildVersion, sampler, ots.customAttribs...)
}
func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
// initSampler builds the trace sampler selected by ots.sampler, using
// ots.samplerParam as its parameter:
//   - "const" (or unset): always samples when the param is >= 1, never
//     samples when it is <= 0, and rejects anything in between.
//   - "probabilistic": a TraceIDRatioBased sampler with the param as ratio.
//   - "rateLimiting": a rate limiter created by newRateLimiter from the param.
//   - "remote": a jaegerremote sampler pointed at ots.samplerRemoteURL, with
//     a TraceIDRatioBased sampler as its initial sampler.
//
// Any other sampler type results in an error.
func (ots *Opentelemetry) initSampler() (tracesdk.Sampler, error) {
	param := ots.samplerParam

	switch ots.sampler {
	case "const", "":
		switch {
		case param >= 1:
			return tracesdk.AlwaysSample(), nil
		case param <= 0:
			return tracesdk.NeverSample(), nil
		default:
			return nil, fmt.Errorf("invalid param for const sampler - must be 0 or 1: %f", param)
		}
	case "probabilistic":
		return tracesdk.TraceIDRatioBased(param), nil
	case "rateLimiting":
		return newRateLimiter(param), nil
	case "remote":
		initial := tracesdk.TraceIDRatioBased(param)
		remote := jaegerremote.New(
			"grafana",
			jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
			jaegerremote.WithInitialSampler(initial),
		)
		return remote, nil
	}

	return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
}
func initTracerProvider(exp tracesdk.SpanExporter, version string, sampler tracesdk.Sampler, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
res, err := resource.New(
context.Background(),
resource.WithAttributes(
@ -343,9 +381,7 @@ func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs
tp := tracesdk.NewTracerProvider(
tracesdk.WithBatcher(exp),
tracesdk.WithSampler(tracesdk.ParentBased(
tracesdk.AlwaysSample(),
)),
tracesdk.WithSampler(tracesdk.ParentBased(sampler)),
tracesdk.WithResource(res),
)
return tp, nil
@ -501,21 +537,23 @@ func (s OpentelemetrySpan) ContextWithSpan(ctx context.Context) context.Context
type rateLimiter struct {
sync.Mutex
rps float64
balance float64
maxBalance float64
lastTick time.Time
description string
rps float64
balance float64
maxBalance float64
lastTick time.Time
now func() time.Time
}
func newRateLimiter(rps float64) *rateLimiter {
return &rateLimiter{
rps: rps,
balance: math.Max(rps, 1),
maxBalance: math.Max(rps, 1),
lastTick: time.Now(),
now: time.Now,
rps: rps,
description: fmt.Sprintf("RateLimitingSampler{%g}", rps),
balance: math.Max(rps, 1),
maxBalance: math.Max(rps, 1),
lastTick: time.Now(),
now: time.Now,
}
}
@ -538,4 +576,4 @@ func (rl *rateLimiter) ShouldSample(p tracesdk.SamplingParameters) tracesdk.Samp
return tracesdk.SamplingResult{Decision: tracesdk.Drop, Tracestate: psc.TraceState()}
}
func (rl *rateLimiter) Description() string { return "RateLimitingSampler" }
func (rl *rateLimiter) Description() string { return rl.description }

View File

@ -4,6 +4,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/attribute"
"github.com/grafana/grafana/pkg/setting"
@ -63,6 +64,10 @@ func TestTracingConfig(t *testing.T) {
ExpectedAddress string
ExpectedPropagator string
ExpectedAttrs []attribute.KeyValue
ExpectedSampler string
ExpectedSamplerParam float64
ExpectedSamplingServerURL string
}{
{
Name: "default config uses noop exporter",
@ -126,14 +131,34 @@ func TestTracingConfig(t *testing.T) {
[tracing.jaeger]
address = foo.com:6831
custom_tags = a:b
sampler_param = 0
[tracing.opentelemetry]
custom_attributes = c:d
sampler_param = 1
[tracing.opentelemetry.jaeger]
address = bar.com:6831
`,
ExpectedExporter: jaegerExporter,
ExpectedAddress: "bar.com:6831",
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
ExpectedExporter: jaegerExporter,
ExpectedAddress: "bar.com:6831",
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
ExpectedSamplerParam: 1.0,
},
{
Name: "remote sampler config is parsed from otel config",
Cfg: `
[tracing.opentelemetry]
sampler_type = remote
sampler_param = 0.5
sampling_server_url = http://example.com:5778/sampling
[tracing.opentelemetry.otlp]
address = otlp.example.com:4317
`,
ExpectedExporter: otlpExporter,
ExpectedAddress: "otlp.example.com:4317",
ExpectedAttrs: []attribute.KeyValue{},
ExpectedSampler: "remote",
ExpectedSamplerParam: 0.5,
ExpectedSamplingServerURL: "http://example.com:5778/sampling",
},
} {
t.Run(test.Name, func(t *testing.T) {
@ -156,6 +181,46 @@ func TestTracingConfig(t *testing.T) {
assert.Equal(t, test.ExpectedAddress, otel.Address)
assert.Equal(t, test.ExpectedPropagator, otel.Propagation)
assert.Equal(t, test.ExpectedAttrs, otel.customAttribs)
if test.ExpectedSampler != "" {
assert.Equal(t, test.ExpectedSampler, otel.sampler)
assert.Equal(t, test.ExpectedSamplerParam, otel.samplerParam)
assert.Equal(t, test.ExpectedSamplingServerURL, otel.samplerRemoteURL)
}
})
}
}
// TestInitSampler checks that initSampler returns the expected sampler (by
// its Description) or an error for each sampler type / param combination.
func TestInitSampler(t *testing.T) {
	for _, tc := range []struct {
		name            string
		samplerType     string
		samplerParam    float64
		wantErr         bool
		wantDescription string
	}{
		{
			name:            "zero value config",
			wantDescription: "AlwaysOffSampler",
		},
		{
			name:        "unknown sampler type",
			samplerType: "bogus",
			wantErr:     true,
		},
		{
			name:         "const sampler with fractional param",
			samplerType:  "const",
			samplerParam: 0.5,
			wantErr:      true,
		},
		{
			name:            "const sampler with param 1",
			samplerType:     "const",
			samplerParam:    1.0,
			wantDescription: "AlwaysOnSampler",
		},
		{
			name:            "probabilistic sampler",
			samplerType:     "probabilistic",
			samplerParam:    0.5,
			wantDescription: "TraceIDRatioBased{0.5}",
		},
		{
			name:            "rate limiting sampler",
			samplerType:     "rateLimiting",
			samplerParam:    100.25,
			wantDescription: "RateLimitingSampler{100.25}",
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			ots := &Opentelemetry{sampler: tc.samplerType, samplerParam: tc.samplerParam}

			got, err := ots.initSampler()
			if tc.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tc.wantDescription, got.Description())
		})
	}
}