API Server: Standalone observability (#84789)

Adds support for logs (specify level), metrics (enable metrics and the Prometheus /metrics endpoint)
and traces (Jaeger or OTLP) for the standalone API server. This will allow any Grafana core service
that is part of the standalone apiserver to use logging, metrics and traces as normal.
This commit is contained in:
Marcus Efraimsson
2024-03-21 17:06:32 +01:00
committed by GitHub
parent 856ed64aac
commit 6c1de260a2
22 changed files with 860 additions and 445 deletions

View File

@@ -0,0 +1,41 @@
package options
import (
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/setting"
"github.com/spf13/pflag"
genericapiserver "k8s.io/apiserver/pkg/server"
)
// LoggingOptions holds the console logging settings of the standalone API server.
type LoggingOptions struct {
// logger is used by ApplyTo to emit the startup and initialization messages.
logger log.Logger
// Level is the log level, bound to the grafana.log.level flag by AddFlags.
Level string
}
// NewLoggingOptions returns a LoggingOptions that reports through logger.
// Level keeps its zero value; AddFlags binds it to a command-line flag.
func NewLoggingOptions(logger log.Logger) *LoggingOptions {
	opts := new(LoggingOptions)
	opts.logger = logger
	return opts
}
// AddFlags registers the grafana.log.level flag on fs and binds it to o.Level.
// The flag defaults to "debug".
func (o *LoggingOptions) AddFlags(fs *pflag.FlagSet) {
	const (
		flagName     = "grafana.log.level"
		defaultLevel = "debug"
		usage        = "Log level, debug, info, warn, error."
	)
	fs.StringVar(&o.Level, flagName, defaultLevel, usage)
}
// Validate always reports success; the configured level is not checked here.
func (o *LoggingOptions) Validate() []error {
	var none []error
	return none
}
// ApplyTo sets up the console logger at the configured level and then logs the
// build information of the running binary. The server config c is not modified.
// It returns the error from log.SetupConsoleLogger, if any.
func (o *LoggingOptions) ApplyTo(c *genericapiserver.RecommendedConfig) error {
	if err := log.SetupConsoleLogger(o.Level); err != nil {
		return err
	}

	// BuildStamp is interpreted as seconds since the Unix epoch.
	compiledAt := time.Unix(setting.BuildStamp, 0)
	o.logger.Info(
		"Starting grafana-apiserver",
		"version", setting.BuildVersion,
		"commit", setting.BuildCommit,
		"branch", setting.BuildBranch,
		"compiled", compiledAt,
	)
	o.logger.Debug("Console logging initialized", "logLevel", o.Level)

	return nil
}

View File

@@ -0,0 +1,42 @@
package options
import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/metrics"
"github.com/grafana/grafana/pkg/setting"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/pflag"
genericapiserver "k8s.io/apiserver/pkg/server"
)
// MetricsOptions holds the metrics settings of the standalone API server.
type MetricsOptions struct {
// logger is used by ApplyTo to report that metrics are enabled.
logger log.Logger
// Enabled is bound to the grafana.metrics.enable flag and is copied to the
// server config's EnableMetrics field by ApplyTo.
Enabled bool
// MetricsRegisterer is populated by ApplyTo with the result of metrics.ProvideRegisterer.
MetricsRegisterer prometheus.Registerer
}
// NewMetricsOptions returns a MetricsOptions that reports through logger.
// Enabled and MetricsRegisterer keep their zero values until AddFlags and
// ApplyTo populate them.
func NewMetricsOptions(logger log.Logger) *MetricsOptions {
	return &MetricsOptions{
		logger: logger,
	}
}

// NewMetrcicsOptions is the original, misspelled name of NewMetricsOptions and
// is kept so that existing callers continue to compile.
//
// Deprecated: use NewMetricsOptions instead.
func NewMetrcicsOptions(logger log.Logger) *MetricsOptions {
	return NewMetricsOptions(logger)
}
// AddFlags registers the grafana.metrics.enable flag on fs and binds it to
// o.Enabled. The flag defaults to false.
func (o *MetricsOptions) AddFlags(fs *pflag.FlagSet) {
	const (
		flagName = "grafana.metrics.enable"
		usage    = "Enable metrics and Prometheus /metrics endpoint."
	)
	fs.BoolVar(&o.Enabled, flagName, false, usage)
}
// Validate always reports success; the metrics options have nothing to check.
func (o *MetricsOptions) Validate() []error {
	var none []error
	return none
}
// ApplyTo copies the Enabled setting to the server config, stores the metrics
// registerer on the options and records the build information on it. The
// registerer and build information are set up whether or not metrics are
// enabled. It always returns nil.
func (o *MetricsOptions) ApplyTo(c *genericapiserver.RecommendedConfig) error {
	c.EnableMetrics = o.Enabled

	o.MetricsRegisterer = metrics.ProvideRegisterer()
	metrics.SetBuildInformation(
		o.MetricsRegisterer,
		setting.BuildVersion,
		setting.BuildCommit,
		setting.BuildBranch,
		setting.BuildStamp,
	)

	if !o.Enabled {
		return nil
	}
	o.logger.Debug("Metrics enabled")
	return nil
}

View File

@@ -0,0 +1,159 @@
package options
import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/apiserver/options"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/runtime"
genericapiserver "k8s.io/apiserver/pkg/server"
genericoptions "k8s.io/apiserver/pkg/server/options"
)
// Options groups the option sets of the standalone API server. AddFlags,
// Validate and ApplyTo fan out to the individual members.
type Options struct {
// LoggingOptions holds the logging settings.
LoggingOptions *LoggingOptions
// ExtraOptions holds the options created by options.NewExtraOptions; Validate reads its DevMode field.
ExtraOptions *options.ExtraOptions
// RecommendedOptions holds the generic apiserver options created by options.NewRecommendedOptions.
RecommendedOptions *genericoptions.RecommendedOptions
// TracingOptions holds the tracing settings.
TracingOptions *TracingOptions
// MetricsOptions holds the metrics settings.
MetricsOptions *MetricsOptions
}
// New builds an Options with every member option set initialised. logger is
// handed to the logging, tracing and metrics options; codec is passed to
// options.NewRecommendedOptions.
func New(logger log.Logger, codec runtime.Codec) *Options {
	opts := new(Options)

	opts.LoggingOptions = NewLoggingOptions(logger)
	opts.ExtraOptions = options.NewExtraOptions()
	opts.RecommendedOptions = options.NewRecommendedOptions(codec)
	opts.TracingOptions = NewTracingOptions(logger)
	opts.MetricsOptions = NewMetrcicsOptions(logger)

	return opts
}
// AddFlags registers the flags of every member option set on fs, in the order
// logging, extra, recommended, tracing, metrics.
func (o *Options) AddFlags(fs *pflag.FlagSet) {
	type flagAdder interface {
		AddFlags(*pflag.FlagSet)
	}

	members := []flagAdder{
		o.LoggingOptions,
		o.ExtraOptions,
		o.RecommendedOptions,
		o.TracingOptions,
		o.MetricsOptions,
	}
	for _, member := range members {
		member.AddFlags(fs)
	}
}
// Validate runs the member validators one after another and returns the errors
// of the first one that reports any. It returns nil when every check passes.
//
// The order is: logging, extra, tracing, metrics, secure serving and, in dev
// mode only, authentication.
func (o *Options) Validate() []error {
	errs := o.LoggingOptions.Validate()
	if len(errs) > 0 {
		return errs
	}

	if errs = o.ExtraOptions.Validate(); len(errs) > 0 {
		return errs
	}

	if errs = o.TracingOptions.Validate(); len(errs) > 0 {
		return errs
	}

	if errs = o.MetricsOptions.Validate(); len(errs) > 0 {
		return errs
	}

	// NOTE: the top-level RecommendedOptions.Validate is deliberately not called because it
	// rejects configurations that skip etcd-servers. The call is kept here, commented out,
	// for troubleshooting other config issues:
	// errors = append(errors, o.RecommendedOptions.Validate()...)
	if errs = o.RecommendedOptions.SecureServing.Validate(); len(errs) > 0 {
		return errs
	}

	if !o.ExtraOptions.DevMode {
		return nil
	}

	// NOTE: authn is only considered in dev mode - this resolves the failure due to a
	// missing extension apiserver auth-config in the parent k8s.
	if errs = o.RecommendedOptions.Authentication.Validate(); len(errs) > 0 {
		return errs
	}

	return nil
}
// ModifiedApplyTo applies the RecommendedOptions members to config one at a time, in this
// order: Etcd, EgressSelector, Traces, SecureServing, Authentication, Authorization, Audit
// and CoreAPI. It then invokes ExtraAdmissionInitializers. The first error encountered is
// returned; nil is returned when every step succeeds.
//
// A copy of ApplyTo in recommended.go, but for >= 0.28, server pkg in apiserver does a bit extra causing
// a panic when CoreAPI is set to nil. In this file, Options.ApplyTo calls this method only
// when RecommendedOptions.CoreAPI is nil.
func (o *Options) ModifiedApplyTo(config *genericapiserver.RecommendedConfig) error {
if err := o.RecommendedOptions.Etcd.ApplyTo(&config.Config); err != nil {
return err
}
if err := o.RecommendedOptions.EgressSelector.ApplyTo(&config.Config); err != nil {
return err
}
if err := o.RecommendedOptions.Traces.ApplyTo(config.Config.EgressSelector, &config.Config); err != nil {
return err
}
if err := o.RecommendedOptions.SecureServing.ApplyTo(&config.Config.SecureServing, &config.Config.LoopbackClientConfig); err != nil {
return err
}
if err := o.RecommendedOptions.Authentication.ApplyTo(&config.Config.Authentication, config.SecureServing, config.OpenAPIConfig); err != nil {
return err
}
if err := o.RecommendedOptions.Authorization.ApplyTo(&config.Config.Authorization); err != nil {
return err
}
if err := o.RecommendedOptions.Audit.ApplyTo(&config.Config); err != nil {
return err
}
// TODO: determine whether we need flow control (API priority and fairness)
// We can't assume that a shared informers config was provided in standalone mode and will need a guard
// when enabling below
// The Features step is intentionally disabled; the original code is kept for reference.
/* kubeClient, err := kubernetes.NewForConfig(config.ClientConfig)
if err != nil {
return err
}
if err := o.RecommendedOptions.Features.ApplyTo(&config.Config, kubeClient, config.SharedInformerFactory); err != nil {
return err
} */
// NOTE(review): on the path where Options.ApplyTo calls this method CoreAPI is nil, so this
// call relies on CoreAPI.ApplyTo tolerating a nil receiver - confirm against the apiserver
// version in use.
if err := o.RecommendedOptions.CoreAPI.ApplyTo(config); err != nil {
return err
}
// The returned initializers are discarded; only the error is checked.
_, err := o.RecommendedOptions.ExtraAdmissionInitializers(config)
if err != nil {
return err
}
return nil
}
// ApplyTo applies every configured option set to serverConfig, stopping at the
// first error. Logging, extra, tracing and metrics options are skipped when
// nil. The recommended options are applied through ModifiedApplyTo when CoreAPI
// is nil and through the stock RecommendedOptions.ApplyTo otherwise.
func (o *Options) ApplyTo(serverConfig *genericapiserver.RecommendedConfig) error {
	if logging := o.LoggingOptions; logging != nil {
		if err := logging.ApplyTo(serverConfig); err != nil {
			return err
		}
	}

	if extra := o.ExtraOptions; extra != nil {
		if err := extra.ApplyTo(serverConfig); err != nil {
			return err
		}
	}

	var err error
	if o.RecommendedOptions.CoreAPI == nil {
		err = o.ModifiedApplyTo(serverConfig)
	} else {
		err = o.RecommendedOptions.ApplyTo(serverConfig)
	}
	if err != nil {
		return err
	}

	if tracingOpts := o.TracingOptions; tracingOpts != nil {
		if err := tracingOpts.ApplyTo(serverConfig); err != nil {
			return err
		}
	}

	if metricsOpts := o.MetricsOptions; metricsOpts != nil {
		if err := metricsOpts.ApplyTo(serverConfig); err != nil {
			return err
		}
	}

	return nil
}

View File

@@ -0,0 +1,124 @@
package options
import (
"context"
"errors"
"fmt"
"net/url"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/setting"
"github.com/spf13/pflag"
"go.opentelemetry.io/otel/attribute"
genericapiserver "k8s.io/apiserver/pkg/server"
)
// TracingOptions holds the tracing settings of the standalone API server.
// The exported string, map and float fields are bound to grafana.tracing.* flags by AddFlags.
type TracingOptions struct {
// logger is used by the post-start hook registered in ApplyTo to report tracing service errors.
logger log.Logger
// JaegerAddress is the Jaeger exporter destination (grafana.tracing.jaeger.address).
JaegerAddress string
// JaegerPropagation is the text map propagation format used with Jaeger (grafana.tracing.jaeger.propagation).
JaegerPropagation string
// OTLPAddress is the OTLP exporter destination (grafana.tracing.otlp.address).
OTLPAddress string
// OTLPPropagation is the text map propagation format used with OTLP (grafana.tracing.otlp.propagation).
OTLPPropagation string
// Tags are key/value pairs that ApplyTo adds to the tracing config as custom attributes (grafana.tracing.tag).
Tags map[string]string
// SamplerType is the sampler type (grafana.tracing.sampler-type).
SamplerType string
// SamplerParam is the sampler configuration parameter (grafana.tracing.sampler-param).
SamplerParam float64
// SamplingServiceURL is the sampling service URL (grafana.tracing.sampling-service).
SamplingServiceURL string
// TracingService is set by ApplyTo to the service returned by tracing.ProvideService.
TracingService *tracing.TracingService
}
// NewTracingOptions returns a TracingOptions that reports through logger and
// starts with an empty, non-nil tag map.
func NewTracingOptions(logger log.Logger) *TracingOptions {
	opts := new(TracingOptions)
	opts.logger = logger
	opts.Tags = make(map[string]string)
	return opts
}
// AddFlags registers the grafana.tracing.* flags on fs and binds them to the
// corresponding fields of o.
//
// Defaults: Jaeger propagation "jaeger", OTLP propagation "w3c", sampler type
// "const", sampler param 0; the addresses, tags and sampling service URL are
// empty.
func (o *TracingOptions) AddFlags(fs *pflag.FlagSet) {
	fs.StringVar(&o.JaegerAddress, "grafana.tracing.jaeger.address", "", "Tracing Jaeger exporter destination, e.g. http://localhost:14268/api/traces. This enables the Jaeger export and takes precedence over grafana.tracing.otlp.")
	fs.StringVar(&o.JaegerPropagation, "grafana.tracing.jaeger.propagation", "jaeger", "Tracing Jaeger propagation specifies the text map propagation format, w3c or jaeger.")
	fs.StringVar(&o.OTLPAddress, "grafana.tracing.otlp.address", "", "Tracing OTLP exporter destination, e.g. localhost:4317.")
	fs.StringVar(&o.OTLPPropagation, "grafana.tracing.otlp.propagation", "w3c", "Tracing OTLP propagation specifies the text map propagation format, w3c or jaeger.")
	fs.StringToStringVar(&o.Tags, "grafana.tracing.tag", map[string]string{}, "Tracing server tag in 'key=value' format. Specify multiple times to add many.")
	fs.StringVar(&o.SamplerType, "grafana.tracing.sampler-type", "const", "Tracing sampler type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote.")
	// The 'remote' description refers to 'probabilistic', so that sampler is described as well.
	fs.Float64Var(&o.SamplerParam, "grafana.tracing.sampler-param", 0, "Tracing sampler configuration parameter. For 'const' sampler, 0 or 1 for always false/true respectively. For 'probabilistic' sampler, a probability between 0 and 1.0. For 'rateLimiting' sampler, the number of spans per second. For 'remote' sampler, param is the same as for 'probabilistic' and indicates the initial sampling rate before the actual one is received from the sampling service.")
	fs.StringVar(&o.SamplingServiceURL, "grafana.tracing.sampling-service", "", "Tracing server sampling service URL (used for both Jaeger and OTLP) if grafana.tracing.sampler-type=remote.")
}
// Validate checks that the Jaeger address and the sampling service URL, when
// set, can be parsed by url.Parse. It returns one error per value that fails to
// parse, each naming the offending flag, or nil when there is nothing to
// report. Empty values are not checked.
func (o *TracingOptions) Validate() []error {
	// Named errs rather than errors so the imported errors package is not shadowed.
	var errs []error

	if o.JaegerAddress != "" {
		if _, err := url.Parse(o.JaegerAddress); err != nil {
			errs = append(errs, fmt.Errorf("failed to parse grafana.tracing.jaeger.address: %w", err))
		}
	}

	if o.SamplingServiceURL != "" {
		if _, err := url.Parse(o.SamplingServiceURL); err != nil {
			errs = append(errs, fmt.Errorf("failed to parse grafana.tracing.sampling-service: %w", err))
		}
	}

	return errs
}
// ApplyTo builds the tracing configuration from the options, creates the
// tracing service, installs its tracer provider on config and registers a
// post-start hook that runs the service until the server stops.
//
// Exporter selection: a Jaeger address takes precedence over an OTLP address;
// when neither is set the empty tracing config is used. Only the selected
// exporter's configuration is built, so an error from it is never masked by the
// other exporter.
//
// It returns an error when the exporter configuration cannot be built, when the
// tracing service cannot be created, or when the post-start hook cannot be
// registered. On success o.TracingService holds the created service.
func (o *TracingOptions) ApplyTo(config *genericapiserver.RecommendedConfig) error {
	tracingCfg := tracing.NewEmptyTracingConfig()

	switch {
	case o.JaegerAddress != "":
		cfg, err := tracing.NewJaegerTracingConfig(o.JaegerAddress, o.JaegerPropagation)
		if err != nil {
			return fmt.Errorf("configuring jaeger tracing: %w", err)
		}
		tracingCfg = cfg
	case o.OTLPAddress != "":
		cfg, err := tracing.NewOTLPTracingConfig(o.OTLPAddress, o.OTLPPropagation)
		if err != nil {
			return fmt.Errorf("configuring otlp tracing: %w", err)
		}
		tracingCfg = cfg
	}

	tracingCfg.ServiceName = "grafana-apiserver"
	tracingCfg.ServiceVersion = setting.BuildVersion

	for k, v := range o.Tags {
		tracingCfg.CustomAttribs = append(tracingCfg.CustomAttribs, attribute.String(k, v))
	}

	tracingCfg.Sampler = o.SamplerType
	tracingCfg.SamplerParam = o.SamplerParam
	tracingCfg.SamplerRemoteURL = o.SamplingServiceURL

	ts, err := tracing.ProvideService(tracingCfg)
	if err != nil {
		return err
	}

	o.TracingService = ts
	config.TracerProvider = ts.GetTracerProvider()

	hook := func(hookCtx genericapiserver.PostStartHookContext) error {
		ctx, cancel := context.WithCancel(context.Background())

		// Translate the server's stop channel into context cancellation.
		go func() {
			<-hookCtx.StopCh
			cancel()
		}()

		// Run the service in the background so the hook itself returns immediately.
		go func() {
			if err := ts.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
				o.logger.Error("failed to shutdown tracing service", "error", err)
			}
		}()

		return nil
	}

	// Report a registration failure to the caller instead of terminating the process.
	if err := config.AddPostStartHook("grafana-tracing-service", hook); err != nil {
		return fmt.Errorf("registering tracing post-start hook: %w", err)
	}

	return nil
}