Alerting: Configure recording rule writer from config.ini (#89056)

This commit is contained in:
William Wernert 2024-06-12 16:04:46 -04:00 committed by GitHub
parent 7664b89209
commit c62cc25513
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 136 additions and 49 deletions

View File

@ -1399,6 +1399,23 @@ max_age =
# Configures max number of alert annotations that Grafana stores. Default value is 0, which keeps all alert annotations.
max_annotations_to_keep =
[recording_rules]
# Target URL (including write path) for recording rules.
url =
# Optional username for basic authentication on recording rule write requests. Can be left blank to disable basic auth
basic_auth_username =
# Optional password for basic authentication on recording rule write requests. Can be left blank.
basic_auth_password =
# Request timeout for recording rule writes.
timeout = 10s
# Optional custom headers to include in recording rule write requests.
[recording_rules.custom_headers]
# exampleHeader = exampleValue
# NOTE: these configuration options are not used yet.
[remote.alertmanager]

View File

@ -1389,6 +1389,24 @@ max_age =
# Configures max number of alert annotations that Grafana stores. Default value is 0, which keeps all alert annotations.
max_annotations_to_keep =
#################################### Recording Rules #####################
[recording_rules]
# Target URL (including write path) for recording rules.
url =
# Optional username for basic authentication on recording rule write requests. Can be left blank to disable basic auth
basic_auth_username =
# Optional password for basic authentication on recording rule write requests. Can be left blank.
basic_auth_password =
# Request timeout for recording rule writes.
timeout = 30s
# Optional custom headers to include in recording rule write requests.
[recording_rules.custom_headers]
# exampleHeader = exampleValue
#################################### Annotations #########################
[annotations]
# Configures the batch size for the annotation clean-up job. This setting is used for dashboard, API, and alert annotations.

View File

@ -332,6 +332,12 @@ func (ng *AlertNG) init() error {
ng.AlertsRouter = alertsRouter
evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService, ng.pluginsStore)
recordingWriter, err := createRecordingWriter(ng.FeatureToggles, ng.Cfg.UnifiedAlerting.RecordingRules)
if err != nil {
return err
}
schedCfg := schedule.SchedulerCfg{
MaxAttempts: ng.Cfg.UnifiedAlerting.MaxAttempts,
C: clk,
@ -347,7 +353,7 @@ func (ng *AlertNG) init() error {
AlertSender: alertsRouter,
Tracer: ng.tracer,
Log: log.New("ngalert.scheduler"),
RecordingWriter: writer.NewPrometheusWriter(log.New("ngalert.recording.writer")),
RecordingWriter: recordingWriter,
}
// There are a set of feature toggles available that act as short-circuits for common configurations.
@ -624,3 +630,13 @@ func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySet
// createRemoteAlertmanager constructs a remote.Alertmanager from cfg.
// It first creates a file store with notifier.NewFileStore, keyed by cfg.OrgID
// and backed by the supplied kvstore, and passes that store together with the
// decrypt function, autogen function and metrics to remote.NewAlertmanager.
// The result of remote.NewAlertmanager is returned unchanged.
func createRemoteAlertmanager(cfg remote.AlertmanagerConfig, kvstore kvstore.KVStore, decryptFn remote.DecryptFn, autogenFn remote.AutogenFn, m *metrics.RemoteAlertmanager) (*remote.Alertmanager, error) {
	fileStore := notifier.NewFileStore(cfg.OrgID, kvstore)
	return remote.NewAlertmanager(cfg, fileStore, decryptFn, autogenFn, m)
}
// createRecordingWriter picks the RecordingWriter handed to the scheduler.
//
// When the FlagGrafanaManagedRecordingRules feature toggle is enabled globally,
// it builds a Prometheus writer from settings via writer.NewPrometheusWriter.
// Otherwise it returns a writer.NoopWriter.
//
// On failure the returned writer is a literal nil interface value and the
// error from writer.NewPrometheusWriter is returned as-is.
func createRecordingWriter(featureToggles featuremgmt.FeatureToggles, settings setting.RecordingRuleSettings) (schedule.RecordingWriter, error) {
	logger := log.New("ngalert.writer")

	if !featureToggles.IsEnabledGlobally(featuremgmt.FlagGrafanaManagedRecordingRules) {
		return writer.NoopWriter{}, nil
	}

	promWriter, err := writer.NewPrometheusWriter(settings, logger)
	if err != nil {
		// Do not forward the *PrometheusWriter directly: a nil pointer wrapped in
		// the RecordingWriter interface would compare as non-nil to callers.
		return nil, err
	}
	return promWriter, nil
}

View File

@ -16,7 +16,6 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/writer"
"github.com/grafana/grafana/pkg/services/org"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/util"
@ -57,7 +56,7 @@ func newRuleFactory(
met *metrics.Scheduler,
logger log.Logger,
tracer tracing.Tracer,
recordingWriter writer.Writer,
recordingWriter RecordingWriter,
evalAppliedHook evalAppliedFunc,
stopAppliedHook stopAppliedFunc,
) ruleFactoryFunc {

View File

@ -14,7 +14,6 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/writer"
"github.com/grafana/grafana/pkg/util"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
@ -39,10 +38,10 @@ type recordingRule struct {
metrics *metrics.Scheduler
tracer tracing.Tracer
writer writer.Writer
writer RecordingWriter
}
func newRecordingRule(parent context.Context, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, ft featuremgmt.FeatureToggles, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer writer.Writer) *recordingRule {
func newRecordingRule(parent context.Context, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, ft featuremgmt.FeatureToggles, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter) *recordingRule {
ctx, stop := util.WithCancelCause(parent)
return &recordingRule{
ctx: ctx,

View File

@ -11,6 +11,7 @@ import (
"github.com/benbjohnson/clock"
"golang.org/x/sync/errgroup"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
@ -19,7 +20,6 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/writer"
"github.com/grafana/grafana/pkg/util/ticker"
)
@ -47,6 +47,10 @@ type RulesStore interface {
GetAlertRulesForScheduling(ctx context.Context, query *ngmodels.GetAlertRulesForSchedulingQuery) error
}
// RecordingWriter is the writer abstraction held by the scheduler and its
// recording rules. It is declared here, next to its consumer, so that the
// schedule package does not need to import a concrete writer implementation.
type RecordingWriter interface {
// Write is given a name, a timestamp t, a set of data frames and a map of
// extra labels, and reports any failure through the returned error.
Write(ctx context.Context, name string, t time.Time, frames data.Frames, extraLabels map[string]string) error
}
type schedule struct {
// base tick rate (fastest possible configured check)
baseInterval time.Duration
@ -94,7 +98,7 @@ type schedule struct {
tracer tracing.Tracer
recordingWriter writer.Writer
recordingWriter RecordingWriter
}
// SchedulerCfg is the scheduler configuration.
@ -113,7 +117,7 @@ type SchedulerCfg struct {
AlertSender AlertsSender
Tracer tracing.Tracer
Log log.Logger
RecordingWriter writer.Writer
RecordingWriter RecordingWriter
}
// NewScheduler returns a new scheduler.

View File

@ -0,0 +1,14 @@
package writer
import (
"context"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
)
// NoopWriter is a writer with no state whose Write method does nothing.
type NoopWriter struct{}

// Write ignores every argument and always returns nil.
func (NoopWriter) Write(ctx context.Context, name string, t time.Time, frames data.Frames, extraLabels map[string]string) error {
	return nil
}

View File

@ -7,14 +7,11 @@ import (
"github.com/grafana/dataplane/sdata/numeric"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana-plugin-sdk-go/data"
)
// PrometheusWriter is a writer for Prometheus time series. It currently holds
// only the logger it reports through.
type PrometheusWriter struct {
// logger is the base logger; Write derives a context-aware logger from it.
logger log.Logger
}
// Metric represents a Prometheus time series metric.
type Metric struct {
T int64
@ -28,27 +25,6 @@ type Point struct {
Metric Metric
}
// NewPrometheusWriter returns a pointer to a PrometheusWriter whose logger is l.
func NewPrometheusWriter(l log.Logger) *PrometheusWriter {
	w := new(PrometheusWriter)
	w.logger = l
	return w
}
// Write converts the given frames into points with PointsFromFrames and logs
// them at debug level. A conversion error is returned to the caller unchanged.
// TODO: stub implementation, does not make any remote write calls.
func (w PrometheusWriter) Write(ctx context.Context, name string, t time.Time, frames data.Frames, extraLabels map[string]string) error {
	ctxLogger := w.logger.FromContext(ctx)

	pts, convErr := PointsFromFrames(name, t, frames, extraLabels)
	if convErr != nil {
		return convErr
	}

	// TODO: placeholder for actual remote write call
	ctxLogger.Debug("writing points", "points", pts)
	return nil
}
func PointsFromFrames(name string, t time.Time, frames data.Frames, extraLabels map[string]string) ([]Point, error) {
cr, err := numeric.CollectionReaderFromFrames(frames)
if err != nil {
@ -94,3 +70,31 @@ func PointsFromFrames(name string, t time.Time, frames data.Frames, extraLabels
return points, nil
}
// PrometheusWriter is a writer for Prometheus time series. It currently holds
// only the logger it reports through.
type PrometheusWriter struct {
// logger is the base logger; Write derives a context-aware logger from it.
logger log.Logger
}
// NewPrometheusWriter returns a PrometheusWriter that logs through l.
//
// settings is accepted so callers can already pass the recording rule
// configuration, but this constructor does not read it yet. The returned
// error is currently always nil.
func NewPrometheusWriter(settings setting.RecordingRuleSettings, l log.Logger) (*PrometheusWriter, error) {
	w := &PrometheusWriter{logger: l}
	return w, nil
}
// Write converts the given frames into points with PointsFromFrames and logs
// them at debug level. A conversion error is returned to the caller unchanged.
// TODO: stub implementation, does not make any remote write calls.
func (w PrometheusWriter) Write(ctx context.Context, name string, t time.Time, frames data.Frames, extraLabels map[string]string) error {
	ctxLogger := w.logger.FromContext(ctx)

	pts, convErr := PointsFromFrames(name, t, frames, extraLabels)
	if convErr != nil {
		return convErr
	}

	// TODO: placeholder for actual remote write call
	ctxLogger.Debug("writing points", "points", pts)
	return nil
}

View File

@ -1,12 +0,0 @@
package writer
import (
"context"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
)
// Writer is the interface implemented by recording rule writers in this package.
type Writer interface {
// Write is given a name, a timestamp t, a set of data frames and a map of
// extra labels, and reports any failure through the returned error.
Write(ctx context.Context, name string, t time.Time, frames data.Frames, extraLabels map[string]string) error
}

View File

@ -59,9 +59,10 @@ const (
// with intervals that are not exactly divided by this number not to be evaluated
SchedulerBaseInterval = 10 * time.Second
// DefaultRuleEvaluationInterval indicates the default interval for how long a rule should be evaluated to change state from Pending to Alerting
DefaultRuleEvaluationInterval = SchedulerBaseInterval * 6 // == 60 seconds
stateHistoryDefaultEnabled = true
lokiDefaultMaxQueryLength = 721 * time.Hour // 30d1h, matches the default value in Loki
DefaultRuleEvaluationInterval = SchedulerBaseInterval * 6 // == 60 seconds
stateHistoryDefaultEnabled = true
lokiDefaultMaxQueryLength = 721 * time.Hour // 30d1h, matches the default value in Loki
defaultRecordingRequestTimeout = 10 * time.Second
)
type UnifiedAlertingSettings struct {
@ -103,6 +104,8 @@ type UnifiedAlertingSettings struct {
SkipClustering bool
StateHistory UnifiedAlertingStateHistorySettings
RemoteAlertmanager RemoteAlertmanagerSettings
RecordingRules RecordingRuleSettings
// MaxStateSaveConcurrency controls the number of goroutines (per rule) that can save alert state in parallel.
MaxStateSaveConcurrency int
StatePeriodicSaveInterval time.Duration
@ -112,6 +115,14 @@ type UnifiedAlertingSettings struct {
NotificationLogRetention time.Duration
}
// RecordingRuleSettings holds the configuration for recording rule write
// requests, read from the [recording_rules] and
// [recording_rules.custom_headers] sections of the configuration file.
type RecordingRuleSettings struct {
// URL is the target URL (including write path) for recording rules.
URL string
// BasicAuthUsername is the optional basic authentication username; empty when not configured.
BasicAuthUsername string
// BasicAuthPassword is the optional basic authentication password; empty when not configured.
BasicAuthPassword string
// CustomHeaders maps header names to values taken from the custom_headers subsection.
CustomHeaders map[string]string
// Timeout is the request timeout for recording rule writes.
Timeout time.Duration
}
// RemoteAlertmanagerSettings contains the configuration needed
// to disable the internal Alertmanager and use an external one instead.
type RemoteAlertmanagerSettings struct {
@ -395,6 +406,23 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
}
uaCfg.StateHistory = uaCfgStateHistory
rr := iniFile.Section("recording_rules")
uaCfgRecordingRules := RecordingRuleSettings{
URL: rr.Key("url").MustString(""),
BasicAuthUsername: rr.Key("basic_auth_username").MustString(""),
BasicAuthPassword: rr.Key("basic_auth_password").MustString(""),
Timeout: rr.Key("timeout").MustDuration(defaultRecordingRequestTimeout),
}
rrHeaders := iniFile.Section("recording_rules.custom_headers")
rrHeadersKeys := rrHeaders.Keys()
uaCfgRecordingRules.CustomHeaders = make(map[string]string, len(rrHeadersKeys))
for _, key := range rrHeadersKeys {
uaCfgRecordingRules.CustomHeaders[key.Name()] = key.Value()
}
uaCfg.RecordingRules = uaCfgRecordingRules
uaCfg.MaxStateSaveConcurrency = ua.Key("max_state_save_concurrency").MustInt(1)
uaCfg.StatePeriodicSaveInterval, err = gtime.ParseDuration(valueAsString(ua, "state_periodic_save_interval", (time.Minute * 5).String()))