Alerting: Extract ticker into shared package (#55703)

* Move ticker files to dedicated package with no changes

* Fix package naming and resolve naming conflicts

* Fix up all existing references to moved objects

* Remove all alerting-specific references from shared util

* Rename TickerMetrics to simply Metrics

* Rename base ticker type to T and rename NewTicker to simply New
This commit is contained in:
Alexander Weaver
2022-09-26 12:35:33 -05:00
committed by GitHub
parent 53c61b49bf
commit bd6a5c900f
6 changed files with 44 additions and 47 deletions

View File

@@ -16,7 +16,6 @@ import (
"github.com/grafana/grafana/pkg/infra/tracing" "github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/infra/usagestats" "github.com/grafana/grafana/pkg/infra/usagestats"
"github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/alerting/metrics"
"github.com/grafana/grafana/pkg/services/annotations" "github.com/grafana/grafana/pkg/services/annotations"
"github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/datasources"
@@ -25,6 +24,7 @@ import (
"github.com/grafana/grafana/pkg/services/rendering" "github.com/grafana/grafana/pkg/services/rendering"
"github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/tsdb/legacydata" "github.com/grafana/grafana/pkg/tsdb/legacydata"
"github.com/grafana/grafana/pkg/util/ticker"
) )
// AlertEngine is the background process that // AlertEngine is the background process that
@@ -37,7 +37,7 @@ type AlertEngine struct {
Cfg *setting.Cfg Cfg *setting.Cfg
execQueue chan *Job execQueue chan *Job
ticker *Ticker ticker *ticker.T
scheduler scheduler scheduler scheduler
evalHandler evalHandler evalHandler evalHandler
ruleReader ruleReader ruleReader ruleReader
@@ -90,7 +90,7 @@ func ProvideAlertEngine(renderer rendering.Service, requestValidator models.Plug
// Run starts the alerting service background process. // Run starts the alerting service background process.
func (e *AlertEngine) Run(ctx context.Context) error { func (e *AlertEngine) Run(ctx context.Context) error {
reg := prometheus.WrapRegistererWithPrefix("legacy_", prometheus.DefaultRegisterer) reg := prometheus.WrapRegistererWithPrefix("legacy_", prometheus.DefaultRegisterer)
e.ticker = NewTicker(clock.New(), 1*time.Second, metrics.NewTickerMetrics(reg)) e.ticker = ticker.New(clock.New(), 1*time.Second, ticker.NewMetrics(reg, "alerting"))
defer e.ticker.Stop() defer e.ticker.Stop()
alertGroup, ctx := errgroup.WithContext(ctx) alertGroup, ctx := errgroup.WithContext(ctx)
alertGroup.Go(func() error { return e.alertingTicker(ctx) }) alertGroup.Go(func() error { return e.alertingTicker(ctx) })

View File

@@ -13,8 +13,8 @@ import (
"github.com/grafana/grafana/pkg/api/response" "github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/models"
legacyMetrics "github.com/grafana/grafana/pkg/services/alerting/metrics"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/util/ticker"
"github.com/grafana/grafana/pkg/web" "github.com/grafana/grafana/pkg/web"
) )
@@ -55,7 +55,7 @@ type Scheduler struct {
SchedulableAlertRules prometheus.Gauge SchedulableAlertRules prometheus.Gauge
SchedulableAlertRulesHash prometheus.Gauge SchedulableAlertRulesHash prometheus.Gauge
UpdateSchedulableAlertRulesDuration prometheus.Histogram UpdateSchedulableAlertRulesDuration prometheus.Histogram
Ticker *legacyMetrics.Ticker Ticker *ticker.Metrics
EvaluationMissed *prometheus.CounterVec EvaluationMissed *prometheus.CounterVec
} }
@@ -199,7 +199,7 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10}, Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10},
}, },
), ),
Ticker: legacyMetrics.NewTickerMetrics(r), Ticker: ticker.NewMetrics(r, "alerting"),
EvaluationMissed: promauto.With(r).NewCounterVec( EvaluationMissed: promauto.With(r).NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: Namespace, Namespace: Namespace,

View File

@@ -10,7 +10,6 @@ import (
prometheusModel "github.com/prometheus/common/model" prometheusModel "github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/alerting"
"github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/eval"
@@ -21,6 +20,7 @@ import (
"github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/services/org"
"github.com/grafana/grafana/pkg/services/user" "github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util/ticker"
"github.com/benbjohnson/clock" "github.com/benbjohnson/clock"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
@@ -68,7 +68,7 @@ type schedule struct {
clock clock.Clock clock clock.Clock
ticker *alerting.Ticker ticker *ticker.T
// evalApplied is only used for tests: test code can set it to non-nil // evalApplied is only used for tests: test code can set it to non-nil
// function, and then it'll be called from the event loop whenever the // function, and then it'll be called from the event loop whenever the
@@ -119,7 +119,7 @@ type SchedulerCfg struct {
// NewScheduler returns a new schedule. // NewScheduler returns a new schedule.
func NewScheduler(cfg SchedulerCfg, appURL *url.URL, stateManager *state.Manager) *schedule { func NewScheduler(cfg SchedulerCfg, appURL *url.URL, stateManager *state.Manager) *schedule {
ticker := alerting.NewTicker(cfg.C, cfg.Cfg.BaseInterval, cfg.Metrics.Ticker) ticker := ticker.New(cfg.C, cfg.Cfg.BaseInterval, cfg.Metrics.Ticker)
sch := schedule{ sch := schedule{
registry: alertRuleInfoRegistry{alertRuleInfo: make(map[ngmodels.AlertRuleKey]*alertRuleInfo)}, registry: alertRuleInfoRegistry{alertRuleInfo: make(map[ngmodels.AlertRuleKey]*alertRuleInfo)},
@@ -449,7 +449,7 @@ func (sch *schedule) overrideCfg(cfg SchedulerCfg) {
sch.clock = cfg.C sch.clock = cfg.C
sch.baseInterval = cfg.Cfg.BaseInterval sch.baseInterval = cfg.Cfg.BaseInterval
sch.ticker.Stop() sch.ticker.Stop()
sch.ticker = alerting.NewTicker(cfg.C, cfg.Cfg.BaseInterval, cfg.Metrics.Ticker) sch.ticker = ticker.New(cfg.C, cfg.Cfg.BaseInterval, cfg.Metrics.Ticker)
sch.evalAppliedFunc = cfg.EvalAppliedFunc sch.evalAppliedFunc = cfg.EvalAppliedFunc
sch.stopAppliedFunc = cfg.StopAppliedFunc sch.stopAppliedFunc = cfg.StopAppliedFunc
} }

View File

@@ -1,33 +1,33 @@
package metrics package ticker
import ( import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promauto"
) )
type Ticker struct { type Metrics struct {
LastTickTime prometheus.Gauge LastTickTime prometheus.Gauge
NextTickTime prometheus.Gauge NextTickTime prometheus.Gauge
IntervalSeconds prometheus.Gauge IntervalSeconds prometheus.Gauge
} }
func NewTickerMetrics(reg prometheus.Registerer) *Ticker { func NewMetrics(reg prometheus.Registerer, subsystem string) *Metrics {
return &Ticker{ return &Metrics{
LastTickTime: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ LastTickTime: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Namespace: "grafana", Namespace: "grafana",
Subsystem: "alerting", Subsystem: subsystem,
Name: "ticker_last_consumed_tick_timestamp_seconds", Name: "ticker_last_consumed_tick_timestamp_seconds",
Help: "Timestamp of the last consumed tick in seconds.", Help: "Timestamp of the last consumed tick in seconds.",
}), }),
NextTickTime: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ NextTickTime: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Namespace: "grafana", Namespace: "grafana",
Subsystem: "alerting", Subsystem: subsystem,
Name: "ticker_next_tick_timestamp_seconds", Name: "ticker_next_tick_timestamp_seconds",
Help: "Timestamp of the next tick in seconds before it is consumed.", Help: "Timestamp of the next tick in seconds before it is consumed.",
}), }),
IntervalSeconds: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ IntervalSeconds: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Namespace: "grafana", Namespace: "grafana",
Subsystem: "alerting", Subsystem: subsystem,
Name: "ticker_interval_seconds", Name: "ticker_interval_seconds",
Help: "Interval at which the ticker is meant to tick.", Help: "Interval at which the ticker is meant to tick.",
}), }),

View File

@@ -1,4 +1,4 @@
package alerting package ticker
import ( import (
"fmt" "fmt"
@@ -7,29 +7,28 @@ import (
"github.com/benbjohnson/clock" "github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/alerting/metrics"
) )
// Ticker is a ticker to power the alerting scheduler. it's like a time.Ticker, except: // T emits ticks at regular time intervals. it's like a time.Ticker, except:
// - it doesn't drop ticks for slow receivers, rather, it queues up. so that callers are in control to instrument what's going on. // - it doesn't drop ticks for slow receivers, rather, it queues up. so that callers are in control to instrument what's going on.
// - it ticks on interval marks or very shortly after. this provides a predictable load pattern // - it ticks on interval marks or very shortly after. this provides a predictable load pattern
// (this shouldn't cause too much load contention issues because the next steps in the pipeline just process at their own pace) // (this shouldn't cause too much load contention issues because the next steps in the pipeline just process at their own pace)
// - the timestamps are used to mark "last datapoint to query for" and as such, are a configurable amount of seconds in the past // - the timestamps are used to mark "last datapoint to query for" and as such, are a configurable amount of seconds in the past
type Ticker struct { type T struct {
C chan time.Time C chan time.Time
clock clock.Clock clock clock.Clock
last time.Time last time.Time
interval time.Duration interval time.Duration
metrics *metrics.Ticker metrics *Metrics
stopCh chan struct{} stopCh chan struct{}
} }
// NewTicker returns a Ticker that ticks on interval marks (or very shortly after) starting at c.Now(), and never drops ticks. interval should not be negative or zero. // New returns a T that ticks on interval marks (or very shortly after) starting at c.Now(), and never drops ticks. interval should not be negative or zero.
func NewTicker(c clock.Clock, interval time.Duration, metric *metrics.Ticker) *Ticker { func New(c clock.Clock, interval time.Duration, metric *Metrics) *T {
if interval <= 0 { if interval <= 0 {
panic(fmt.Errorf("non-positive interval [%v] is not allowed", interval)) panic(fmt.Errorf("non-positive interval [%v] is not allowed", interval))
} }
t := &Ticker{ t := &T{
C: make(chan time.Time), C: make(chan time.Time),
clock: c, clock: c,
last: getStartTick(c, interval), last: getStartTick(c, interval),
@@ -47,7 +46,7 @@ func getStartTick(clk clock.Clock, interval time.Duration) time.Time {
return time.Unix(0, nano-(nano%interval.Nanoseconds())) return time.Unix(0, nano-(nano%interval.Nanoseconds()))
} }
func (t *Ticker) run() { func (t *T) run() {
logger := log.New("ticker") logger := log.New("ticker")
logger.Info("starting", "first_tick", t.last.Add(t.interval)) logger.Info("starting", "first_tick", t.last.Add(t.interval))
LOOP: LOOP:
@@ -77,7 +76,7 @@ LOOP:
} }
// Stop stops the ticker. It does not close the C channel // Stop stops the ticker. It does not close the C channel
func (t *Ticker) Stop() { func (t *T) Stop() {
select { select {
case t.stopCh <- struct{}{}: case t.stopCh <- struct{}{}:
default: default:

View File

@@ -1,4 +1,4 @@
package alerting package ticker
import ( import (
"bytes" "bytes"
@@ -13,8 +13,6 @@ import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/services/alerting/metrics"
) )
func TestTicker(t *testing.T) { func TestTicker(t *testing.T) {
@@ -53,7 +51,7 @@ func TestTicker(t *testing.T) {
interval := time.Duration(rand.Int63n(100)+10) * time.Second interval := time.Duration(rand.Int63n(100)+10) * time.Second
clk := clock.NewMock() clk := clock.NewMock()
clk.Add(interval) // align clock with the start tick clk.Add(interval) // align clock with the start tick
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
ticks := rand.Intn(9) + 1 ticks := rand.Intn(9) + 1
jitter := rand.Int63n(int64(interval) - 1) jitter := rand.Int63n(int64(interval) - 1)
@@ -87,7 +85,7 @@ func TestTicker(t *testing.T) {
t.Run("should not put anything to channel until it's time", func(t *testing.T) { t.Run("should not put anything to channel until it's time", func(t *testing.T) {
clk := clock.NewMock() clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
expectedTick := clk.Now().Add(interval) expectedTick := clk.Now().Add(interval)
for { for {
require.Empty(t, ticker.C) require.Empty(t, ticker.C)
@@ -104,7 +102,7 @@ func TestTicker(t *testing.T) {
t.Run("should put the tick in the channel immediately if it is behind", func(t *testing.T) { t.Run("should put the tick in the channel immediately if it is behind", func(t *testing.T) {
clk := clock.NewMock() clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
// We can expect the first tick to be at a consistent interval. Take a snapshot of the clock now, before we advance it. // We can expect the first tick to be at a consistent interval. Take a snapshot of the clock now, before we advance it.
expectedTick := clk.Now().Add(interval) expectedTick := clk.Now().Add(interval)
@@ -133,25 +131,25 @@ func TestTicker(t *testing.T) {
clk.Set(time.Now()) clk.Set(time.Now())
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
registry := prometheus.NewPedanticRegistry() registry := prometheus.NewPedanticRegistry()
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(registry)) ticker := New(clk, interval, NewMetrics(registry, "test"))
expectedTick := getStartTick(clk, interval).Add(interval) expectedTick := getStartTick(clk, interval).Add(interval)
expectedMetricFmt := `# HELP grafana_alerting_ticker_interval_seconds Interval at which the ticker is meant to tick. expectedMetricFmt := `# HELP grafana_test_ticker_interval_seconds Interval at which the ticker is meant to tick.
# TYPE grafana_alerting_ticker_interval_seconds gauge # TYPE grafana_test_ticker_interval_seconds gauge
grafana_alerting_ticker_interval_seconds %v grafana_test_ticker_interval_seconds %v
# HELP grafana_alerting_ticker_last_consumed_tick_timestamp_seconds Timestamp of the last consumed tick in seconds. # HELP grafana_test_ticker_last_consumed_tick_timestamp_seconds Timestamp of the last consumed tick in seconds.
# TYPE grafana_alerting_ticker_last_consumed_tick_timestamp_seconds gauge # TYPE grafana_test_ticker_last_consumed_tick_timestamp_seconds gauge
grafana_alerting_ticker_last_consumed_tick_timestamp_seconds %v grafana_test_ticker_last_consumed_tick_timestamp_seconds %v
# HELP grafana_alerting_ticker_next_tick_timestamp_seconds Timestamp of the next tick in seconds before it is consumed. # HELP grafana_test_ticker_next_tick_timestamp_seconds Timestamp of the next tick in seconds before it is consumed.
# TYPE grafana_alerting_ticker_next_tick_timestamp_seconds gauge # TYPE grafana_test_ticker_next_tick_timestamp_seconds gauge
grafana_alerting_ticker_next_tick_timestamp_seconds %v grafana_test_ticker_next_tick_timestamp_seconds %v
` `
expectedMetric := fmt.Sprintf(expectedMetricFmt, interval.Seconds(), 0, float64(expectedTick.UnixNano())/1e9) expectedMetric := fmt.Sprintf(expectedMetricFmt, interval.Seconds(), 0, float64(expectedTick.UnixNano())/1e9)
errs := make(map[string]error, 1) errs := make(map[string]error, 1)
require.Eventuallyf(t, func() bool { require.Eventuallyf(t, func() bool {
err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_alerting_ticker_last_consumed_tick_timestamp_seconds", "grafana_alerting_ticker_next_tick_timestamp_seconds", "grafana_alerting_ticker_interval_seconds") err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_test_ticker_last_consumed_tick_timestamp_seconds", "grafana_test_ticker_next_tick_timestamp_seconds", "grafana_test_ticker_interval_seconds")
if err != nil { if err != nil {
errs["error"] = err errs["error"] = err
} }
@@ -164,7 +162,7 @@ func TestTicker(t *testing.T) {
expectedMetric = fmt.Sprintf(expectedMetricFmt, interval.Seconds(), float64(actual.UnixNano())/1e9, float64(expectedTick.Add(interval).UnixNano())/1e9) expectedMetric = fmt.Sprintf(expectedMetricFmt, interval.Seconds(), float64(actual.UnixNano())/1e9, float64(expectedTick.Add(interval).UnixNano())/1e9)
require.Eventuallyf(t, func() bool { require.Eventuallyf(t, func() bool {
err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_alerting_ticker_last_consumed_tick_timestamp_seconds", "grafana_alerting_ticker_next_tick_timestamp_seconds", "grafana_alerting_ticker_interval_seconds") err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_test_ticker_last_consumed_tick_timestamp_seconds", "grafana_test_ticker_next_tick_timestamp_seconds", "grafana_test_ticker_interval_seconds")
if err != nil { if err != nil {
errs["error"] = err errs["error"] = err
} }
@@ -176,7 +174,7 @@ func TestTicker(t *testing.T) {
t.Run("when it waits for the next tick", func(t *testing.T) { t.Run("when it waits for the next tick", func(t *testing.T) {
clk := clock.NewMock() clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
clk.Add(interval) clk.Add(interval)
readChanOrFail(t, ticker.C) readChanOrFail(t, ticker.C)
ticker.Stop() ticker.Stop()
@@ -187,7 +185,7 @@ func TestTicker(t *testing.T) {
t.Run("when it waits for the tick to be consumed", func(t *testing.T) { t.Run("when it waits for the tick to be consumed", func(t *testing.T) {
clk := clock.NewMock() clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
clk.Add(interval) clk.Add(interval)
ticker.Stop() ticker.Stop()
require.Empty(t, ticker.C) require.Empty(t, ticker.C)
@@ -196,7 +194,7 @@ func TestTicker(t *testing.T) {
t.Run("multiple times", func(t *testing.T) { t.Run("multiple times", func(t *testing.T) {
clk := clock.NewMock() clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := NewTicker(clk, interval, metrics.NewTickerMetrics(prometheus.NewRegistry())) ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
ticker.Stop() ticker.Stop()
ticker.Stop() ticker.Stop()
ticker.Stop() ticker.Stop()