Alerting: Extract ticker into shared package (#55703)

* Move ticker files to dedicated package with no changes

* Fix package naming and resolve naming conflicts

* Fix up all existing references to moved objects

* Remove all alerting-specific references from shared util

* Rename TickerMetrics to simply Metrics

* Rename base ticker type to T and rename NewTicker to simply New
This commit is contained in:
Alexander Weaver
2022-09-26 12:35:33 -05:00
committed by GitHub
parent 53c61b49bf
commit bd6a5c900f
6 changed files with 44 additions and 47 deletions

View File

@@ -0,0 +1,35 @@
package ticker
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Metrics groups the Prometheus gauges that a ticker reports.
type Metrics struct {
// LastTickTime is the timestamp, in seconds, of the last consumed tick.
LastTickTime prometheus.Gauge
// NextTickTime is the timestamp, in seconds, of the next tick before it is consumed.
NextTickTime prometheus.Gauge
// IntervalSeconds is the interval, in seconds, at which the ticker is meant to tick.
IntervalSeconds prometheus.Gauge
}
// NewMetrics creates the ticker gauges and registers them with reg.
//
// Every gauge lives in the "grafana" namespace under the given subsystem, so
// the exported names have the form grafana_<subsystem>_ticker_*.
func NewMetrics(reg prometheus.Registerer, subsystem string) *Metrics {
	factory := promauto.With(reg)

	// gauge builds one gauge that differs from its siblings only by name and help text.
	gauge := func(name, help string) prometheus.Gauge {
		return factory.NewGauge(prometheus.GaugeOpts{
			Namespace: "grafana",
			Subsystem: subsystem,
			Name:      name,
			Help:      help,
		})
	}

	m := new(Metrics)
	m.LastTickTime = gauge(
		"ticker_last_consumed_tick_timestamp_seconds",
		"Timestamp of the last consumed tick in seconds.",
	)
	m.NextTickTime = gauge(
		"ticker_next_tick_timestamp_seconds",
		"Timestamp of the next tick in seconds before it is consumed.",
	)
	m.IntervalSeconds = gauge(
		"ticker_interval_seconds",
		"Interval at which the ticker is meant to tick.",
	)
	return m
}

85
pkg/util/ticker/ticker.go Normal file
View File

@@ -0,0 +1,85 @@
package ticker
import (
"fmt"
"time"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/infra/log"
)
// T emits ticks at regular time intervals. It is like a time.Ticker, except:
//   - it does not drop ticks for slow receivers; a due tick waits until it is
//     received and later ticks follow one interval apart, so callers are in
//     control and can instrument what is going on.
//   - it ticks on interval marks or very shortly after, which gives a
//     predictable load pattern (this should not cause much load contention
//     because the next steps in the pipeline process at their own pace).
//
// NOTE(review): the previous comment also said the timestamps are "a
// configurable amount of seconds in the past"; no such offset is visible in
// this type — confirm whether that still applies to any caller.
type T struct {
// C is the channel on which tick timestamps are delivered.
C chan time.Time
// clock is the time source used for the current time and for waiting.
clock clock.Clock
// last is the most recently delivered tick (initially the start tick).
last time.Time
// interval is the distance between two consecutive ticks.
interval time.Duration
// metrics receives the interval, next-tick and last-tick gauge updates.
metrics *Metrics
// stopCh carries the stop signal from Stop to the run loop.
stopCh chan struct{}
}
// New returns a ticker that ticks on interval marks (or very shortly after)
// and never drops ticks.
//
// The start tick is the current time of c rounded down to a multiple of
// interval; the first tick delivered on C is one interval after that.
//
// interval must be positive, otherwise New panics. metric must be non-nil:
// its IntervalSeconds gauge is set here and the other gauges are updated by
// the tick loop.
//
// New starts a goroutine that runs the tick loop; call Stop to end it.
func New(c clock.Clock, interval time.Duration, metric *Metrics) *T {
	if interval <= 0 {
		panic(fmt.Errorf("non-positive interval [%v] is not allowed", interval))
	}
	t := &T{
		C:        make(chan time.Time),
		clock:    c,
		last:     getStartTick(c, interval),
		interval: interval,
		metrics:  metric,
		// Capacity one: Stop sends without blocking, so with an unbuffered
		// channel a stop request issued while the loop goroutine is not
		// parked in a select (for example before it has been scheduled)
		// would be silently lost. The buffer keeps the signal until the
		// loop reaches its next select.
		stopCh: make(chan struct{}, 1),
	}
	// Seconds reports the fractional part as well, so it matches the format
	// of the timestamps reported by the tick loop.
	metric.IntervalSeconds.Set(t.interval.Seconds())
	go t.run()
	return t
}
// getStartTick returns the current time of clk with the remainder of its
// Unix-nanosecond value modulo interval subtracted, i.e. aligned to an
// interval mark.
func getStartTick(clk clock.Clock, interval time.Duration) time.Time {
	nowNanos := clk.Now().UnixNano()
	pastMark := nowNanos % interval.Nanoseconds()
	return time.Unix(0, nowNanos-pastMark)
}
// run is the tick loop. Each iteration computes the next tick as last+interval
// and publishes it in the NextTickTime gauge. If the tick is not due yet the
// loop waits on the clock; once it is due the loop blocks until the tick is
// received on C, then records it as last and in the LastTickTime gauge.
// A value arriving on stopCh ends the loop from either wait.
func (t *T) run() {
	logger := log.New("ticker")
	logger.Info("starting", "first_tick", t.last.Add(t.interval))

loop:
	for {
		// The upcoming tick is always exactly one interval after the last delivered one.
		next := t.last.Add(t.interval)
		t.metrics.NextTickTime.Set(float64(next.UnixNano()) / 1e9)

		// Negative while the tick is still in the future, zero or positive once it is due.
		diff := t.clock.Now().Sub(next)
		if diff < 0 {
			// Too early: wait out the remaining time, then re-evaluate from the top.
			select {
			case <-t.clock.After(-diff):
				continue
			case <-t.stopCh:
				break loop
			}
		}

		// Due (possibly overdue): hold the tick until a receiver takes it, so
		// that a slow receiver delays ticks instead of losing them.
		select {
		case t.C <- next:
			t.last = next
			t.metrics.LastTickTime.Set(float64(next.UnixNano()) / 1e9)
		case <-t.stopCh:
			break loop
		}
	}

	logger.Info("stopped", "last_tick", t.last)
}
// Stop asks the tick loop to exit. It never blocks: if the stop signal cannot
// be handed over immediately (for instance because the loop has already
// exited), the call simply returns, which makes repeated calls safe.
// Stop does not close the C channel.
func (t *T) Stop() {
	signal := struct{}{}
	select {
	case t.stopCh <- signal:
		return
	default:
		// Nobody can take the signal right now; give up instead of blocking the caller.
		return
	}
}

View File

@@ -0,0 +1,203 @@
package ticker
import (
"bytes"
"context"
"fmt"
"math/rand"
"sync"
"testing"
"time"
"github.com/benbjohnson/clock"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
)
// TestTicker exercises the ticker against a mock clock: start-tick alignment,
// tick delivery without drops, delivery timing, reported metrics and Stop.
func TestTicker(t *testing.T) {
// readChanOrFail returns a tick from c, failing the test if none is available.
// NOTE(review): because the select has a default case it never waits, so the
// 10s context timeout branch is effectively unreachable and the helper behaves
// as a non-blocking read — confirm whether a blocking read with timeout was
// intended.
readChanOrFail := func(t *testing.T, c chan time.Time) time.Time {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
t.Cleanup(func() {
cancel()
})
t.Helper()
select {
case tick := <-c:
return tick
case <-ctx.Done():
require.Failf(t, fmt.Sprintf("%v", ctx.Err()), "timeout reading the channel")
default:
require.Failf(t, "channel is empty but it should have a tick", "")
}
return time.Time{}
}
t.Run("should align with clock", func(t *testing.T) {
interval := 10 * time.Second
clk := clock.NewMock()
// One minute is a multiple of the interval, so the clock is expected to sit
// exactly on an interval mark (presumably the mock starts at the Unix epoch —
// the assertion below verifies the alignment).
clk.Add(1 * time.Minute)
require.Equal(t, clk.Now(), getStartTick(clk, interval))
now := clk.Now()
// Any time within [now, now+interval) must round down to the same start tick.
for i := 0; i < 100; i++ {
delta := time.Duration(rand.Int63n(interval.Nanoseconds()))
clk.Set(now.Add(delta))
require.Equal(t, now, getStartTick(clk, interval))
}
})
t.Run("should not drop ticks", func(t *testing.T) {
interval := time.Duration(rand.Int63n(100)+10) * time.Second
clk := clock.NewMock()
clk.Add(interval) // align clock with the start tick
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
// Jump the clock forward by several whole intervals plus a partial one in a
// single step; every skipped tick must still be delivered.
ticks := rand.Intn(9) + 1
jitter := rand.Int63n(int64(interval) - 1)
clk.Add(time.Duration(ticks)*interval + time.Duration(jitter))
w := sync.WaitGroup{}
w.Add(1)
regTicks := make([]time.Time, 0, ticks)
// Collect ticks in the background and release the waiter once the expected
// number has been received.
go func() {
for timestamp := range ticker.C {
regTicks = append(regTicks, timestamp)
if len(regTicks) == ticks {
w.Done()
}
}
}()
w.Wait()
require.Len(t, regTicks, ticks)
t.Run("ticks should monotonically increase", func(t *testing.T) {
// Consecutive ticks must be exactly one interval apart.
for i := 1; i < len(regTicks); i++ {
previous := regTicks[i-1]
current := regTicks[i]
require.Equal(t, interval, current.Sub(previous))
}
})
})
t.Run("should not put anything to channel until it's time", func(t *testing.T) {
clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
expectedTick := clk.Now().Add(interval)
// Advance the clock in random sub-second steps until it passes the expected tick.
for {
// NOTE(review): ticker.C is unbuffered; if require.Empty checks the
// channel's len (which is always 0 for an unbuffered channel), this
// assertion cannot fail — confirm.
require.Empty(t, ticker.C)
clk.Add(time.Duration(rand.Int31n(500)+100) * time.Millisecond)
if clk.Now().After(expectedTick) {
break
}
}
actual := readChanOrFail(t, ticker.C)
require.Equal(t, expectedTick, actual)
})
t.Run("should put the tick in the channel immediately if it is behind", func(t *testing.T) {
clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
// We can expect the first tick to be at a consistent interval. Take a snapshot of the clock now, before we advance it.
expectedTick := clk.Now().Add(interval)
require.Empty(t, ticker.C)
clk.Add(interval) // advance the clock by the interval to make the ticker tick the first time.
clk.Add(interval) // advance the clock by the interval to make the ticker tick the second time.
// Regardless of wall time, the first tick should be initial clock + interval.
actual1 := readChanOrFail(t, ticker.C)
require.Equal(t, expectedTick, actual1)
var actual2 time.Time
// NOTE(review): the condition always returns true, and readChanOrFail fails
// the test itself when no tick is available, so Eventually does not actually
// retry here — confirm this is intended.
require.Eventually(t, func() bool {
actual2 = readChanOrFail(t, ticker.C)
return true
}, time.Second, 10*time.Millisecond)
// Similarly, the second tick should be last tick + interval regardless of wall time.
require.Equal(t, expectedTick.Add(interval), actual2)
})
t.Run("should report metrics", func(t *testing.T) {
clk := clock.NewMock()
clk.Set(time.Now())
interval := time.Duration(rand.Int63n(9)+1) * time.Second
registry := prometheus.NewPedanticRegistry()
ticker := New(clk, interval, NewMetrics(registry, "test"))
expectedTick := getStartTick(clk, interval).Add(interval)
// Placeholders, in order: interval seconds, last consumed tick, next tick.
expectedMetricFmt := `# HELP grafana_test_ticker_interval_seconds Interval at which the ticker is meant to tick.
# TYPE grafana_test_ticker_interval_seconds gauge
grafana_test_ticker_interval_seconds %v
# HELP grafana_test_ticker_last_consumed_tick_timestamp_seconds Timestamp of the last consumed tick in seconds.
# TYPE grafana_test_ticker_last_consumed_tick_timestamp_seconds gauge
grafana_test_ticker_last_consumed_tick_timestamp_seconds %v
# HELP grafana_test_ticker_next_tick_timestamp_seconds Timestamp of the next tick in seconds before it is consumed.
# TYPE grafana_test_ticker_next_tick_timestamp_seconds gauge
grafana_test_ticker_next_tick_timestamp_seconds %v
`
// Before any tick is consumed: last consumed tick is 0, next tick is the first interval mark.
expectedMetric := fmt.Sprintf(expectedMetricFmt, interval.Seconds(), 0, float64(expectedTick.UnixNano())/1e9)
// The latest comparison error is kept in a map, presumably so that the
// failure message can show what the closure recorded.
errs := make(map[string]error, 1)
require.Eventuallyf(t, func() bool {
err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_test_ticker_last_consumed_tick_timestamp_seconds", "grafana_test_ticker_next_tick_timestamp_seconds", "grafana_test_ticker_interval_seconds")
if err != nil {
errs["error"] = err
}
return err == nil
}, 1*time.Second, 100*time.Millisecond, "failed to wait for metrics to match expected values:\n%v", errs)
clk.Add(interval)
actual := readChanOrFail(t, ticker.C)
// After consuming one tick: last consumed tick is that tick, next tick is one interval later.
expectedMetric = fmt.Sprintf(expectedMetricFmt, interval.Seconds(), float64(actual.UnixNano())/1e9, float64(expectedTick.Add(interval).UnixNano())/1e9)
require.Eventuallyf(t, func() bool {
err := testutil.GatherAndCompare(registry, bytes.NewBufferString(expectedMetric), "grafana_test_ticker_last_consumed_tick_timestamp_seconds", "grafana_test_ticker_next_tick_timestamp_seconds", "grafana_test_ticker_interval_seconds")
if err != nil {
errs["error"] = err
}
return err == nil
}, 1*time.Second, 100*time.Millisecond, "failed to wait for metrics to match expected values:\n%v", errs)
})
t.Run("should stop", func(t *testing.T) {
t.Run("when it waits for the next tick", func(t *testing.T) {
clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
clk.Add(interval)
readChanOrFail(t, ticker.C)
// The first tick has been consumed, so the ticker is now waiting for the next one.
ticker.Stop()
clk.Add(interval)
// NOTE(review): see the remark on require.Empty with an unbuffered channel above.
require.Empty(t, ticker.C)
})
t.Run("when it waits for the tick to be consumed", func(t *testing.T) {
clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
// The tick becomes due but nobody reads it before Stop is called.
clk.Add(interval)
ticker.Stop()
require.Empty(t, ticker.C)
})
t.Run("multiple times", func(t *testing.T) {
clk := clock.NewMock()
interval := time.Duration(rand.Int63n(9)+1) * time.Second
ticker := New(clk, interval, NewMetrics(prometheus.NewRegistry(), "test"))
// Repeated Stop calls must neither block nor panic.
ticker.Stop()
ticker.Stop()
ticker.Stop()
})
})
}