From 788f677ed7596ff1e19b4e0b41761157b99f1f7f Mon Sep 17 00:00:00 2001 From: bergquist Date: Tue, 5 Sep 2017 23:19:57 +0200 Subject: [PATCH] remove old internal metrics lib --- pkg/api/metrics.go | 55 -- pkg/cmd/grafana-server/server.go | 3 +- pkg/metrics/EMWA.go | 122 ---- pkg/metrics/combos.go | 46 -- pkg/metrics/common.go | 1 - pkg/metrics/counter.go | 12 +- pkg/metrics/delta.go | 11 - pkg/metrics/gauge.go | 10 +- pkg/metrics/graphite.go | 107 --- pkg/metrics/graphite_test.go | 77 --- pkg/metrics/graphitepublisher/graphite.go | 12 +- .../graphitepublisher/graphite_test.go | 61 +- pkg/metrics/histogram.go | 189 ------ pkg/metrics/histogram_test.go | 90 --- pkg/metrics/meter.go | 221 ------- pkg/metrics/metrics.go | 117 +++- pkg/metrics/publish.go | 144 +---- pkg/metrics/registry.go | 37 -- pkg/metrics/sample.go | 607 ------------------ pkg/metrics/sample_test.go | 367 ----------- pkg/metrics/settings.go | 70 +- pkg/metrics/timer.go | 266 +------- 22 files changed, 247 insertions(+), 2378 deletions(-) delete mode 100644 pkg/metrics/EMWA.go delete mode 100644 pkg/metrics/combos.go delete mode 100644 pkg/metrics/delta.go delete mode 100644 pkg/metrics/graphite.go delete mode 100644 pkg/metrics/graphite_test.go delete mode 100644 pkg/metrics/histogram.go delete mode 100644 pkg/metrics/histogram_test.go delete mode 100644 pkg/metrics/meter.go delete mode 100644 pkg/metrics/registry.go delete mode 100644 pkg/metrics/sample.go delete mode 100644 pkg/metrics/sample_test.go diff --git a/pkg/api/metrics.go b/pkg/api/metrics.go index 48c8d884329..9d41366ae4c 100644 --- a/pkg/api/metrics.go +++ b/pkg/api/metrics.go @@ -2,13 +2,10 @@ package api import ( "context" - "encoding/json" - "net/http" "github.com/grafana/grafana/pkg/api/dtos" "github.com/grafana/grafana/pkg/bus" "github.com/grafana/grafana/pkg/components/simplejson" - "github.com/grafana/grafana/pkg/metrics" "github.com/grafana/grafana/pkg/middleware" "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/tsdb" @@ -79,58 +76,6 @@ func GetTestDataScenarios(c *middleware.Context) Response { return Json(200, &result) } -func GetInternalMetrics(c *middleware.Context) Response { - if metrics.UseNilMetrics { - return Json(200, util.DynMap{"message": "Metrics disabled"}) - } - - snapshots := metrics.MetricStats.GetSnapshots() - - resp := make(map[string]interface{}) - - for _, m := range snapshots { - metricName := m.Name() + m.StringifyTags() - - switch metric := m.(type) { - case metrics.Gauge: - resp[metricName] = map[string]interface{}{ - "value": metric.Value(), - } - case metrics.Counter: - resp[metricName] = map[string]interface{}{ - "count": metric.Count(), - } - case metrics.Timer: - percentiles := metric.Percentiles([]float64{0.25, 0.75, 0.90, 0.99}) - resp[metricName] = map[string]interface{}{ - "count": metric.Count(), - "min": metric.Min(), - "max": metric.Max(), - "mean": metric.Mean(), - "std": metric.StdDev(), - "p25": percentiles[0], - "p75": percentiles[1], - "p90": percentiles[2], - "p99": percentiles[3], - } - } - } - - var b []byte - var err error - if b, err = json.MarshalIndent(resp, "", " "); err != nil { - return ApiError(500, "body json marshal", err) - } - - return &NormalResponse{ - body: b, - status: 200, - header: http.Header{ - "Content-Type": []string{"application/json"}, - }, - } -} - // Genereates a index out of range error func GenerateError(c *middleware.Context) Response { var array []string diff --git a/pkg/cmd/grafana-server/server.go b/pkg/cmd/grafana-server/server.go index eab55ec3d9d..b2952df9185 100644 --- a/pkg/cmd/grafana-server/server.go +++ b/pkg/cmd/grafana-server/server.go @@ -54,7 +54,8 @@ func (g *GrafanaServerImpl) Start() { g.writePIDFile() initSql() - metrics.Init() + metricsCfg := metrics.ReadSettings(setting.Cfg) + metrics.Init(metricsCfg) search.Init() login.Init() social.NewOAuthService() diff --git a/pkg/metrics/EMWA.go b/pkg/metrics/EMWA.go deleted file mode 100644 index d99dc77b016..00000000000 --- a/pkg/metrics/EMWA.go +++ /dev/null @@ -1,122 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -import ( - "math" - "sync" - "sync/atomic" -) - -// EWMAs continuously calculate an exponentially-weighted moving average -// based on an outside source of clock ticks. -type EWMA interface { - Rate() float64 - Snapshot() EWMA - Tick() - Update(int64) -} - -// NewEWMA constructs a new EWMA with the given alpha. -func NewEWMA(alpha float64) EWMA { - if UseNilMetrics { - return NilEWMA{} - } - return &StandardEWMA{alpha: alpha} -} - -// NewEWMA1 constructs a new EWMA for a one-minute moving average. -func NewEWMA1() EWMA { - return NewEWMA(1 - math.Exp(-5.0/60.0/1)) -} - -// NewEWMA5 constructs a new EWMA for a five-minute moving average. -func NewEWMA5() EWMA { - return NewEWMA(1 - math.Exp(-5.0/60.0/5)) -} - -// NewEWMA15 constructs a new EWMA for a fifteen-minute moving average. -func NewEWMA15() EWMA { - return NewEWMA(1 - math.Exp(-5.0/60.0/15)) -} - -// EWMASnapshot is a read-only copy of another EWMA. -type EWMASnapshot float64 - -// Rate returns the rate of events per second at the time the snapshot was -// taken. -func (a EWMASnapshot) Rate() float64 { return float64(a) } - -// Snapshot returns the snapshot. -func (a EWMASnapshot) Snapshot() EWMA { return a } - -// Tick panics. -func (EWMASnapshot) Tick() { - panic("Tick called on an EWMASnapshot") -} - -// Update panics. -func (EWMASnapshot) Update(int64) { - panic("Update called on an EWMASnapshot") -} - -// NilEWMA is a no-op EWMA. -type NilEWMA struct{} - -// Rate is a no-op. -func (NilEWMA) Rate() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilEWMA) Snapshot() EWMA { return NilEWMA{} } - -// Tick is a no-op. -func (NilEWMA) Tick() {} - -// Update is a no-op. -func (NilEWMA) Update(n int64) {} - -// StandardEWMA is the standard implementation of an EWMA and tracks the number -// of uncounted events and processes them on each tick. It uses the -// sync/atomic package to manage uncounted events. -type StandardEWMA struct { - uncounted int64 // /!\ this should be the first member to ensure 64-bit alignment - alpha float64 - rate float64 - init bool - mutex sync.Mutex -} - -// Rate returns the moving average rate of events per second. -func (a *StandardEWMA) Rate() float64 { - a.mutex.Lock() - defer a.mutex.Unlock() - return a.rate * float64(1e9) -} - -// Snapshot returns a read-only copy of the EWMA. -func (a *StandardEWMA) Snapshot() EWMA { - return EWMASnapshot(a.Rate()) -} - -// Tick ticks the clock to update the moving average. It assumes it is called -// every five seconds. -func (a *StandardEWMA) Tick() { - count := atomic.LoadInt64(&a.uncounted) - atomic.AddInt64(&a.uncounted, -count) - instantRate := float64(count) / float64(5e9) - a.mutex.Lock() - defer a.mutex.Unlock() - if a.init { - a.rate += a.alpha * (instantRate - a.rate) - } else { - a.init = true - a.rate = instantRate - } -} - -// Update adds n uncounted events. -func (a *StandardEWMA) Update(n int64) { - atomic.AddInt64(&a.uncounted, n) -} diff --git a/pkg/metrics/combos.go b/pkg/metrics/combos.go deleted file mode 100644 index b4da59c5b32..00000000000 --- a/pkg/metrics/combos.go +++ /dev/null @@ -1,46 +0,0 @@ -package metrics - -// type comboCounterRef struct { -// *MetricMeta -// usageCounter Counter -// metricCounter Counter -// } -// -// func RegComboCounter(name string, tagStrings ...string) Counter { -// meta := NewMetricMeta(name, tagStrings) -// cr := &comboCounterRef{ -// MetricMeta: meta, -// usageCounter: NewCounter(meta), -// metricCounter: NewCounter(meta), -// } -// -// UsageStats.Register(cr.usageCounter) -// MetricStats.Register(cr.metricCounter) -// -// return cr -// } -// -// func (c comboCounterRef) Clear() { -// c.usageCounter.Clear() -// c.metricCounter.Clear() -// } -// -// func (c comboCounterRef) Count() int64 { -// panic("Count called on a combocounter ref") -// } -// -// // Dec panics. -// func (c comboCounterRef) Dec(i int64) { -// c.usageCounter.Dec(i) -// c.metricCounter.Dec(i) -// } -// -// // Inc panics. -// func (c comboCounterRef) Inc(i int64) { -// c.usageCounter.Inc(i) -// c.metricCounter.Inc(i) -// } -// -// func (c comboCounterRef) Snapshot() Metric { -// return c.metricCounter.Snapshot() -// } diff --git a/pkg/metrics/common.go b/pkg/metrics/common.go index 2043d3a67cf..293a08fae0c 100644 --- a/pkg/metrics/common.go +++ b/pkg/metrics/common.go @@ -57,5 +57,4 @@ type Metric interface { Name() string GetTagsCopy() map[string]string StringifyTags() string - Snapshot() Metric } diff --git a/pkg/metrics/counter.go b/pkg/metrics/counter.go index 874bbf7270a..9d01b7d5ecf 100644 --- a/pkg/metrics/counter.go +++ b/pkg/metrics/counter.go @@ -11,8 +11,6 @@ import ( type Counter interface { Metric - Clear() - Count() int64 Inc(int64) } @@ -20,8 +18,8 @@ func promifyName(name string) string { return strings.Replace(name, ".", "_", -1) } -// NewCounter constructs a new StandardCounter. -func NewCounter(meta *MetricMeta) Counter { +func RegCounter(name string, tagStrings ...string) Counter { + meta := NewMetricMeta(name, tagStrings) promCounter := prometheus.NewCounter(prometheus.CounterOpts{ Name: promifyName(meta.Name()) + "_total", Help: meta.Name(), @@ -37,12 +35,6 @@ func NewCounter(meta *MetricMeta) Counter { } } -func RegCounter(name string, tagStrings ...string) Counter { - cr := NewCounter(NewMetricMeta(name, tagStrings)) - //MetricStats.Register(cr) - return cr -} - // StandardCounter is the standard implementation of a Counter and uses the // sync/atomic package to manage a single int64 value. type StandardCounter struct { diff --git a/pkg/metrics/delta.go b/pkg/metrics/delta.go deleted file mode 100644 index 71354178209..00000000000 --- a/pkg/metrics/delta.go +++ /dev/null @@ -1,11 +0,0 @@ -package metrics - -import "math" - -func calculateDelta(oldValue, newValue int64) int64 { - if oldValue < newValue { - return newValue - oldValue - } else { - return (math.MaxInt64 - oldValue) + (newValue - math.MinInt64) + 1 - } -} diff --git a/pkg/metrics/gauge.go b/pkg/metrics/gauge.go index a9cace00239..1e3f0652168 100644 --- a/pkg/metrics/gauge.go +++ b/pkg/metrics/gauge.go @@ -18,7 +18,8 @@ type Gauge interface { Value() int64 } -func NewGauge(meta *MetricMeta) Gauge { +func RegGauge(name string, tagStrings ...string) Gauge { + meta := NewMetricMeta(name, tagStrings) promGauge := prometheus.NewGauge(prometheus.GaugeOpts{ Name: promifyName(meta.Name()) + "_total", Help: meta.Name(), @@ -34,13 +35,6 @@ func NewGauge(meta *MetricMeta) Gauge { } } -func RegGauge(name string, tagStrings ...string) Gauge { - tr := NewGauge(NewMetricMeta(name, tagStrings)) - - //MetricStats.Register(tr) - return tr -} - // GaugeSnapshot is a read-only copy of another Gauge. type GaugeSnapshot struct { value int64 diff --git a/pkg/metrics/graphite.go b/pkg/metrics/graphite.go deleted file mode 100644 index 59c992776de..00000000000 --- a/pkg/metrics/graphite.go +++ /dev/null @@ -1,107 +0,0 @@ -package metrics - -import ( - "bytes" - "fmt" - "net" - "strings" - "time" - - "github.com/grafana/grafana/pkg/log" - "github.com/grafana/grafana/pkg/setting" -) - -type GraphitePublisher struct { - address string - protocol string - prefix string - prevCounts map[string]int64 -} - -func CreateGraphitePublisher() (*GraphitePublisher, error) { - graphiteSection, err := setting.Cfg.GetSection("metrics.graphite") - if err != nil { - return nil, nil - } - - address := graphiteSection.Key("address").String() - if address == "" { - return nil, nil - } - - publisher := &GraphitePublisher{} - publisher.prevCounts = make(map[string]int64) - publisher.protocol = "tcp" - publisher.prefix = graphiteSection.Key("prefix").MustString("prod.grafana.%(instance_name)s") - publisher.address = address - - safeInstanceName := strings.Replace(setting.InstanceName, ".", "_", -1) - prefix := graphiteSection.Key("prefix").Value() - - if prefix == "" { - prefix = "prod.grafana.%(instance_name)s." - } - - publisher.prefix = strings.Replace(prefix, "%(instance_name)s", safeInstanceName, -1) - return publisher, nil -} - -func (this *GraphitePublisher) Publish(metrics []Metric) { - conn, err := net.DialTimeout(this.protocol, this.address, time.Second*5) - - if err != nil { - log.Error(3, "Metrics: GraphitePublisher: Failed to connect to %s!", err) - return - } - - buf := bytes.NewBufferString("") - now := time.Now().Unix() - - for _, m := range metrics { - metricName := this.prefix + m.Name() + m.StringifyTags() - - switch metric := m.(type) { - case Counter: - this.addCount(buf, metricName+".count", metric.Count(), now) - case Gauge: - this.addCount(buf, metricName, metric.Value(), now) - case Timer: - percentiles := metric.Percentiles([]float64{0.25, 0.75, 0.90, 0.99}) - this.addCount(buf, metricName+".count", metric.Count(), now) - this.addInt(buf, metricName+".max", metric.Max(), now) - this.addInt(buf, metricName+".min", metric.Min(), now) - this.addFloat(buf, metricName+".mean", metric.Mean(), now) - this.addFloat(buf, metricName+".std", metric.StdDev(), now) - this.addFloat(buf, metricName+".p25", percentiles[0], now) - this.addFloat(buf, metricName+".p75", percentiles[1], now) - this.addFloat(buf, metricName+".p90", percentiles[2], now) - this.addFloat(buf, metricName+".p99", percentiles[3], now) - } - } - - log.Trace("Metrics: GraphitePublisher.Publish() \n%s", buf) - _, err = conn.Write(buf.Bytes()) - - if err != nil { - log.Error(3, "Metrics: GraphitePublisher: Failed to send metrics! %s", err) - } -} - -func (this *GraphitePublisher) addInt(buf *bytes.Buffer, metric string, value int64, now int64) { - buf.WriteString(fmt.Sprintf("%s %d %d\n", metric, value, now)) -} - -func (this *GraphitePublisher) addFloat(buf *bytes.Buffer, metric string, value float64, now int64) { - buf.WriteString(fmt.Sprintf("%s %f %d\n", metric, value, now)) -} - -func (this *GraphitePublisher) addCount(buf *bytes.Buffer, metric string, value int64, now int64) { - delta := value - - if last, ok := this.prevCounts[metric]; ok { - delta = calculateDelta(last, value) - } - - this.prevCounts[metric] = value - buf.WriteString(fmt.Sprintf("%s %d %d\n", metric, delta, now)) -} diff --git a/pkg/metrics/graphite_test.go b/pkg/metrics/graphite_test.go deleted file mode 100644 index ff2bf530d5e..00000000000 --- a/pkg/metrics/graphite_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package metrics - -import ( - "testing" - - "github.com/grafana/grafana/pkg/setting" - - . "github.com/smartystreets/goconvey/convey" -) - -func TestGraphitePublisher(t *testing.T) { - - setting.CustomInitPath = "conf/does_not_exist.ini" - - Convey("Test graphite prefix replacement", t, func() { - var err error - err = setting.NewConfigContext(&setting.CommandLineArgs{ - HomePath: "../../", - }) - - So(err, ShouldBeNil) - - sec, err := setting.Cfg.NewSection("metrics.graphite") - sec.NewKey("prefix", "prod.grafana.%(instance_name)s.") - sec.NewKey("address", "localhost:2001") - - So(err, ShouldBeNil) - - setting.InstanceName = "hostname.with.dots.com" - publisher, err := CreateGraphitePublisher() - - So(err, ShouldBeNil) - So(publisher, ShouldNotBeNil) - - So(publisher.prefix, ShouldEqual, "prod.grafana.hostname_with_dots_com.") - So(publisher.address, ShouldEqual, "localhost:2001") - }) - - Convey("Test graphite publisher default prefix", t, func() { - var err error - err = setting.NewConfigContext(&setting.CommandLineArgs{ - HomePath: "../../", - }) - - So(err, ShouldBeNil) - - sec, err := setting.Cfg.NewSection("metrics.graphite") - sec.NewKey("address", "localhost:2001") - - So(err, ShouldBeNil) - - setting.InstanceName = "hostname.with.dots.com" - publisher, err := CreateGraphitePublisher() - - So(err, ShouldBeNil) - So(publisher, ShouldNotBeNil) - - So(publisher.prefix, ShouldEqual, "prod.grafana.hostname_with_dots_com.") - So(publisher.address, ShouldEqual, "localhost:2001") - }) - - Convey("Test graphite publisher default values", t, func() { - var err error - err = setting.NewConfigContext(&setting.CommandLineArgs{ - HomePath: "../../", - }) - - So(err, ShouldBeNil) - - _, err = setting.Cfg.NewSection("metrics.graphite") - - publisher, err := CreateGraphitePublisher() - - So(err, ShouldBeNil) - So(publisher, ShouldBeNil) - }) -} diff --git a/pkg/metrics/graphitepublisher/graphite.go b/pkg/metrics/graphitepublisher/graphite.go index 2feee134590..e4b08116af2 100644 --- a/pkg/metrics/graphitepublisher/graphite.go +++ b/pkg/metrics/graphitepublisher/graphite.go @@ -246,7 +246,9 @@ func writeMetric(buf *bufio.Writer, m model.Metric, mf *dto.MetricFamily) error switch numLabels { case 0: if hasName { - return writeSanitized(buf, string(metricName)) + if err := writeSanitized(buf, string(metricName)); err != nil { + return err + } } default: sort.Strings(labelStrings) @@ -306,10 +308,16 @@ func replaceInvalidRune(c rune) rune { } func (b *Bridge) replaceCounterWithDelta(mf *dto.MetricFamily, metric model.Metric, value model.SampleValue) float64 { - if !b.countersAsDetlas || mf.GetType() != dto.MetricType_COUNTER { + if mf.GetType() != dto.MetricType_COUNTER { return float64(value) } + if !b.countersAsDetlas { + return float64(value) + } + + //println("use delta for", metric[model.MetricNameLabel], mf.GetType().String()) + //TODO(bergquist): turn _count in summery into delta //TODO(bergquist): turn _count in histogram into delta diff --git a/pkg/metrics/graphitepublisher/graphite_test.go b/pkg/metrics/graphitepublisher/graphite_test.go index 072c375e623..604f47cbde3 100644 --- a/pkg/metrics/graphitepublisher/graphite_test.go +++ b/pkg/metrics/graphitepublisher/graphite_test.go @@ -217,7 +217,7 @@ prefix.name_bucket.constname.constvalue.labelname.val2.le._Inf 3 1477043 } } -func TestCounter(t *testing.T) { +func TestCounterVec(t *testing.T) { cntVec := prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "name", @@ -284,6 +284,65 @@ prefix.name.constname.constvalue.labelname.val2.count 1 1477053 } } +func TestCounter(t *testing.T) { + cntVec := prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "name", + Help: "docstring", + ConstLabels: prometheus.Labels{"constname": "constvalue"}, + }) + + reg := prometheus.NewRegistry() + reg.MustRegister(cntVec) + + cntVec.Inc() + + b, err := NewBridge(&Config{ + URL: "localhost:8080", + Gatherer: reg, + CountersAsDelta: true, + }) + if err != nil { + t.Fatalf("error creating bridge: %v", err) + } + + // first collect + mfs, err := reg.Gather() + if err != nil { + t.Fatalf("error: %v", err) + } + + var buf bytes.Buffer + err = b.writeMetrics(&buf, mfs, "prefix", model.Time(1477043083)) + if err != nil { + t.Fatalf("error: %v", err) + } + + want := "prefix.name.constname.constvalue.count 1 1477043\n" + if got := buf.String(); want != got { + t.Fatalf("wanted \n%s\n, got \n%s\n", want, got) + } + + //next collect + cntVec.Inc() + + mfs, err = reg.Gather() + if err != nil { + t.Fatalf("error: %v", err) + } + + buf = bytes.Buffer{} + err = b.writeMetrics(&buf, mfs, "prefix", model.Time(1477053083)) + if err != nil { + t.Fatalf("error: %v", err) + } + + want2 := "prefix.name.constname.constvalue.count 1 1477053\n" + if got := buf.String(); want2 != got { + t.Fatalf("wanted \n%s\n, got \n%s\n", want2, got) + } +} + func TestPush(t *testing.T) { reg := prometheus.NewRegistry() cntVec := prometheus.NewCounterVec( diff --git a/pkg/metrics/histogram.go b/pkg/metrics/histogram.go deleted file mode 100644 index 32338da4b69..00000000000 --- a/pkg/metrics/histogram.go +++ /dev/null @@ -1,189 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -// Histograms calculate distribution statistics from a series of int64 values. -type Histogram interface { - Metric - - Clear() - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - StdDev() float64 - Sum() int64 - Update(int64) - Variance() float64 -} - -func NewHistogram(meta *MetricMeta, s Sample) Histogram { - return &StandardHistogram{ - MetricMeta: meta, - sample: s, - } -} - -// HistogramSnapshot is a read-only copy of another Histogram. -type HistogramSnapshot struct { - *MetricMeta - sample *SampleSnapshot -} - -// Clear panics. -func (*HistogramSnapshot) Clear() { - panic("Clear called on a HistogramSnapshot") -} - -// Count returns the number of samples recorded at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample at the time the snapshot -// was taken. -func (h *HistogramSnapshot) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of values in the sample at the -// time the snapshot was taken. -func (h *HistogramSnapshot) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the sample -// at the time the snapshot was taken. -func (h *HistogramSnapshot) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *HistogramSnapshot) Sample() Sample { return h.sample } - -// Snapshot returns the snapshot. -func (h *HistogramSnapshot) Snapshot() Metric { return h } - -// StdDev returns the standard deviation of the values in the sample at the -// time the snapshot was taken. -func (h *HistogramSnapshot) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample at the time the snapshot was taken. -func (h *HistogramSnapshot) Sum() int64 { return h.sample.Sum() } - -// Update panics. -func (*HistogramSnapshot) Update(int64) { - panic("Update called on a HistogramSnapshot") -} - -// Variance returns the variance of inputs at the time the snapshot was taken. -func (h *HistogramSnapshot) Variance() float64 { return h.sample.Variance() } - -// NilHistogram is a no-op Histogram. -type NilHistogram struct { - *MetricMeta -} - -// Clear is a no-op. -func (NilHistogram) Clear() {} - -// Count is a no-op. -func (NilHistogram) Count() int64 { return 0 } - -// Max is a no-op. -func (NilHistogram) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilHistogram) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilHistogram) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilHistogram) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilHistogram) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Sample is a no-op. -func (NilHistogram) Sample() Sample { return NilSample{} } - -// Snapshot is a no-op. -func (n NilHistogram) Snapshot() Metric { return n } - -// StdDev is a no-op. -func (NilHistogram) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilHistogram) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilHistogram) Update(v int64) {} - -// Variance is a no-op. -func (NilHistogram) Variance() float64 { return 0.0 } - -// StandardHistogram is the standard implementation of a Histogram and uses a -// Sample to bound its memory use. -type StandardHistogram struct { - *MetricMeta - sample Sample -} - -// Clear clears the histogram and its sample. -func (h *StandardHistogram) Clear() { h.sample.Clear() } - -// Count returns the number of samples recorded since the histogram was last -// cleared. -func (h *StandardHistogram) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample. -func (h *StandardHistogram) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample. -func (h *StandardHistogram) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample. -func (h *StandardHistogram) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of the values in the sample. -func (h *StandardHistogram) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. -func (h *StandardHistogram) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *StandardHistogram) Sample() Sample { return h.sample } - -// Snapshot returns a read-only copy of the histogram. -func (h *StandardHistogram) Snapshot() Metric { - return &HistogramSnapshot{sample: h.sample.Snapshot().(*SampleSnapshot)} -} - -// StdDev returns the standard deviation of the values in the sample. -func (h *StandardHistogram) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample. -func (h *StandardHistogram) Sum() int64 { return h.sample.Sum() } - -// Update samples a new value. -func (h *StandardHistogram) Update(v int64) { h.sample.Update(v) } - -// Variance returns the variance of the values in the sample. -func (h *StandardHistogram) Variance() float64 { return h.sample.Variance() } diff --git a/pkg/metrics/histogram_test.go b/pkg/metrics/histogram_test.go deleted file mode 100644 index 010402123c2..00000000000 --- a/pkg/metrics/histogram_test.go +++ /dev/null @@ -1,90 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -import "testing" - -func BenchmarkHistogram(b *testing.B) { - h := NewHistogram(nil, NewUniformSample(100)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - h.Update(int64(i)) - } -} - -func TestHistogram10000(t *testing.T) { - h := NewHistogram(nil, NewUniformSample(100000)) - for i := 1; i <= 10000; i++ { - h.Update(int64(i)) - } - testHistogram10000(t, h) -} - -func TestHistogramEmpty(t *testing.T) { - h := NewHistogram(nil, NewUniformSample(100)) - if count := h.Count(); 0 != count { - t.Errorf("h.Count(): 0 != %v\n", count) - } - if min := h.Min(); 0 != min { - t.Errorf("h.Min(): 0 != %v\n", min) - } - if max := h.Max(); 0 != max { - t.Errorf("h.Max(): 0 != %v\n", max) - } - if mean := h.Mean(); 0.0 != mean { - t.Errorf("h.Mean(): 0.0 != %v\n", mean) - } - if stdDev := h.StdDev(); 0.0 != stdDev { - t.Errorf("h.StdDev(): 0.0 != %v\n", stdDev) - } - ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) - if 0.0 != ps[0] { - t.Errorf("median: 0.0 != %v\n", ps[0]) - } - if 0.0 != ps[1] { - t.Errorf("75th percentile: 0.0 != %v\n", ps[1]) - } - if 0.0 != ps[2] { - t.Errorf("99th percentile: 0.0 != %v\n", ps[2]) - } -} - -func TestHistogramSnapshot(t *testing.T) { - h := NewHistogram(nil, NewUniformSample(100000)) - for i := 1; i <= 10000; i++ { - h.Update(int64(i)) - } - snapshot := h.Snapshot().(Histogram) - h.Update(0) - testHistogram10000(t, snapshot) -} - -func testHistogram10000(t *testing.T, h Histogram) { - if count := h.Count(); 10000 != count { - t.Errorf("h.Count(): 10000 != %v\n", count) - } - if min := h.Min(); 1 != min { - t.Errorf("h.Min(): 1 != %v\n", min) - } - if max := h.Max(); 10000 != max { - t.Errorf("h.Max(): 10000 != %v\n", max) - } - if mean := h.Mean(); 5000.5 != mean { - t.Errorf("h.Mean(): 5000.5 != %v\n", mean) - } - if stdDev := h.StdDev(); 2886.751331514372 != stdDev { - t.Errorf("h.StdDev(): 2886.751331514372 != %v\n", stdDev) - } - ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) - if 5000.5 != ps[0] { - t.Errorf("median: 5000.5 != %v\n", ps[0]) - } - if 7500.75 != ps[1] { - t.Errorf("75th percentile: 7500.75 != %v\n", ps[1]) - } - if 9900.99 != ps[2] { - t.Errorf("99th percentile: 9900.99 != %v\n", ps[2]) - } -} diff --git a/pkg/metrics/meter.go b/pkg/metrics/meter.go deleted file mode 100644 index 265bff99cb6..00000000000 --- a/pkg/metrics/meter.go +++ /dev/null @@ -1,221 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -import ( - "sync" - "time" -) - -// Meters count events to produce exponentially-weighted moving average rates -// at one-, five-, and fifteen-minutes and a mean rate. -type Meter interface { - Metric - - Count() int64 - Mark(int64) - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 -} - -// NewMeter constructs a new StandardMeter and launches a goroutine. -func NewMeter(meta *MetricMeta) Meter { - if UseNilMetrics { - return NilMeter{} - } - - m := newStandardMeter(meta) - arbiter.Lock() - defer arbiter.Unlock() - arbiter.meters = append(arbiter.meters, m) - if !arbiter.started { - arbiter.started = true - go arbiter.tick() - } - return m -} - -type MeterSnapshot struct { - *MetricMeta - count int64 - rate1, rate5, rate15, rateMean float64 -} - -// Count returns the count of events at the time the snapshot was taken. -func (m *MeterSnapshot) Count() int64 { return m.count } - -// Mark panics. -func (*MeterSnapshot) Mark(n int64) { - panic("Mark called on a MeterSnapshot") -} - -// Rate1 returns the one-minute moving average rate of events per second at the -// time the snapshot was taken. -func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } - -// Rate5 returns the five-minute moving average rate of events per second at -// the time the snapshot was taken. -func (m *MeterSnapshot) Rate5() float64 { return m.rate5 } - -// Rate15 returns the fifteen-minute moving average rate of events per second -// at the time the snapshot was taken. -func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } - -// RateMean returns the meter's mean rate of events per second at the time the -// snapshot was taken. -func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } - -// Snapshot returns the snapshot. -func (m *MeterSnapshot) Snapshot() Metric { return m } - -// NilMeter is a no-op Meter. -type NilMeter struct{ *MetricMeta } - -// Count is a no-op. -func (NilMeter) Count() int64 { return 0 } - -// Mark is a no-op. -func (NilMeter) Mark(n int64) {} - -// Rate1 is a no-op. -func (NilMeter) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilMeter) Rate5() float64 { return 0.0 } - -// Rate15is a no-op. -func (NilMeter) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilMeter) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilMeter) Snapshot() Metric { return NilMeter{} } - -// StandardMeter is the standard implementation of a Meter. -type StandardMeter struct { - *MetricMeta - lock sync.RWMutex - snapshot *MeterSnapshot - a1, a5, a15 EWMA - startTime time.Time -} - -func newStandardMeter(meta *MetricMeta) *StandardMeter { - return &StandardMeter{ - MetricMeta: meta, - snapshot: &MeterSnapshot{MetricMeta: meta}, - a1: NewEWMA1(), - a5: NewEWMA5(), - a15: NewEWMA15(), - startTime: time.Now(), - } -} - -// Count returns the number of events recorded. -func (m *StandardMeter) Count() int64 { - m.lock.RLock() - count := m.snapshot.count - m.lock.RUnlock() - return count -} - -// Mark records the occurrence of n events. -func (m *StandardMeter) Mark(n int64) { - m.lock.Lock() - defer m.lock.Unlock() - m.snapshot.count += n - m.a1.Update(n) - m.a5.Update(n) - m.a15.Update(n) - m.updateSnapshot() -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (m *StandardMeter) Rate1() float64 { - m.lock.RLock() - rate1 := m.snapshot.rate1 - m.lock.RUnlock() - return rate1 -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (m *StandardMeter) Rate5() float64 { - m.lock.RLock() - rate5 := m.snapshot.rate5 - m.lock.RUnlock() - return rate5 -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (m *StandardMeter) Rate15() float64 { - m.lock.RLock() - rate15 := m.snapshot.rate15 - m.lock.RUnlock() - return rate15 -} - -// RateMean returns the meter's mean rate of events per second. -func (m *StandardMeter) RateMean() float64 { - m.lock.RLock() - rateMean := m.snapshot.rateMean - m.lock.RUnlock() - return rateMean -} - -// Snapshot returns a read-only copy of the meter. -func (m *StandardMeter) Snapshot() Metric { - m.lock.RLock() - snapshot := *m.snapshot - m.lock.RUnlock() - return &snapshot -} - -func (m *StandardMeter) updateSnapshot() { - // should run with write lock held on m.lock - snapshot := m.snapshot - snapshot.rate1 = m.a1.Rate() - snapshot.rate5 = m.a5.Rate() - snapshot.rate15 = m.a15.Rate() - snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() -} - -func (m *StandardMeter) tick() { - m.lock.Lock() - defer m.lock.Unlock() - m.a1.Tick() - m.a5.Tick() - m.a15.Tick() - m.updateSnapshot() -} - -type meterArbiter struct { - sync.RWMutex - started bool - meters []*StandardMeter - ticker *time.Ticker -} - -var arbiter = meterArbiter{ticker: time.NewTicker(5e9)} - -// Ticks meters on the scheduled interval -func (ma *meterArbiter) tick() { - for { - select { - case <-ma.ticker.C: - ma.tickMeters() - } - } -} - -func (ma *meterArbiter) tickMeters() { - ma.RLock() - defer ma.RUnlock() - for _, meter := range ma.meters { - meter.tick() - } -} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 3ede3fd3a34..6ae2a22245a 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,12 +1,18 @@ package metrics -var MetricStats Registry -var UseNilMetrics bool +import ( + "bytes" + "encoding/json" + "net/http" + "runtime" + "strings" + "time" -func init() { - // init with nil metrics - //initMetricVars(&MetricSettings{}) -} + "github.com/grafana/grafana/pkg/bus" + "github.com/grafana/grafana/pkg/models" + "github.com/grafana/grafana/pkg/plugins" + "github.com/grafana/grafana/pkg/setting" +) var ( M_Instance_Start Counter @@ -70,9 +76,6 @@ var ( ) func initMetricVars(settings *MetricSettings) { - UseNilMetrics = settings.Enabled == false - MetricStats = NewRegistry() - M_Instance_Start = RegCounter("instance_start") M_Page_Status_200 = RegCounter("page.resp_status", "code", "200") @@ -143,4 +146,100 @@ func initMetricVars(settings *MetricSettings) { M_StatTotal_Users = RegGauge("stat_totals", "stat", "users") M_StatTotal_Orgs = RegGauge("stat_totals", "stat", "orgs") M_StatTotal_Playlists = RegGauge("stat_totals", "stat", "playlists") + + go instrumentationLoop(settings) +} + +func instrumentationLoop(settings *MetricSettings) chan struct{} { + M_Instance_Start.Inc(1) + + onceEveryDayTick := time.NewTicker(time.Hour * 24) + secondTicker := time.NewTicker(time.Second * time.Duration(settings.IntervalSeconds)) + + for { + select { + case <-onceEveryDayTick.C: + sendUsageStats() + case <-secondTicker.C: + updateTotalStats() + } + } +} + +func updateTotalStats() { + // every interval also publish totals + metricPublishCounter++ + if metricPublishCounter%10 == 0 { + // get stats + statsQuery := models.GetSystemStatsQuery{} + if err := bus.Dispatch(&statsQuery); err != nil { + metricsLogger.Error("Failed to get system stats", "error", err) + return + } + + M_StatTotal_Dashboards.Update(statsQuery.Result.Dashboards) + M_StatTotal_Users.Update(statsQuery.Result.Users) + M_StatTotal_Playlists.Update(statsQuery.Result.Playlists) + M_StatTotal_Orgs.Update(statsQuery.Result.Orgs) + } +} + +func sendUsageStats() { + if !setting.ReportingEnabled { + return + } + + metricsLogger.Debug("Sending anonymous usage stats to stats.grafana.org") + + version := strings.Replace(setting.BuildVersion, ".", "_", -1) + + metrics := map[string]interface{}{} + report := map[string]interface{}{ + "version": version, + "metrics": metrics, + "os": runtime.GOOS, + "arch": runtime.GOARCH, + } + + statsQuery := models.GetSystemStatsQuery{} + if err := bus.Dispatch(&statsQuery); err != nil { + metricsLogger.Error("Failed to get system stats", "error", err) + return + } + + metrics["stats.dashboards.count"] = statsQuery.Result.Dashboards + metrics["stats.users.count"] = statsQuery.Result.Users + metrics["stats.orgs.count"] = statsQuery.Result.Orgs + metrics["stats.playlist.count"] = statsQuery.Result.Playlists + metrics["stats.plugins.apps.count"] = len(plugins.Apps) + metrics["stats.plugins.panels.count"] = len(plugins.Panels) + metrics["stats.plugins.datasources.count"] = len(plugins.DataSources) + metrics["stats.alerts.count"] = statsQuery.Result.Alerts + metrics["stats.active_users.count"] = statsQuery.Result.ActiveUsers + metrics["stats.datasources.count"] = statsQuery.Result.Datasources + + dsStats := models.GetDataSourceStatsQuery{} + if err := bus.Dispatch(&dsStats); err != nil { + metricsLogger.Error("Failed to get datasource stats", "error", err) + return + } + + // send counters for each data source + // but ignore any custom data sources + // as sending that name could be sensitive information + dsOtherCount := 0 + for _, dsStat := range dsStats.Result { + if models.IsKnownDataSourcePlugin(dsStat.Type) { + metrics["stats.ds."+dsStat.Type+".count"] = dsStat.Count + } else { + dsOtherCount += dsStat.Count + } + } + metrics["stats.ds.other.count"] = dsOtherCount + + out, _ := json.MarshalIndent(report, "", " ") + data := bytes.NewBuffer(out) + + client := http.Client{Timeout: time.Duration(5 * time.Second)} + go client.Post("https://stats.grafana.org/grafana-usage-report", "application/json", data) } diff --git a/pkg/metrics/publish.go b/pkg/metrics/publish.go index 7f80edde3aa..8da0c94ceed 100644 --- a/pkg/metrics/publish.go +++ b/pkg/metrics/publish.go @@ -1,21 +1,10 @@ package metrics import ( - "bytes" "context" - "encoding/json" - "net/http" - "runtime" - "strings" - "time" - "github.com/grafana/grafana/pkg/bus" "github.com/grafana/grafana/pkg/log" "github.com/grafana/grafana/pkg/metrics/graphitepublisher" - m "github.com/grafana/grafana/pkg/models" - "github.com/grafana/grafana/pkg/plugins" - "github.com/grafana/grafana/pkg/setting" - "github.com/prometheus/client_golang/prometheus" ) var metricsLogger log.Logger = log.New("metrics") @@ -29,136 +18,15 @@ func (lw *logWrapper) Println(v ...interface{}) { lw.logger.Info("graphite metric bridge", v...) } -func Init() { - settings := readSettings() +func Init(settings *MetricSettings) { initMetricVars(settings) - //go instrumentationLoop(settings) - cfg := &graphitepub.Config{ - URL: "localhost:2003", - Gatherer: prometheus.DefaultGatherer, - Prefix: "prefix", - Interval: 10 * time.Second, - Timeout: 10 * time.Second, - Logger: &logWrapper{logger: metricsLogger}, - ErrorHandling: graphitepub.ContinueOnError, - CountersAsDelta: true, - } - - bridge, err := graphitepub.NewBridge(cfg) - if err != nil { - metricsLogger.Error("failed to create graphite bridge", "error", err) - } else { - go bridge.Run(context.Background()) - } -} - -func instrumentationLoop(settings *MetricSettings) chan struct{} { - M_Instance_Start.Inc(1) - - onceEveryDayTick := time.NewTicker(time.Hour * 24) - secondTicker := time.NewTicker(time.Second * time.Duration(settings.IntervalSeconds)) - - for { - select { - case <-onceEveryDayTick.C: - sendUsageStats() - case <-secondTicker.C: - if settings.Enabled { - sendMetrics(settings) - } - } - } -} - -func sendMetrics(settings *MetricSettings) { - if len(settings.Publishers) == 0 { - return - } - - updateTotalStats() - - metrics := MetricStats.GetSnapshots() - - for _, publisher := range settings.Publishers { - publisher.Publish(metrics) - } -} - -func updateTotalStats() { - // every interval also publish totals - metricPublishCounter++ - if metricPublishCounter%10 == 0 { - // get stats - statsQuery := m.GetSystemStatsQuery{} - if err := bus.Dispatch(&statsQuery); err != nil { - metricsLogger.Error("Failed to get system stats", "error", err) - return - } - - M_StatTotal_Dashboards.Update(statsQuery.Result.Dashboards) - M_StatTotal_Users.Update(statsQuery.Result.Users) - M_StatTotal_Playlists.Update(statsQuery.Result.Playlists) - M_StatTotal_Orgs.Update(statsQuery.Result.Orgs) - } -} - -func sendUsageStats() { - if !setting.ReportingEnabled { - return - } - - metricsLogger.Debug("Sending anonymous usage stats to stats.grafana.org") - - version := strings.Replace(setting.BuildVersion, ".", "_", -1) - - metrics := map[string]interface{}{} - report := map[string]interface{}{ - "version": version, - "metrics": metrics, - "os": runtime.GOOS, - "arch": runtime.GOARCH, - } - - statsQuery := m.GetSystemStatsQuery{} - if err := bus.Dispatch(&statsQuery); err != nil { - metricsLogger.Error("Failed to get system stats", "error", err) - return - } - - metrics["stats.dashboards.count"] = statsQuery.Result.Dashboards - metrics["stats.users.count"] = statsQuery.Result.Users - metrics["stats.orgs.count"] = statsQuery.Result.Orgs - metrics["stats.playlist.count"] = statsQuery.Result.Playlists - metrics["stats.plugins.apps.count"] = len(plugins.Apps) - metrics["stats.plugins.panels.count"] = len(plugins.Panels) - metrics["stats.plugins.datasources.count"] = len(plugins.DataSources) - metrics["stats.alerts.count"] = statsQuery.Result.Alerts - metrics["stats.active_users.count"] = statsQuery.Result.ActiveUsers - metrics["stats.datasources.count"] = statsQuery.Result.Datasources - - dsStats := m.GetDataSourceStatsQuery{} - if err := bus.Dispatch(&dsStats); err != nil { - metricsLogger.Error("Failed to get datasource stats", "error", err) - return - } - - // send counters for each data source - // but ignore any custom data sources - // as sending that name could be sensitive information - dsOtherCount := 0 - for _, dsStat := range dsStats.Result { - if m.IsKnownDataSourcePlugin(dsStat.Type) { - metrics["stats.ds."+dsStat.Type+".count"] = dsStat.Count + if settings.GraphiteBridgeConfig != nil { + bridge, err := graphitepublisher.NewBridge(settings.GraphiteBridgeConfig) + if err != nil { + metricsLogger.Error("failed to create graphite bridge", "error", err) } else { - dsOtherCount += dsStat.Count + go bridge.Run(context.Background()) } } - metrics["stats.ds.other.count"] = dsOtherCount - - out, _ := json.MarshalIndent(report, "", " ") - data := bytes.NewBuffer(out) - - client := http.Client{Timeout: time.Duration(5 * time.Second)} - go client.Post("https://stats.grafana.org/grafana-usage-report", "application/json", data) } diff --git a/pkg/metrics/registry.go b/pkg/metrics/registry.go deleted file mode 100644 index 6c40d4fde9f..00000000000 --- a/pkg/metrics/registry.go +++ /dev/null @@ -1,37 +0,0 @@ -package metrics - -import "sync" - -type Registry interface { - GetSnapshots() []Metric - Register(metric Metric) -} - -// The standard implementation of a Registry is a mutex-protected map -// of names to metrics. -type StandardRegistry struct { - metrics []Metric - mutex sync.Mutex -} - -// Create a new registry. -func NewRegistry() Registry { - return &StandardRegistry{ - metrics: make([]Metric, 0), - } -} - -func (r *StandardRegistry) Register(metric Metric) { - r.mutex.Lock() - defer r.mutex.Unlock() - r.metrics = append(r.metrics, metric) -} - -// Call the given function for each registered metric. -func (r *StandardRegistry) GetSnapshots() []Metric { - metrics := make([]Metric, len(r.metrics)) - for i, metric := range r.metrics { - metrics[i] = metric.Snapshot() - } - return metrics -} diff --git a/pkg/metrics/sample.go b/pkg/metrics/sample.go deleted file mode 100644 index 4288f29cce6..00000000000 --- a/pkg/metrics/sample.go +++ /dev/null @@ -1,607 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -import ( - "math" - "math/rand" - "sort" - "sync" - "time" -) - -const rescaleThreshold = time.Hour - -// Samples maintain a statistically-significant selection of values from -// a stream. -type Sample interface { - Clear() - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Size() int - Snapshot() Sample - StdDev() float64 - Sum() int64 - Update(int64) - Values() []int64 - Variance() float64 -} - -// ExpDecaySample is an exponentially-decaying sample using a forward-decaying -// priority reservoir. See Cormode et al's "Forward Decay: A Practical Time -// Decay Model for Streaming Systems". -// -// -type ExpDecaySample struct { - alpha float64 - count int64 - mutex sync.Mutex - reservoirSize int - t0, t1 time.Time - values *expDecaySampleHeap -} - -// NewExpDecaySample constructs a new exponentially-decaying sample with the -// given reservoir size and alpha. -func NewExpDecaySample(reservoirSize int, alpha float64) Sample { - s := &ExpDecaySample{ - alpha: alpha, - reservoirSize: reservoirSize, - t0: time.Now(), - values: newExpDecaySampleHeap(reservoirSize), - } - s.t1 = s.t0.Add(rescaleThreshold) - return s -} - -// Clear clears all samples. -func (s *ExpDecaySample) Clear() { - s.mutex.Lock() - defer s.mutex.Unlock() - s.count = 0 - s.t0 = time.Now() - s.t1 = s.t0.Add(rescaleThreshold) - s.values.Clear() -} - -// Count returns the number of samples recorded, which may exceed the -// reservoir size. -func (s *ExpDecaySample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *ExpDecaySample) Max() int64 { - return SampleMax(s.Values()) -} - -// Mean returns the mean of the values in the sample. -func (s *ExpDecaySample) Mean() float64 { - return SampleMean(s.Values()) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *ExpDecaySample) Min() int64 { - return SampleMin(s.Values()) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *ExpDecaySample) Percentile(p float64) float64 { - return SamplePercentile(s.Values(), p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *ExpDecaySample) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.Values(), ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *ExpDecaySample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.values.Size() -} - -// Snapshot returns a read-only copy of the sample. -func (s *ExpDecaySample) Snapshot() Sample { - s.mutex.Lock() - defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v - } - return &SampleSnapshot{ - count: s.count, - values: values, - } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *ExpDecaySample) StdDev() float64 { - return SampleStdDev(s.Values()) -} - -// Sum returns the sum of the values in the sample. -func (s *ExpDecaySample) Sum() int64 { - return SampleSum(s.Values()) -} - -// Update samples a new value. -func (s *ExpDecaySample) Update(v int64) { - s.update(time.Now(), v) -} - -// Values returns a copy of the values in the sample. -func (s *ExpDecaySample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v - } - return values -} - -// Variance returns the variance of the values in the sample. -func (s *ExpDecaySample) Variance() float64 { - return SampleVariance(s.Values()) -} - -// update samples a new value at a particular timestamp. This is a method all -// its own to facilitate testing. -func (s *ExpDecaySample) update(t time.Time, v int64) { - s.mutex.Lock() - defer s.mutex.Unlock() - s.count++ - if s.values.Size() == s.reservoirSize { - s.values.Pop() - } - s.values.Push(expDecaySample{ - k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(), - v: v, - }) - if t.After(s.t1) { - values := s.values.Values() - t0 := s.t0 - s.values.Clear() - s.t0 = t - s.t1 = s.t0.Add(rescaleThreshold) - for _, v := range values { - v.k = v.k * math.Exp(-s.alpha*s.t0.Sub(t0).Seconds()) - s.values.Push(v) - } - } -} - -// NilSample is a no-op Sample. -type NilSample struct{} - -// Clear is a no-op. -func (NilSample) Clear() {} - -// Count is a no-op. -func (NilSample) Count() int64 { return 0 } - -// Max is a no-op. -func (NilSample) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilSample) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilSample) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilSample) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilSample) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Size is a no-op. -func (NilSample) Size() int { return 0 } - -// Sample is a no-op. -func (NilSample) Snapshot() Sample { return NilSample{} } - -// StdDev is a no-op. -func (NilSample) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilSample) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilSample) Update(v int64) {} - -// Values is a no-op. -func (NilSample) Values() []int64 { return []int64{} } - -// Variance is a no-op. -func (NilSample) Variance() float64 { return 0.0 } - -// SampleMax returns the maximum value of the slice of int64. -func SampleMax(values []int64) int64 { - if 0 == len(values) { - return 0 - } - var max int64 = math.MinInt64 - for _, v := range values { - if max < v { - max = v - } - } - return max -} - -// SampleMean returns the mean value of the slice of int64. -func SampleMean(values []int64) float64 { - if 0 == len(values) { - return 0.0 - } - return float64(SampleSum(values)) / float64(len(values)) -} - -// SampleMin returns the minimum value of the slice of int64. -func SampleMin(values []int64) int64 { - if 0 == len(values) { - return 0 - } - var min int64 = math.MaxInt64 - for _, v := range values { - if min > v { - min = v - } - } - return min -} - -// SamplePercentiles returns an arbitrary percentile of the slice of int64. -func SamplePercentile(values int64Slice, p float64) float64 { - return SamplePercentiles(values, []float64{p})[0] -} - -// SamplePercentiles returns a slice of arbitrary percentiles of the slice of -// int64. -func SamplePercentiles(values int64Slice, ps []float64) []float64 { - scores := make([]float64, len(ps)) - size := len(values) - if size > 0 { - sort.Sort(values) - for i, p := range ps { - pos := p * float64(size+1) - if pos < 1.0 { - scores[i] = float64(values[0]) - } else if pos >= float64(size) { - scores[i] = float64(values[size-1]) - } else { - lower := float64(values[int(pos)-1]) - upper := float64(values[int(pos)]) - scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) - } - } - } - return scores -} - -// SampleSnapshot is a read-only copy of another Sample. -type SampleSnapshot struct { - count int64 - values []int64 -} - -// Clear panics. -func (*SampleSnapshot) Clear() { - panic("Clear called on a SampleSnapshot") -} - -// Count returns the count of inputs at the time the snapshot was taken. -func (s *SampleSnapshot) Count() int64 { return s.count } - -// Max returns the maximal value at the time the snapshot was taken. -func (s *SampleSnapshot) Max() int64 { return SampleMax(s.values) } - -// Mean returns the mean value at the time the snapshot was taken. -func (s *SampleSnapshot) Mean() float64 { return SampleMean(s.values) } - -// Min returns the minimal value at the time the snapshot was taken. -func (s *SampleSnapshot) Min() int64 { return SampleMin(s.values) } - -// Percentile returns an arbitrary percentile of values at the time the -// snapshot was taken. -func (s *SampleSnapshot) Percentile(p float64) float64 { - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values at the time -// the snapshot was taken. -func (s *SampleSnapshot) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.values, ps) -} - -// Size returns the size of the sample at the time the snapshot was taken. -func (s *SampleSnapshot) Size() int { return len(s.values) } - -// Snapshot returns the snapshot. -func (s *SampleSnapshot) Snapshot() Sample { return s } - -// StdDev returns the standard deviation of values at the time the snapshot was -// taken. -func (s *SampleSnapshot) StdDev() float64 { return SampleStdDev(s.values) } - -// Sum returns the sum of values at the time the snapshot was taken. -func (s *SampleSnapshot) Sum() int64 { return SampleSum(s.values) } - -// Update panics. -func (*SampleSnapshot) Update(int64) { - panic("Update called on a SampleSnapshot") -} - -// Values returns a copy of the values in the sample. -func (s *SampleSnapshot) Values() []int64 { - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of values at the time the snapshot was taken. -func (s *SampleSnapshot) Variance() float64 { return SampleVariance(s.values) } - -// SampleStdDev returns the standard deviation of the slice of int64. -func SampleStdDev(values []int64) float64 { - return math.Sqrt(SampleVariance(values)) -} - -// SampleSum returns the sum of the slice of int64. -func SampleSum(values []int64) int64 { - var sum int64 - for _, v := range values { - sum += v - } - return sum -} - -// SampleVariance returns the variance of the slice of int64. -func SampleVariance(values []int64) float64 { - if 0 == len(values) { - return 0.0 - } - m := SampleMean(values) - var sum float64 - for _, v := range values { - d := float64(v) - m - sum += d * d - } - return sum / float64(len(values)) -} - -// A uniform sample using Vitter's Algorithm R. -// -// -type UniformSample struct { - count int64 - mutex sync.Mutex - reservoirSize int - values []int64 -} - -// NewUniformSample constructs a new uniform sample with the given reservoir -// size. -func NewUniformSample(reservoirSize int) Sample { - return &UniformSample{ - reservoirSize: reservoirSize, - values: make([]int64, 0, reservoirSize), - } -} - -// Clear clears all samples. -func (s *UniformSample) Clear() { - s.mutex.Lock() - defer s.mutex.Unlock() - s.count = 0 - s.values = make([]int64, 0, s.reservoirSize) -} - -// Count returns the number of samples recorded, which may exceed the -// reservoir size. -func (s *UniformSample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *UniformSample) Max() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMax(s.values) -} - -// Mean returns the mean of the values in the sample. -func (s *UniformSample) Mean() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMean(s.values) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *UniformSample) Min() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMin(s.values) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *UniformSample) Percentile(p float64) float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *UniformSample) Percentiles(ps []float64) []float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentiles(s.values, ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *UniformSample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return len(s.values) -} - -// Snapshot returns a read-only copy of the sample. -func (s *UniformSample) Snapshot() Sample { - s.mutex.Lock() - defer s.mutex.Unlock() - values := make([]int64, len(s.values)) - copy(values, s.values) - return &SampleSnapshot{ - count: s.count, - values: values, - } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *UniformSample) StdDev() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleStdDev(s.values) -} - -// Sum returns the sum of the values in the sample. -func (s *UniformSample) Sum() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleSum(s.values) -} - -// Update samples a new value. -func (s *UniformSample) Update(v int64) { - s.mutex.Lock() - defer s.mutex.Unlock() - s.count++ - if len(s.values) < s.reservoirSize { - s.values = append(s.values, v) - } else { - r := rand.Int63n(s.count) - if r < int64(len(s.values)) { - s.values[int(r)] = v - } - } -} - -// Values returns a copy of the values in the sample. -func (s *UniformSample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of the values in the sample. -func (s *UniformSample) Variance() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleVariance(s.values) -} - -// expDecaySample represents an individual sample in a heap. -type expDecaySample struct { - k float64 - v int64 -} - -func newExpDecaySampleHeap(reservoirSize int) *expDecaySampleHeap { - return &expDecaySampleHeap{make([]expDecaySample, 0, reservoirSize)} -} - -// expDecaySampleHeap is a min-heap of expDecaySamples. -// The internal implementation is copied from the standard library's container/heap -type expDecaySampleHeap struct { - s []expDecaySample -} - -func (h *expDecaySampleHeap) Clear() { - h.s = h.s[:0] -} - -func (h *expDecaySampleHeap) Push(s expDecaySample) { - n := len(h.s) - h.s = h.s[0 : n+1] - h.s[n] = s - h.up(n) -} - -func (h *expDecaySampleHeap) Pop() expDecaySample { - n := len(h.s) - 1 - h.s[0], h.s[n] = h.s[n], h.s[0] - h.down(0, n) - - n = len(h.s) - s := h.s[n-1] - h.s = h.s[0 : n-1] - return s -} - -func (h *expDecaySampleHeap) Size() int { - return len(h.s) -} - -func (h *expDecaySampleHeap) Values() []expDecaySample { - return h.s -} - -func (h *expDecaySampleHeap) up(j int) { - for { - i := (j - 1) / 2 // parent - if i == j || !(h.s[j].k < h.s[i].k) { - break - } - h.s[i], h.s[j] = h.s[j], h.s[i] - j = i - } -} - -func (h *expDecaySampleHeap) down(i, n int) { - for { - j1 := 2*i + 1 - if j1 >= n || j1 < 0 { // j1 < 0 after int overflow - break - } - j := j1 // left child - if j2 := j1 + 1; j2 < n && !(h.s[j1].k < h.s[j2].k) { - j = j2 // = 2*i + 2 // right child - } - if !(h.s[j].k < h.s[i].k) { - break - } - h.s[i], h.s[j] = h.s[j], h.s[i] - i = j - } -} - -type int64Slice []int64 - -func (p int64Slice) Len() int { return len(p) } -func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } -func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/pkg/metrics/sample_test.go b/pkg/metrics/sample_test.go deleted file mode 100644 index 755a8cf0173..00000000000 --- a/pkg/metrics/sample_test.go +++ /dev/null @@ -1,367 +0,0 @@ -// includes code from -// https://raw.githubusercontent.com/rcrowley/go-metrics/master/sample.go -// Copyright 2012 Richard Crowley. All rights reserved. - -package metrics - -import ( - "math/rand" - "runtime" - "testing" - "time" -) - -// Benchmark{Compute,Copy}{1000,1000000} demonstrate that, even for relatively -// expensive computations like Variance, the cost of copying the Sample, as -// approximated by a make and copy, is much greater than the cost of the -// computation for small samples and only slightly less for large samples. -func BenchmarkCompute1000(b *testing.B) { - s := make([]int64, 1000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - SampleVariance(s) - } -} -func BenchmarkCompute1000000(b *testing.B) { - s := make([]int64, 1000000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - SampleVariance(s) - } -} -func BenchmarkCopy1000(b *testing.B) { - s := make([]int64, 1000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) - } -} -func BenchmarkCopy1000000(b *testing.B) { - s := make([]int64, 1000000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) - } -} - -func BenchmarkExpDecaySample257(b *testing.B) { - benchmarkSample(b, NewExpDecaySample(257, 0.015)) -} - -func BenchmarkExpDecaySample514(b *testing.B) { - benchmarkSample(b, NewExpDecaySample(514, 0.015)) -} - -func BenchmarkExpDecaySample1028(b *testing.B) { - benchmarkSample(b, NewExpDecaySample(1028, 0.015)) -} - -func BenchmarkUniformSample257(b *testing.B) { - benchmarkSample(b, NewUniformSample(257)) -} - -func BenchmarkUniformSample514(b *testing.B) { - benchmarkSample(b, NewUniformSample(514)) -} - -func BenchmarkUniformSample1028(b *testing.B) { - benchmarkSample(b, NewUniformSample(1028)) -} - -func TestExpDecaySample10(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 10; i++ { - s.Update(int64(i)) - } - if size := s.Count(); 10 != size { - t.Errorf("s.Count(): 10 != %v\n", size) - } - if size := s.Size(); 10 != size { - t.Errorf("s.Size(): 10 != %v\n", size) - } - if l := len(s.Values()); 10 != l { - t.Errorf("len(s.Values()): 10 != %v\n", l) - } - for _, v := range s.Values() { - if v > 10 || v < 0 { - t.Errorf("out of range [0, 10): %v\n", v) - } - } -} - -func TestExpDecaySample100(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(1000, 0.01) - for i := 0; i < 100; i++ { - s.Update(int64(i)) - } - if size := s.Count(); 100 != size { - t.Errorf("s.Count(): 100 != %v\n", size) - } - if size := s.Size(); 100 != size { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); 100 != l { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 100 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) - } - } -} - -func TestExpDecaySample1000(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 1000; i++ { - s.Update(int64(i)) - } - if size := s.Count(); 1000 != size { - t.Errorf("s.Count(): 1000 != %v\n", size) - } - if size := s.Size(); 100 != size { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); 100 != l { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 1000 || v < 0 { - t.Errorf("out of range [0, 1000): %v\n", v) - } - } -} - -// This test makes sure that the sample's priority is not amplified by using -// nanosecond duration since start rather than second duration since start. -// The priority becomes +Inf quickly after starting if this is done, -// effectively freezing the set of samples until a rescale step happens. -func TestExpDecaySampleNanosecondRegression(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 100; i++ { - s.Update(10) - } - time.Sleep(1 * time.Millisecond) - for i := 0; i < 100; i++ { - s.Update(20) - } - v := s.Values() - avg := float64(0) - for i := 0; i < len(v); i++ { - avg += float64(v[i]) - } - avg /= float64(len(v)) - if avg > 16 || avg < 14 { - t.Errorf("out of range [14, 16]: %v\n", avg) - } -} - -func TestExpDecaySampleRescale(t *testing.T) { - s := NewExpDecaySample(2, 0.001).(*ExpDecaySample) - s.update(time.Now(), 1) - s.update(time.Now().Add(time.Hour+time.Microsecond), 1) - for _, v := range s.values.Values() { - if v.k == 0.0 { - t.Fatal("v.k == 0.0") - } - } -} - -func TestExpDecaySampleSnapshot(t *testing.T) { - now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 1; i <= 10000; i++ { - s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) - } - snapshot := s.Snapshot() - s.Update(1) - testExpDecaySampleStatistics(t, snapshot) -} - -func TestExpDecaySampleStatistics(t *testing.T) { - now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 1; i <= 10000; i++ { - s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) - } - testExpDecaySampleStatistics(t, s) -} - -func TestUniformSample(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) - for i := 0; i < 1000; i++ { - s.Update(int64(i)) - } - if size := s.Count(); 1000 != size { - t.Errorf("s.Count(): 1000 != %v\n", size) - } - if size := s.Size(); 100 != size { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); 100 != l { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 1000 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) - } - } -} - -func TestUniformSampleIncludesTail(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) - max := 100 - for i := 0; i < max; i++ { - s.Update(int64(i)) - } - v := s.Values() - sum := 0 - exp := (max - 1) * max / 2 - for i := 0; i < len(v); i++ { - sum += int(v[i]) - } - if exp != sum { - t.Errorf("sum: %v != %v\n", exp, sum) - } -} - -func TestUniformSampleSnapshot(t *testing.T) { - s := NewUniformSample(100) - for i := 1; i <= 10000; i++ { - s.Update(int64(i)) - } - snapshot := s.Snapshot() - s.Update(1) - testUniformSampleStatistics(t, snapshot) -} - -func TestUniformSampleStatistics(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) - for i := 1; i <= 10000; i++ { - s.Update(int64(i)) - } - testUniformSampleStatistics(t, s) -} - -func benchmarkSample(b *testing.B, s Sample) { - var memStats runtime.MemStats - runtime.ReadMemStats(&memStats) - pauseTotalNs := memStats.PauseTotalNs - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Update(1) - } - b.StopTimer() - runtime.GC() - runtime.ReadMemStats(&memStats) - b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) -} - -func testExpDecaySampleStatistics(t *testing.T, s Sample) { - if count := s.Count(); 10000 != count { - t.Errorf("s.Count(): 10000 != %v\n", count) - } - if min := s.Min(); 107 != min { - t.Errorf("s.Min(): 107 != %v\n", min) - } - if max := s.Max(); 10000 != max { - t.Errorf("s.Max(): 10000 != %v\n", max) - } - if mean := s.Mean(); 4965.98 != mean { - t.Errorf("s.Mean(): 4965.98 != %v\n", mean) - } - if stdDev := s.StdDev(); 2959.825156930727 != stdDev { - t.Errorf("s.StdDev(): 2959.825156930727 != %v\n", stdDev) - } - ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) - if 4615 != ps[0] { - t.Errorf("median: 4615 != %v\n", ps[0]) - } - if 7672 != ps[1] { - t.Errorf("75th percentile: 7672 != %v\n", ps[1]) - } - if 9998.99 != ps[2] { - t.Errorf("99th percentile: 9998.99 != %v\n", ps[2]) - } -} - -func testUniformSampleStatistics(t *testing.T, s Sample) { - if count := s.Count(); 10000 != count { - t.Errorf("s.Count(): 10000 != %v\n", count) - } - if min := s.Min(); 37 != min { - t.Errorf("s.Min(): 37 != %v\n", min) - } - if max := s.Max(); 9989 != max { - t.Errorf("s.Max(): 9989 != %v\n", max) - } - if mean := s.Mean(); 4748.14 != mean { - t.Errorf("s.Mean(): 4748.14 != %v\n", mean) - } - if stdDev := s.StdDev(); 2826.684117548333 != stdDev { - t.Errorf("s.StdDev(): 2826.684117548333 != %v\n", stdDev) - } - ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) - if 4599 != ps[0] { - t.Errorf("median: 4599 != %v\n", ps[0]) - } - if 7380.5 != ps[1] { - t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) - } - if 9986.429999999998 != ps[2] { - t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) - } -} - -// TestUniformSampleConcurrentUpdateCount would expose data race problems with -// concurrent Update and Count calls on Sample when test is called with -race -// argument -func TestUniformSampleConcurrentUpdateCount(t *testing.T) { - if testing.Short() { - t.Skip("skipping in short mode") - } - s := NewUniformSample(100) - for i := 0; i < 100; i++ { - s.Update(int64(i)) - } - quit := make(chan struct{}) - go func() { - t := time.NewTicker(10 * time.Millisecond) - for { - select { - case <-t.C: - s.Update(rand.Int63()) - case <-quit: - t.Stop() - return - } - } - }() - for i := 0; i < 1000; i++ { - s.Count() - time.Sleep(5 * time.Millisecond) - } - quit <- struct{}{} -} diff --git a/pkg/metrics/settings.go b/pkg/metrics/settings.go index 691bf6b6e73..ab96e9aaa92 100644 --- a/pkg/metrics/settings.go +++ b/pkg/metrics/settings.go @@ -1,25 +1,27 @@ package metrics -import "github.com/grafana/grafana/pkg/setting" +import ( + "strings" + "time" -type MetricPublisher interface { - Publish(metrics []Metric) -} + "github.com/grafana/grafana/pkg/metrics/graphitepublisher" + "github.com/grafana/grafana/pkg/setting" + "github.com/prometheus/client_golang/prometheus" + ini "gopkg.in/ini.v1" +) type MetricSettings struct { - Enabled bool - IntervalSeconds int64 - - Publishers []MetricPublisher + Enabled bool + IntervalSeconds int64 + GraphiteBridgeConfig *graphitepublisher.Config } -func readSettings() *MetricSettings { +func ReadSettings(file *ini.File) *MetricSettings { var settings = &MetricSettings{ - Enabled: false, - Publishers: make([]MetricPublisher, 0), + Enabled: false, } - var section, err = setting.Cfg.GetSection("metrics") + var section, err = file.GetSection("metrics") if err != nil { metricsLogger.Crit("Unable to find metrics config section", "error", err) return nil @@ -32,12 +34,46 @@ func readSettings() *MetricSettings { return settings } - if graphitePublisher, err := CreateGraphitePublisher(); err != nil { - metricsLogger.Error("Failed to init Graphite metric publisher", "error", err) - } else if graphitePublisher != nil { - metricsLogger.Info("Metrics publisher initialized", "type", "graphite") - settings.Publishers = append(settings.Publishers, graphitePublisher) + cfg, err := parseGraphiteSettings(settings, file) + if err != nil { + metricsLogger.Crit("Unable to parse metrics graphite section", "error", err) + return nil } + settings.GraphiteBridgeConfig = cfg + return settings } + +func parseGraphiteSettings(settings *MetricSettings, file *ini.File) (*graphitepublisher.Config, error) { + graphiteSection, err := setting.Cfg.GetSection("metrics.graphite") + if err != nil { + return nil, nil + } + + address := graphiteSection.Key("address").String() + if address == "" { + return nil, nil + } + + cfg := &graphitepublisher.Config{ + URL: address, + Prefix: graphiteSection.Key("prefix").MustString("prod.grafana.%(instance_name)s"), + CountersAsDelta: true, + Gatherer: prometheus.DefaultGatherer, + Interval: time.Duration(settings.IntervalSeconds) * time.Second, + Timeout: 10 * time.Second, + Logger: &logWrapper{logger: metricsLogger}, + ErrorHandling: graphitepublisher.ContinueOnError, + } + + safeInstanceName := strings.Replace(setting.InstanceName, ".", "_", -1) + prefix := graphiteSection.Key("prefix").Value() + + if prefix == "" { + prefix = "prod.grafana.%(instance_name)s." + } + + cfg.Prefix = strings.Replace(prefix, "%(instance_name)s", safeInstanceName, -1) + return cfg, nil +} diff --git a/pkg/metrics/timer.go b/pkg/metrics/timer.go index 7092cf1d86f..c6595b4d359 100644 --- a/pkg/metrics/timer.go +++ b/pkg/metrics/timer.go @@ -5,7 +5,6 @@ package metrics import ( - "sync" "time" "github.com/prometheus/client_golang/prometheus" @@ -15,27 +14,12 @@ import ( type Timer interface { Metric - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 - StdDev() float64 - Sum() int64 - Time(func()) Update(time.Duration) UpdateSince(time.Time) - Variance() float64 } -// NewTimer constructs a new StandardTimer using an exponentially-decaying -// sample with the same reservoir size and alpha as UNIX load averages. -func NewTimer(meta *MetricMeta) Timer { +func RegTimer(name string, tagStrings ...string) Timer { + meta := NewMetricMeta(name, tagStrings) promSummary := prometheus.NewSummary(prometheus.SummaryOpts{ Name: promifyName(meta.Name()), Help: meta.Name(), @@ -46,266 +30,24 @@ func NewTimer(meta *MetricMeta) Timer { return &StandardTimer{ MetricMeta: meta, - histogram: NewHistogram(meta, NewExpDecaySample(1028, 0.015)), - meter: NewMeter(meta), Summary: promSummary, } } -func RegTimer(name string, tagStrings ...string) Timer { - tr := NewTimer(NewMetricMeta(name, tagStrings)) - MetricStats.Register(tr) - return tr -} - -// NilTimer is a no-op Timer. -type NilTimer struct { - *MetricMeta - h Histogram - m Meter -} - -// Count is a no-op. -func (NilTimer) Count() int64 { return 0 } - -// Max is a no-op. -func (NilTimer) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilTimer) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilTimer) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilTimer) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilTimer) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Rate1 is a no-op. -func (NilTimer) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilTimer) Rate5() float64 { return 0.0 } - -// Rate15 is a no-op. -func (NilTimer) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilTimer) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (n NilTimer) Snapshot() Metric { return n } - -// StdDev is a no-op. -func (NilTimer) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilTimer) Sum() int64 { return 0 } - -// Time is a no-op. -func (NilTimer) Time(func()) {} - -// Update is a no-op. -func (NilTimer) Update(time.Duration) {} - -// UpdateSince is a no-op. -func (NilTimer) UpdateSince(time.Time) {} - -// Variance is a no-op. -func (NilTimer) Variance() float64 { return 0.0 } - // StandardTimer is the standard implementation of a Timer and uses a Histogram // and Meter. type StandardTimer struct { *MetricMeta - histogram Histogram - meter Meter - mutex sync.Mutex + prometheus.Summary } -// Count returns the number of events recorded. -func (t *StandardTimer) Count() int64 { - return t.histogram.Count() -} - -// Max returns the maximum value in the sample. -func (t *StandardTimer) Max() int64 { - return t.histogram.Max() -} - -// Mean returns the mean of the values in the sample. -func (t *StandardTimer) Mean() float64 { - return t.histogram.Mean() -} - -// Min returns the minimum value in the sample. -func (t *StandardTimer) Min() int64 { - return t.histogram.Min() -} - -// Percentile returns an arbitrary percentile of the values in the sample. -func (t *StandardTimer) Percentile(p float64) float64 { - return t.histogram.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. -func (t *StandardTimer) Percentiles(ps []float64) []float64 { - return t.histogram.Percentiles(ps) -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (t *StandardTimer) Rate1() float64 { - return t.meter.Rate1() -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (t *StandardTimer) Rate5() float64 { - return t.meter.Rate5() -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (t *StandardTimer) Rate15() float64 { - return t.meter.Rate15() -} - -// RateMean returns the meter's mean rate of events per second. -func (t *StandardTimer) RateMean() float64 { - return t.meter.RateMean() -} - -// Snapshot returns a read-only copy of the timer. -func (t *StandardTimer) Snapshot() Metric { - t.mutex.Lock() - defer t.mutex.Unlock() - return &TimerSnapshot{ - MetricMeta: t.MetricMeta, - histogram: t.histogram.Snapshot().(*HistogramSnapshot), - meter: t.meter.Snapshot().(*MeterSnapshot), - } -} - -// StdDev returns the standard deviation of the values in the sample. -func (t *StandardTimer) StdDev() float64 { - return t.histogram.StdDev() -} - -// Sum returns the sum in the sample. -func (t *StandardTimer) Sum() int64 { - return t.histogram.Sum() -} - -// Record the duration of the execution of the given function. -func (t *StandardTimer) Time(f func()) { - ts := time.Now() - f() - t.Update(time.Since(ts)) -} - // Record the duration of an event. func (t *StandardTimer) Update(d time.Duration) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.histogram.Update(int64(d)) - t.meter.Mark(1) - t.Summary.Observe(float64(d)) } // Record the duration of an event that started at a time and ends now. func (t *StandardTimer) UpdateSince(ts time.Time) { - t.mutex.Lock() - defer t.mutex.Unlock() - sinceMs := time.Since(ts) / time.Millisecond - t.histogram.Update(int64(sinceMs)) - t.meter.Mark(1) - - t.Summary.Observe(float64(sinceMs)) + t.Summary.Observe(float64(time.Since(ts) / time.Millisecond)) } - -// Variance returns the variance of the values in the sample. -func (t *StandardTimer) Variance() float64 { - return t.histogram.Variance() -} - -// TimerSnapshot is a read-only copy of another Timer. -type TimerSnapshot struct { - *MetricMeta - histogram *HistogramSnapshot - meter *MeterSnapshot -} - -// Count returns the number of events recorded at the time the snapshot was -// taken. -func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } - -// Max returns the maximum value at the time the snapshot was taken. -func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } - -// Mean returns the mean value at the time the snapshot was taken. -func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } - -// Min returns the minimum value at the time the snapshot was taken. -func (t *TimerSnapshot) Min() int64 { return t.histogram.Min() } - -// Percentile returns an arbitrary percentile of sampled values at the time the -// snapshot was taken. -func (t *TimerSnapshot) Percentile(p float64) float64 { - return t.histogram.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of sampled values at -// the time the snapshot was taken. -func (t *TimerSnapshot) Percentiles(ps []float64) []float64 { - return t.histogram.Percentiles(ps) -} - -// Rate1 returns the one-minute moving average rate of events per second at the -// time the snapshot was taken. -func (t *TimerSnapshot) Rate1() float64 { return t.meter.Rate1() } - -// Rate5 returns the five-minute moving average rate of events per second at -// the time the snapshot was taken. -func (t *TimerSnapshot) Rate5() float64 { return t.meter.Rate5() } - -// Rate15 returns the fifteen-minute moving average rate of events per second -// at the time the snapshot was taken. -func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } - -// RateMean returns the meter's mean rate of events per second at the time the -// snapshot was taken. -func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } - -// Snapshot returns the snapshot. -func (t *TimerSnapshot) Snapshot() Metric { return t } - -// StdDev returns the standard deviation of the values at the time the snapshot -// was taken. -func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } - -// Sum returns the sum at the time the snapshot was taken. -func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } - -// Time panics. -func (*TimerSnapshot) Time(func()) { - panic("Time called on a TimerSnapshot") -} - -// Update panics. -func (*TimerSnapshot) Update(time.Duration) { - panic("Update called on a TimerSnapshot") -} - -// UpdateSince panics. -func (*TimerSnapshot) UpdateSince(time.Time) { - panic("UpdateSince called on a TimerSnapshot") -} - -// Variance returns the variance of the values at the time the snapshot was -// taken. -func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() }