mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting/metrics (#33547)
* moves alerting metrics to their own pkg
* adds grafana_alerting_alerts (by state) metric
* alerts_received_{total,invalid}
* embed alertmanager alerting struct in ng metrics & remove duplicated notification metrics (already embed alertmanager notifier metrics)
* use silence metrics from alertmanager lib
* fix - manager has metrics
* updates ngalert tests
* comment lint

Signed-off-by: Owen Diehl <ow.diehl@gmail.com>

* cleaner prom registry code
* removes ngalert global metrics
* new registry use in all tests
* ngalert metrics impl service, hack testinfra code to prevent duplicate metric registrations
* nilmetrics unexported
This commit is contained in:
@@ -2,12 +2,14 @@ package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
prometheusModel "github.com/prometheus/common/model"
|
||||
)
|
||||
@@ -16,12 +18,14 @@ type cache struct {
|
||||
states map[string]*State
|
||||
mtxStates sync.RWMutex
|
||||
log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func newCache(logger log.Logger) *cache {
|
||||
func newCache(logger log.Logger, metrics *metrics.Metrics) *cache {
|
||||
return &cache{
|
||||
states: make(map[string]*State),
|
||||
log: logger,
|
||||
states: make(map[string]*State),
|
||||
log: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,13 +122,23 @@ func (c *cache) reset() {
|
||||
func (c *cache) trim() {
|
||||
c.mtxStates.Lock()
|
||||
defer c.mtxStates.Unlock()
|
||||
|
||||
ct := make(map[eval.State]int)
|
||||
|
||||
for _, v := range c.states {
|
||||
if len(v.Results) > 100 {
|
||||
newResults := make([]Evaluation, 100)
|
||||
copy(newResults, v.Results[100:])
|
||||
// Keep last 100 results
|
||||
copy(newResults, v.Results[len(v.Results)-100:])
|
||||
v.Results = newResults
|
||||
c.set(v)
|
||||
}
|
||||
|
||||
n := ct[v.State]
|
||||
ct[v.State] = n + 1
|
||||
}
|
||||
|
||||
for k, n := range ct {
|
||||
c.metrics.AlertState.WithLabelValues(strings.ToLower(k.String())).Set(float64(n))
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,25 +1,29 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
Log log.Logger
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
Log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func NewManager(logger log.Logger) *Manager {
|
||||
func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
|
||||
manager := &Manager{
|
||||
cache: newCache(logger),
|
||||
quit: make(chan struct{}),
|
||||
Log: logger,
|
||||
cache: newCache(logger, metrics),
|
||||
quit: make(chan struct{}),
|
||||
Log: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
go manager.cleanUp()
|
||||
return manager
|
||||
@@ -95,8 +99,11 @@ func (st *Manager) GetStatesByRuleUID() map[string][]*State {
|
||||
}
|
||||
|
||||
func (st *Manager) cleanUp() {
|
||||
ticker := time.NewTicker(time.Duration(60) * time.Minute)
|
||||
st.Log.Debug("starting cleanup process", "intervalMinutes", 60)
|
||||
// TODO: parameterize?
|
||||
// Setting to a reasonable default scrape interval for Prometheus.
|
||||
dur := time.Duration(15) * time.Second
|
||||
ticker := time.NewTicker(dur)
|
||||
st.Log.Debug("starting cleanup process", "dur", fmt.Sprint(dur))
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
|
Reference in New Issue
Block a user