Alerting: Run state manager as regular sub-service (#58246)
Parent: e6a9fa1cf9
Commit: 978f1119d7
@@ -282,6 +282,10 @@ func (ng *AlertNG) Run(ctx context.Context) error {
 
 	children, subCtx := errgroup.WithContext(ctx)
 
+	children.Go(func() error {
+		return ng.stateManager.Run(subCtx)
+	})
+
 	children.Go(func() error {
 		return ng.MultiOrgAlertmanager.Run(subCtx)
 	})
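The hunk above makes the state manager a peer of the multi-org Alertmanager inside the same errgroup: golang.org/x/sync/errgroup runs each sub-service in its own goroutine, hands them one shared context, and cancels that context as soon as any of them returns an error. A minimal, self-contained sketch of that pattern follows; the subService type and the names in it are illustrative stand-ins, not Grafana code.

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/sync/errgroup"
)

// subService stands in for anything exposing a blocking Run(ctx) error,
// such as the state manager or the multi-org Alertmanager above.
type subService struct{ name string }

func (s subService) Run(ctx context.Context) error {
	<-ctx.Done() // block until the shared context is cancelled
	fmt.Printf("%s stopped: %v\n", s.name, ctx.Err())
	return ctx.Err()
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	// Every child shares subCtx: the first child to return a non-nil error
	// cancels subCtx, which shuts the remaining children down as well.
	children, subCtx := errgroup.WithContext(ctx)
	for _, s := range []subService{{"stateManager"}, {"multiOrgAlertmanager"}} {
		s := s // capture the loop variable for the closure
		children.Go(func() error { return s.Run(subCtx) })
	}
	fmt.Println("Run finished:", children.Wait())
}

The payoff is a single shutdown path: cancelling the parent context, or any child failing, stops every sub-service, which is what lets the scheduler drop its explicit stateManager.Close() call in the next hunk.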
@@ -298,8 +298,6 @@ func (sch *schedule) schedulePeriodic(ctx context.Context, t *ticker.T) error {
 		case <-ctx.Done():
 			// waiting for all rule evaluation routines to stop
 			waitErr := dispatcherGroup.Wait()
-			// close the state manager and flush the state
-			sch.stateManager.Close()
 			return waitErr
 		}
 	}
@@ -15,7 +15,10 @@ import (
 	ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
 )
 
-var ResendDelay = 30 * time.Second
+var (
+	ResendDelay           = 30 * time.Second
+	MetricsScrapeInterval = 15 * time.Second // TODO: parameterize? // Setting to a reasonable default scrape interval for Prometheus.
+)
 
 // AlertInstanceManager defines the interface for querying the current alert instances.
 type AlertInstanceManager interface {
@@ -29,7 +32,6 @@ type Manager struct {
 
 	clock       clock.Clock
 	cache       *cache
-	quit        chan struct{}
 	ResendDelay time.Duration
 
 	instanceStore InstanceStore
@@ -39,9 +41,8 @@ type Manager struct {
 }
 
 func NewManager(metrics *metrics.State, externalURL *url.URL, instanceStore InstanceStore, imageService image.ImageService, clock clock.Clock, historian Historian) *Manager {
-	manager := &Manager{
+	return &Manager{
 		cache:       newCache(),
-		quit:        make(chan struct{}),
 		ResendDelay: ResendDelay, // TODO: make this configurable
 		log:         log.New("ngalert.state.manager"),
 		metrics:     metrics,
@@ -51,14 +52,21 @@ func NewManager(metrics *metrics.State, externalURL *url.URL, instanceStore Inst
 		clock:       clock,
 		externalURL: externalURL,
 	}
-	if manager.metrics != nil {
-		go manager.recordMetrics()
-	}
-	return manager
 }
 
-func (st *Manager) Close() {
-	st.quit <- struct{}{}
+func (st *Manager) Run(ctx context.Context) error {
+	ticker := st.clock.Ticker(MetricsScrapeInterval)
+	for {
+		select {
+		case <-ticker.C:
+			st.log.Debug("Recording state cache metrics", "now", st.clock.Now())
+			st.cache.recordMetrics(st.metrics)
+		case <-ctx.Done():
+			st.log.Debug("Stopping")
+			ticker.Stop()
+			return ctx.Err()
+		}
+	}
 }
 
 func (st *Manager) Warm(ctx context.Context, rulesReader RuleReader) {
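The new Run method replaces the quit channel and the recordMetrics goroutine (removed in the hunk below) with a loop driven entirely by its context and by the injected clock.Clock. A minimal sketch of that shape is below, written against github.com/benbjohnson/clock, the injectable clock library that Grafana's ngalert code imports as clock.Clock; the recorder type and its fields are illustrative, not the actual Manager.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/benbjohnson/clock"
)

// recorder is an illustrative stand-in for the state Manager: it records
// metrics on every tick and stops when its context is cancelled.
type recorder struct {
	clock    clock.Clock
	interval time.Duration
}

func (r *recorder) Run(ctx context.Context) error {
	ticker := r.clock.Ticker(r.interval)
	defer ticker.Stop()
	for {
		select {
		case t := <-ticker.C:
			fmt.Println("recording state cache metrics at", t)
		case <-ctx.Done():
			// No quit channel: cancellation arrives through the context,
			// the same way the errgroup above stops its children.
			return ctx.Err()
		}
	}
}

func main() {
	r := &recorder{clock: clock.New(), interval: 100 * time.Millisecond}

	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
	defer cancel()

	// A few ticks fire, then Run returns context.DeadlineExceeded.
	fmt.Println("Run returned:", r.Run(ctx))
}

In a test, swapping clock.New() for clock.NewMock() lets the ticker be advanced with Mock.Add instead of waiting in real time, which is the point of injecting clock.Clock rather than calling time.NewTicker directly.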
@@ -269,24 +277,6 @@ func (st *Manager) GetStatesForRuleUID(orgID int64, alertRuleUID string) []*Stat
 	return st.cache.getStatesForRuleUID(orgID, alertRuleUID)
 }
 
-func (st *Manager) recordMetrics() {
-	// TODO: parameterize?
-	// Setting to a reasonable default scrape interval for Prometheus.
-	dur := time.Duration(15) * time.Second
-	ticker := st.clock.Ticker(dur)
-	for {
-		select {
-		case <-ticker.C:
-			st.log.Debug("Recording state cache metrics", "now", st.clock.Now())
-			st.cache.recordMetrics(st.metrics)
-		case <-st.quit:
-			st.log.Debug("Stopping state cache metrics recording", "now", st.clock.Now())
-			ticker.Stop()
-			return
-		}
-	}
-}
-
 func (st *Manager) Put(states []*State) {
 	for _, s := range states {
 		st.cache.set(s)