package state

import (
	"context"
	"errors"
	"fmt"
	"net/url"
	"strconv"
	"strings"
	"time"

	"github.com/benbjohnson/clock"
	"github.com/grafana/grafana-plugin-sdk-go/data"

	"github.com/grafana/grafana/pkg/infra/log"
	"github.com/grafana/grafana/pkg/models"
	"github.com/grafana/grafana/pkg/services/annotations"
	"github.com/grafana/grafana/pkg/services/dashboards"
	"github.com/grafana/grafana/pkg/services/ngalert/eval"
	"github.com/grafana/grafana/pkg/services/ngalert/image"
	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
	ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
	"github.com/grafana/grafana/pkg/services/ngalert/store"
	"github.com/grafana/grafana/pkg/services/screenshot"
)
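
// ResendDelay is the default value of Manager.ResendDelay.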
var ResendDelay = 30 * time.Second

// AlertInstanceManager defines the interface for querying the current alert instances.
type AlertInstanceManager interface {
	GetAll(orgID int64) []*State
	GetStatesForRuleUID(orgID int64, alertRuleUID string) []*State
}
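
// Manager holds the in-memory cache of alert instance states and the stores
// used to persist and annotate them.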
type Manager struct {
	log     log.Logger
	metrics *metrics.State

	clock       clock.Clock
	cache       *cache
	quit        chan struct{}
	ResendDelay time.Duration

	ruleStore        store.RuleStore
	instanceStore    store.InstanceStore
	dashboardService dashboards.DashboardService
	imageService     image.ImageService
	AnnotationsRepo  annotations.Repository
}
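
// NewManager creates a Manager and starts its metrics-recording goroutine.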
func NewManager(logger log.Logger, metrics *metrics.State, externalURL *url.URL,
	ruleStore store.RuleStore, instanceStore store.InstanceStore,
	dashboardService dashboards.DashboardService, imageService image.ImageService, clock clock.Clock, annotationsRepo annotations.Repository) *Manager {
	manager := &Manager{
		cache:            newCache(logger, metrics, externalURL),
		quit:             make(chan struct{}),
		ResendDelay:      ResendDelay, // TODO: make this configurable
		log:              logger,
		metrics:          metrics,
		ruleStore:        ruleStore,
		instanceStore:    instanceStore,
		dashboardService: dashboardService,
		imageService:     imageService,
		clock:            clock,
		AnnotationsRepo:  annotationsRepo,
	}
	go manager.recordMetrics()
	return manager
}
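
// Close stops the metrics-recording goroutine started by NewManager.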
func (st *Manager) Close() {
	st.quit <- struct{}{}
}
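
// Warm loads rules and alert instances from the database and seeds the state
// cache, so alert states survive a restart.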
func (st *Manager) Warm(ctx context.Context) {
	st.log.Info("warming cache for startup")
	st.ResetAllStates()

	orgIds, err := st.instanceStore.FetchOrgIds(ctx)
	if err != nil {
		st.log.Error("unable to fetch orgIds", "msg", err.Error())
	}

	var states []*State
	for _, orgId := range orgIds {
		// Get Rules
		ruleCmd := ngModels.ListAlertRulesQuery{
			OrgID: orgId,
		}
		if err := st.ruleStore.ListAlertRules(ctx, &ruleCmd); err != nil {
			st.log.Error("unable to fetch previous state", "msg", err.Error())
		}

		ruleByUID := make(map[string]*ngModels.AlertRule, len(ruleCmd.Result))
		for _, rule := range ruleCmd.Result {
			ruleByUID[rule.UID] = rule
		}

		// Get Instances
		cmd := ngModels.ListAlertInstancesQuery{
			RuleOrgID: orgId,
		}
		if err := st.instanceStore.ListAlertInstances(ctx, &cmd); err != nil {
			st.log.Error("unable to fetch previous state", "msg", err.Error())
		}

		for _, entry := range cmd.Result {
			ruleForEntry, ok := ruleByUID[entry.RuleUID]
			if !ok {
				st.log.Error("rule not found for instance, ignoring", "rule", entry.RuleUID)
				continue
			}

			lbs := map[string]string(entry.Labels)
			cacheId, err := entry.Labels.StringKey()
			if err != nil {
				st.log.Error("error getting cacheId for entry", "msg", err.Error())
			}
			stateForEntry := &State{
				AlertRuleUID:         entry.RuleUID,
				OrgID:                entry.RuleOrgID,
				CacheId:              cacheId,
				Labels:               lbs,
				State:                translateInstanceState(entry.CurrentState),
				StateReason:          entry.CurrentReason,
				LastEvaluationString: "",
				StartsAt:             entry.CurrentStateSince,
				EndsAt:               entry.CurrentStateEnd,
				LastEvaluationTime:   entry.LastEvalTime,
				Annotations:          ruleForEntry.Annotations,
			}
			states = append(states, stateForEntry)
		}
	}

	for _, s := range states {
		st.set(s)
	}
}

func (st *Manager) getOrCreate(ctx context.Context, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels) *State {
	return st.cache.getOrCreate(ctx, alertRule, result, extraLabels)
}

func (st *Manager) set(entry *State) {
	st.cache.set(entry)
}
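
// Get returns the cached state for the given org, rule UID, and state cache ID.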
func (st *Manager) Get(orgID int64, alertRuleUID, stateId string) (*State, error) {
	return st.cache.get(orgID, alertRuleUID, stateId)
}

// ResetAllStates is used to ensure a clean cache on startup.
func (st *Manager) ResetAllStates() {
	st.cache.reset()
}

// ResetStateByRuleUID deletes all entries in the state manager that match the given rule UID.
func (st *Manager) ResetStateByRuleUID(ctx context.Context, ruleKey ngModels.AlertRuleKey) []*State {
	logger := st.log.New(ruleKey.LogContext()...)
	logger.Debug("resetting state of the rule")
	states := st.cache.removeByRuleUID(ruleKey.OrgID, ruleKey.UID)
	if len(states) > 0 {
		err := st.instanceStore.DeleteAlertInstancesByRule(ctx, ruleKey)
		if err != nil {
			logger.Error("failed to delete states that belong to a rule from database", ruleKey.LogContext()...)
		}
	}
	logger.Info("rules state was reset", "deleted_states", len(states))
	return states
}

// ProcessEvalResults updates the current states that belong to a rule with the evaluation results.
// If extraLabels is not empty, those labels are added to every state. The extraLabels take precedence over rule labels and result labels.
func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time, alertRule *ngModels.AlertRule, results eval.Results, extraLabels data.Labels) []*State {
	logger := st.log.New(alertRule.GetKey().LogContext()...)
	logger.Debug("state manager processing evaluation results", "resultCount", len(results))
	var states []*State
	processedResults := make(map[string]*State, len(results))
	for _, result := range results {
		s := st.setNextState(ctx, alertRule, result, extraLabels)
		states = append(states, s)
		processedResults[s.CacheId] = s
	}
	st.staleResultsHandler(ctx, evaluatedAt, alertRule, processedResults)
	if len(states) > 0 {
		logger.Debug("saving new states to the database", "count", len(states))
		for _, state := range states {
			if err := st.saveState(ctx, state); err != nil {
				logger.Error("failed to save alert state", "labels", state.Labels.String(), "state", state.State.String(), "err", err.Error())
			}
		}
	}
	return states
}

// Maybe take a screenshot. Do it if:
// 1. The alert state is transitioning into the "Alerting" state from something else.
// 2. The alert state has just transitioned to the resolved state.
// 3. The state is alerting and there is no screenshot annotation on the alert state.
func (st *Manager) maybeTakeScreenshot(
	ctx context.Context,
	alertRule *ngModels.AlertRule,
	state *State,
	oldState eval.State,
) error {
	shouldScreenshot := state.Resolved ||
		state.State == eval.Alerting && oldState != eval.Alerting ||
		state.State == eval.Alerting && state.Image == nil
	if !shouldScreenshot {
		return nil
	}

	img, err := st.imageService.NewImage(ctx, alertRule)
	if err != nil &&
		errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
		errors.Is(err, image.ErrNoDashboard) ||
		errors.Is(err, image.ErrNoPanel) {
		// It's not an error if screenshots are disabled, or our rule isn't allowed to generate screenshots.
		return nil
	} else if err != nil {
		return err
	}
	state.Image = img
	return nil
}

// setNextState sets the current state based on the evaluation results.
func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels) *State {
	currentState := st.getOrCreate(ctx, alertRule, result, extraLabels)

	currentState.LastEvaluationTime = result.EvaluatedAt
	currentState.EvaluationDuration = result.EvaluationDuration
	currentState.Results = append(currentState.Results, Evaluation{
		EvaluationTime:  result.EvaluatedAt,
		EvaluationState: result.State,
		Values:          NewEvaluationValues(result.Values),
		Condition:       alertRule.Condition,
	})
	currentState.LastEvaluationString = result.EvaluationString
	currentState.TrimResults(alertRule)
	oldState := currentState.State
	oldReason := currentState.StateReason

	st.log.Debug("setting alert state", "uid", alertRule.UID)
	switch result.State {
	case eval.Normal:
		currentState.resultNormal(alertRule, result)
	case eval.Alerting:
		currentState.resultAlerting(alertRule, result)
	case eval.Error:
		currentState.resultError(alertRule, result)
	case eval.NoData:
		currentState.resultNoData(alertRule, result)
	case eval.Pending: // we do not emit results with this state
	}

	// Set the reason only if the result state differs from the current state and is neither Alerting nor Normal.
	currentState.StateReason = ""

	if currentState.State != result.State &&
		result.State != eval.Normal &&
		result.State != eval.Alerting {
		currentState.StateReason = result.State.String()
	}

	// Set Resolved property so the scheduler knows to send a postable alert
	// to Alertmanager.
	currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal

	err := st.maybeTakeScreenshot(ctx, alertRule, currentState, oldState)
	if err != nil {
		st.log.Warn("failed to generate a screenshot for an alert instance",
			"alert_rule", alertRule.UID,
			"dashboard", alertRule.DashboardUID,
			"panel", alertRule.PanelID,
			"err", err)
	}

	st.set(currentState)

	shouldUpdateAnnotation := oldState != currentState.State || oldReason != currentState.StateReason
	if shouldUpdateAnnotation {
		go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, InstanceStateAndReason{State: currentState.State, Reason: currentState.StateReason}, InstanceStateAndReason{State: oldState, Reason: oldReason})
	}
	return currentState
}
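
// GetAll returns all cached states for the given org.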
func (st *Manager) GetAll(orgID int64) []*State {
	return st.cache.getAll(orgID)
}
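
// GetStatesForRuleUID returns all cached states for the given org and rule UID.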
func (st *Manager) GetStatesForRuleUID(orgID int64, alertRuleUID string) []*State {
	return st.cache.getStatesForRuleUID(orgID, alertRuleUID)
}
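
// recordMetrics records state cache metrics on a fixed interval until Close is called.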
func (st *Manager) recordMetrics() {
	// TODO: parameterize?
	// Setting to a reasonable default scrape interval for Prometheus.
	dur := time.Duration(15) * time.Second
	ticker := st.clock.Ticker(dur)
	for {
		select {
		case <-ticker.C:
			st.log.Debug("recording state cache metrics", "now", st.clock.Now())
			st.cache.recordMetrics()
		case <-st.quit:
			st.log.Debug("stopping state cache metrics recording", "now", st.clock.Now())
			ticker.Stop()
			return
		}
	}
}
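
// Put adds the given states to the cache.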
func (st *Manager) Put(states []*State) {
	for _, s := range states {
		st.set(s)
	}
}
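
// saveState persists a state as an alert instance in the instance store.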
func (st *Manager) saveState(ctx context.Context, s *State) error {
	cmd := ngModels.SaveAlertInstanceCommand{
		RuleOrgID:         s.OrgID,
		RuleUID:           s.AlertRuleUID,
		Labels:            ngModels.InstanceLabels(s.Labels),
		State:             ngModels.InstanceStateType(s.State.String()),
		StateReason:       s.StateReason,
		LastEvalTime:      s.LastEvaluationTime,
		CurrentStateSince: s.StartsAt,
		CurrentStateEnd:   s.EndsAt,
	}
	return st.instanceStore.SaveAlertInstance(ctx, &cmd)
}
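
// translateInstanceState maps a stored instance state to its evaluation state.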
// TODO: why wouldn't you allow other types like NoData or Error?
func translateInstanceState(state ngModels.InstanceStateType) eval.State {
	switch {
	case state == ngModels.InstanceStateFiring:
		return eval.Alerting
	case state == ngModels.InstanceStateNormal:
		return eval.Normal
	default:
		return eval.Error
	}
}

// InstanceStateAndReason groups a state with its reason and provides string formatting.
type InstanceStateAndReason struct {
	State  eval.State
	Reason string
}

func (i InstanceStateAndReason) String() string {
	s := fmt.Sprintf("%v", i.State)
	if len(i.Reason) > 0 {
		s += fmt.Sprintf(" (%v)", i.Reason)
	}
	return s
}
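
// annotateState records a state transition as an annotation and, when the rule
// carries dashboard and panel annotations, links it to that dashboard panel.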
func (st *Manager) annotateState(ctx context.Context, alertRule *ngModels.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData InstanceStateAndReason) {
	st.log.Debug("alert state changed creating annotation", "alertRuleUID", alertRule.UID, "newState", currentData.String(), "oldState", previousData.String())

	labels = removePrivateLabels(labels)
	annotationText := fmt.Sprintf("%s {%s} - %s", alertRule.Title, labels.String(), currentData.String())

	item := &annotations.Item{
		AlertId:   alertRule.ID,
		OrgId:     alertRule.OrgID,
		PrevState: previousData.String(),
		NewState:  currentData.String(),
		Text:      annotationText,
		Epoch:     evaluatedAt.UnixNano() / int64(time.Millisecond),
	}

	dashUid, ok := alertRule.Annotations[ngModels.DashboardUIDAnnotation]
	if ok {
		panelUid := alertRule.Annotations[ngModels.PanelIDAnnotation]

		panelId, err := strconv.ParseInt(panelUid, 10, 64)
		if err != nil {
			st.log.Error("error parsing panelUID for alert annotation", "panelUID", panelUid, "alertRuleUID", alertRule.UID, "err", err.Error())
			return
		}

		query := &models.GetDashboardQuery{
			Uid:   dashUid,
			OrgId: alertRule.OrgID,
		}

		err = st.dashboardService.GetDashboard(ctx, query)
		if err != nil {
			st.log.Error("error getting dashboard for alert annotation", "dashboardUID", dashUid, "alertRuleUID", alertRule.UID, "err", err.Error())
			return
		}

		item.PanelId = panelId
		item.DashboardId = query.Result.Id
	}

	if err := st.AnnotationsRepo.Save(ctx, item); err != nil {
		st.log.Error("error saving alert annotation", "alertRuleUID", alertRule.UID, "err", err.Error())
		return
	}
}
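
// staleResultsHandler removes cached states that were not updated by the current
// evaluation and have gone stale, deletes their instances from the database, and
// annotates any that were still alerting as resolved.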
func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Time, alertRule *ngModels.AlertRule, states map[string]*State) {
	allStates := st.GetStatesForRuleUID(alertRule.OrgID, alertRule.UID)
	for _, s := range allStates {
		_, ok := states[s.CacheId]
		if !ok && isItStale(evaluatedAt, s.LastEvaluationTime, alertRule.IntervalSeconds) {
			st.log.Debug("removing stale state entry", "orgID", s.OrgID, "alertRuleUID", s.AlertRuleUID, "cacheID", s.CacheId)
			st.cache.deleteEntry(s.OrgID, s.AlertRuleUID, s.CacheId)
			ilbs := ngModels.InstanceLabels(s.Labels)
			_, labelsHash, err := ilbs.StringAndHash()
			if err != nil {
				st.log.Error("unable to get labelsHash", "err", err.Error(), "orgID", s.OrgID, "alertRuleUID", s.AlertRuleUID)
			}

			if err = st.instanceStore.DeleteAlertInstance(ctx, s.OrgID, s.AlertRuleUID, labelsHash); err != nil {
				st.log.Error("unable to delete stale instance from database", "err", err.Error(), "orgID", s.OrgID, "alertRuleUID", s.AlertRuleUID, "cacheID", s.CacheId)
			}

			if s.State == eval.Alerting {
				st.annotateState(ctx, alertRule, s.Labels, evaluatedAt,
					InstanceStateAndReason{State: eval.Normal, Reason: ""},
					InstanceStateAndReason{State: s.State, Reason: s.StateReason})
			}
		}
	}
}
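
// isItStale reports whether the last evaluation is at least two evaluation
// intervals older than the current evaluation time.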
func isItStale(evaluatedAt time.Time, lastEval time.Time, intervalSeconds int64) bool {
	return !lastEval.Add(2 * time.Duration(intervalSeconds) * time.Second).After(evaluatedAt)
}
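
// removePrivateLabels returns a copy of labels without private labels (those starting or ending with "__").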
func removePrivateLabels(labels data.Labels) data.Labels {
	result := make(data.Labels)
	for k, v := range labels {
		if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
			result[k] = v
		}
	}
	return result
}