grafana/pkg/services/ngalert/migration/models.go
Matthew Jacobson 71e70c424f
Alerting: During legacy migration reduce the number of created silences (#78505)
* Alerting: During legacy migration reduce the number of created silences

During legacy migration every migrated rule was given a label rule_uid=<uid>.
This was used to silence DatasourceError/DatasourceNoData alerts for
migrated rules that had either ExecutionErrorState/NoDataState set to
keep_state, respectively.

This could potentially create a large number of silences and a high-cardinality
label. Both of these scenarios have poor outcomes for CPU load and latency in
unified alerting.

Instead, this change creates one label per ExecutionErrorState/NoDataState when
they are set to keep_state as well as two silence rules, if rules with said
labels were created during migration. These silence rules are:

- __legacy_silence_error_keep_state__ = true
- __legacy_silence_nodata_keep_state__ = true

This will drastically reduce the number of created silence rules in most cases
as well as not create the potentially high cardinality label `rule_uid`.
2024-01-24 15:56:19 -05:00

93 lines
2.3 KiB
Go

package migration
import (
"context"
"errors"
"github.com/grafana/grafana/pkg/infra/log"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
"github.com/grafana/grafana/pkg/services/secrets"
"github.com/grafana/grafana/pkg/setting"
)
// OrgMigration is a helper struct for migrating alerts for a single org. It contains state, services, and caches.
// Instances are built by migrationService.newOrgMigration, which fills every field from the service and the given orgID.
type OrgMigration struct {
// cfg is the configuration pointer taken from the migrationService.
cfg *setting.Cfg
// log is the logger obtained from the service logger via New("orgID", orgID).
log log.Logger
// migrationStore is the store taken from the migrationService.
migrationStore migrationStore.Store
// encryptionService is the secrets service taken from the migrationService.
encryptionService secrets.Service
// orgID is the ID of the org this migration is scoped to.
orgID int64
// silences is the same *silenceHandler instance held by the migrationService.
silences *silenceHandler
}
// newOrgMigration builds the OrgMigration used to migrate the org identified by orgID.
// The configuration, migration store, encryption service and silence handler are
// taken as-is from the service; the logger is obtained from the service logger by
// calling New with the "orgID" key and the given orgID.
func (ms *migrationService) newOrgMigration(orgID int64) *OrgMigration {
	om := &OrgMigration{orgID: orgID}

	// Dependencies shared with the owning service.
	om.cfg = ms.cfg
	om.log = ms.log.New("orgID", orgID)
	om.migrationStore = ms.migrationStore
	om.encryptionService = ms.encryptionService
	om.silences = ms.silences

	return om
}
// ChannelCache caches channels by ID and UID.
// Lookups go through Get, which consults cache first and calls fetch on a miss.
type ChannelCache struct {
// channels accumulates every channel returned by a successful fetch, in the order fetched.
channels []*legacymodels.AlertNotification
// cache maps a channel ID or a channel UID to the channel. A nil value records that
// a previous fetch for that key failed with migrationStore.ErrNotFound.
cache map[any]*legacymodels.AlertNotification
// fetch loads a channel for a key that is not present in cache.
fetch func(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error)
}
// Get returns the notification channel identified by key, consulting the cache
// before falling back to fetch. key.ID is only considered when it is positive and
// key.UID only when it is non-empty; the ID is checked first.
//
// A channel that fetch reports as migrationStore.ErrNotFound is returned as
// (nil, nil), and the miss is recorded under the usable parts of key so that
// later calls with the same key do not fetch again. Any other fetch error is
// returned as-is and nothing is cached.
func (c *ChannelCache) Get(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error) {
	hasID, hasUID := key.ID > 0, key.UID != ""

	// Cache hits include remembered misses, which are stored as nil entries.
	if hasID {
		if hit, found := c.cache[key.ID]; found {
			return hit, nil
		}
	}
	if hasUID {
		if hit, found := c.cache[key.UID]; found {
			return hit, nil
		}
	}

	fetched, err := c.fetch(ctx, key)
	switch {
	case err == nil:
		// Index the channel under both of its own identifiers, not the lookup key.
		c.cache[fetched.ID] = fetched
		c.cache[fetched.UID] = fetched
		c.channels = append(c.channels, fetched)
		return fetched, nil
	case errors.Is(err, migrationStore.ErrNotFound):
		// Remember the miss so the same key is not fetched again.
		if hasID {
			c.cache[key.ID] = nil
		}
		if hasUID {
			c.cache[key.UID] = nil
		}
		return nil, nil
	default:
		return nil, err
	}
}
// newChannelCache returns an empty ChannelCache whose fetch function loads
// notification channels for orgID from the service's migration store, querying
// by the ID and UID of the requested key.
func (ms *migrationService) newChannelCache(orgID int64) *ChannelCache {
	lookup := func(ctx context.Context, key notificationKey) (*legacymodels.AlertNotification, error) {
		query := migrationStore.GetNotificationChannelQuery{
			OrgID: orgID,
			ID:    key.ID,
			UID:   key.UID,
		}
		channel, err := ms.migrationStore.GetNotificationChannel(ctx, query)
		if err != nil {
			// Never hand back a channel alongside an error.
			return nil, err
		}
		return channel, nil
	}

	return &ChannelCache{
		cache: make(map[any]*legacymodels.AlertNotification),
		fetch: lookup,
	}
}