Alerting: Stop persisting user-defined templates to disk (#83456)

Updates Grafana Alertmanager to work with the new interface from grafana/alerting#161. Previously, user-defined templates were persisted to disk and handed to the Grafana Alertmanager as file paths; with this change they are no longer written to disk and their contents are passed directly as strings.
This commit is contained in:
Matthew Jacobson 2024-03-04 13:12:49 -05:00 committed by GitHub
parent fa51724bc6
commit 2e8c514cfd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 47 additions and 209 deletions

2
go.mod
View File

@ -59,7 +59,7 @@ require (
github.com/google/uuid v1.6.0 // @grafana/backend-platform
github.com/google/wire v0.5.0 // @grafana/backend-platform
github.com/gorilla/websocket v1.5.0 // @grafana/grafana-app-platform-squad
github.com/grafana/alerting v0.0.0-20240222104113-abfafef9a7d2 // @grafana/alerting-squad-backend
github.com/grafana/alerting v0.0.0-20240304175322-e81931acc11b // @grafana/alerting-squad-backend
github.com/grafana/cuetsy v0.1.11 // @grafana/grafana-as-code
github.com/grafana/grafana-aws-sdk v0.24.0 // @grafana/aws-datasources
github.com/grafana/grafana-azure-sdk-go v1.12.0 // @grafana/partner-datasources

4
go.sum
View File

@ -2161,8 +2161,8 @@ github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/alerting v0.0.0-20240222104113-abfafef9a7d2 h1:fmUMdtP7ditGgJFdXCwVxDrKnondHNNe0TkhN5YaIAI=
github.com/grafana/alerting v0.0.0-20240222104113-abfafef9a7d2/go.mod h1:brTFeACal/cSZAR8XO/4LPKs7rzNfS86okl6QjSP1eY=
github.com/grafana/alerting v0.0.0-20240304175322-e81931acc11b h1:rYx9ds94ZrueuXioEnoSqL737UYPSngPkMwBFl1guJE=
github.com/grafana/alerting v0.0.0-20240304175322-e81931acc11b/go.mod h1:brTFeACal/cSZAR8XO/4LPKs7rzNfS86okl6QjSP1eY=
github.com/grafana/codejen v0.0.3 h1:tAWxoTUuhgmEqxJPOLtJoxlPBbMULFwKFOcRsPRPXDw=
github.com/grafana/codejen v0.0.3/go.mod h1:zmwwM/DRyQB7pfuBjTWII3CWtxcXh8LTwAYGfDfpR6s=
github.com/grafana/cue v0.0.0-20230926092038-971951014e3f h1:TmYAMnqg3d5KYEAaT6PtTguL2GjLfvr6wnAX8Azw6tQ=

View File

@ -808,7 +808,8 @@ type Config struct {
// MuteTimeIntervals is deprecated and will be removed before Alertmanager 1.0.
MuteTimeIntervals []config.MuteTimeInterval `yaml:"mute_time_intervals,omitempty" json:"mute_time_intervals,omitempty"`
TimeIntervals []config.TimeInterval `yaml:"time_intervals,omitempty" json:"time_intervals,omitempty"`
Templates []string `yaml:"templates" json:"templates"`
// Templates is unused by the Grafana-managed Alertmanager but is passed through for compatibility with some external Alertmanagers.
Templates []string `yaml:"templates" json:"templates"`
}
// A Route is a node that contains definitions of how to handle alerts. This is modified

View File

@ -123,7 +123,6 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
}
amcfg := &alertingNotify.GrafanaAlertmanagerConfig{
WorkingDirectory: filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID))),
ExternalURL: cfg.AppURL,
AlertStoreCallback: nil,
PeerTimeout: cfg.UnifiedAlerting.HAPeerTimeout,
@ -321,38 +320,28 @@ func (am *alertmanager) aggregateInhibitMatchers(rules []config.InhibitRule, amu
// It is not safe to call concurrently.
func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig) (bool, error) {
// First, let's make sure this config is not already loaded
var amConfigChanged bool
rawConfig, err := json.Marshal(cfg.AlertmanagerConfig)
rawConfig, err := json.Marshal(cfg)
if err != nil {
// In theory, this should never happen.
return false, err
}
if am.Base.ConfigHash() != md5.Sum(rawConfig) {
amConfigChanged = true
}
if cfg.TemplateFiles == nil {
cfg.TemplateFiles = map[string]string{}
}
cfg.TemplateFiles["__default__.tmpl"] = alertingTemplates.DefaultTemplateString
// next, we need to make sure we persist the templates to disk.
paths, templatesChanged, err := PersistTemplates(am.logger, cfg, am.Base.WorkingDirectory())
if err != nil {
return false, err
}
cfg.AlertmanagerConfig.Templates = paths
// If neither the configuration nor templates have changed, we've got nothing to do.
if !amConfigChanged && !templatesChanged {
am.logger.Debug("Neither config nor template have changed, skipping configuration sync.")
// If configuration hasn't changed, we've got nothing to do.
configHash := md5.Sum(rawConfig)
if am.Base.ConfigHash() == configHash {
am.logger.Debug("Config hasn't changed, skipping configuration sync.")
return false, nil
}
am.logger.Info("Applying new configuration to Alertmanager", "configHash", fmt.Sprintf("%x", configHash))
err = am.Base.ApplyConfig(AlertingConfiguration{
rawAlertmanagerConfig: rawConfig,
alertmanagerConfig: cfg.AlertmanagerConfig,
configHash: configHash,
route: cfg.AlertmanagerConfig.Route.AsAMRoute(),
inhibitRules: cfg.AlertmanagerConfig.InhibitRules,
muteTimeIntervals: cfg.AlertmanagerConfig.MuteTimeIntervals,
timeIntervals: cfg.AlertmanagerConfig.TimeIntervals,
templates: ToTemplateDefinitions(cfg),
receivers: PostableApiAlertingConfigToApiReceivers(cfg.AlertmanagerConfig),
receiverIntegrationsFunc: am.buildReceiverIntegrations,
})

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
alertingNotify "github.com/grafana/alerting/notify"
alertingTemplates "github.com/grafana/alerting/templates"
"github.com/prometheus/alertmanager/config"
"github.com/grafana/grafana/pkg/components/simplejson"
@ -109,3 +110,15 @@ func PostableToGettableApiReceiver(r *apimodels.PostableApiReceiver, provenances
return out, nil
}
// ToTemplateDefinitions turns every entry of cfg.TemplateFiles into a
// TemplateDefinition, using the map key as the definition's Name and the map
// value as its Template. The slice is filled while ranging over the map, so
// the order of the returned definitions is not stable between calls.
func ToTemplateDefinitions(cfg *apimodels.PostableUserConfig) []alertingTemplates.TemplateDefinition {
	files := cfg.TemplateFiles
	definitions := make([]alertingTemplates.TemplateDefinition, 0, len(files))
	for fileName, content := range files {
		definition := alertingTemplates.TemplateDefinition{Name: fileName, Template: content}
		definitions = append(definitions, definition)
	}
	return definitions
}

View File

@ -1,82 +1,15 @@
package notifier
import (
"crypto/md5"
"encoding/json"
"fmt"
"os"
"path/filepath"
alertingNotify "github.com/grafana/alerting/notify"
alertingTemplates "github.com/grafana/alerting/templates"
"github.com/grafana/grafana/pkg/infra/log"
api "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
)
// PersistTemplates writes the templates in cfg.TemplateFiles into the
// directory path, one file per map entry, and deletes any other entry found in
// that directory.
//
// It returns the names of the templates in cfg.TemplateFiles (relative to
// path, in no particular order) and whether anything changed: a file was
// written because it was missing or had different content, or a stale entry
// was found in the directory. When cfg.TemplateFiles is empty it returns
// (nil, false, nil) without touching the disk.
//
// An error is returned when a template name is not a bare file name, when the
// directory cannot be created, or when a template file cannot be read or
// written. Failures while cleaning up stale entries are only logged.
func PersistTemplates(logger log.Logger, cfg *api.PostableUserConfig, path string) ([]string, bool, error) {
	if len(cfg.TemplateFiles) < 1 {
		return nil, false, nil
	}

	// Validate every name before touching the disk so that an invalid entry
	// cannot leave a partially written set of templates behind.
	for name := range cfg.TemplateFiles {
		if name != filepath.Base(filepath.Clean(name)) {
			return nil, false, fmt.Errorf("template file name '%s' is not valid", name)
		}
	}

	// The target directory is the same for every template, so create it once.
	if err := os.MkdirAll(path, 0750); err != nil {
		return nil, false, fmt.Errorf("unable to create template directory %q: %w", path, err)
	}

	var templatesChanged bool
	pathSet := make(map[string]struct{}, len(cfg.TemplateFiles))
	for name, content := range cfg.TemplateFiles {
		file := filepath.Join(path, name)
		pathSet[name] = struct{}{}

		// Check if the template file already exists and if it has changed.
		// We can safely ignore gosec here as the filename was validated above.
		// nolint:gosec
		tmpl, err := os.ReadFile(file)
		if err != nil && !os.IsNotExist(err) {
			return nil, false, err
		}
		if err == nil && string(tmpl) == content {
			// Template file is the same as the one we have, nothing to write.
			continue
		}

		// We can safely ignore gosec here as the filename was validated above.
		// nolint:gosec
		if err := os.WriteFile(file, []byte(content), 0644); err != nil {
			return nil, false, fmt.Errorf("unable to create Alertmanager template file %q: %w", file, err)
		}
		templatesChanged = true
	}

	// Now that we have the list of _actual_ templates, remove the ones that we
	// don't need. This is best-effort: failures are logged, not returned.
	existingFiles, err := os.ReadDir(path)
	if err != nil {
		logger.Error("Unable to read directory for deleting Alertmanager templates", "error", err, "path", path)
	}
	for _, existingFile := range existingFiles {
		if _, ok := pathSet[existingFile.Name()]; ok {
			continue
		}
		// A stale entry counts as a change even if removing it fails below.
		templatesChanged = true
		p := filepath.Join(path, existingFile.Name())
		if err := os.Remove(p); err != nil {
			logger.Error("Unable to delete template", "error", err, "file", p)
		}
	}

	paths := make([]string, 0, len(pathSet))
	for name := range pathSet {
		paths = append(paths, name)
	}
	return paths, templatesChanged, nil
}
func Load(rawConfig []byte) (*api.PostableUserConfig, error) {
cfg := &api.PostableUserConfig{}
@ -90,8 +23,13 @@ func Load(rawConfig []byte) (*api.PostableUserConfig, error) {
// AlertingConfiguration provides configuration for an Alertmanager.
// It implements the notify.Configuration interface.
type AlertingConfiguration struct {
alertmanagerConfig api.PostableApiAlertingConfig
route *alertingNotify.Route
inhibitRules []alertingNotify.InhibitRule
muteTimeIntervals []alertingNotify.MuteTimeInterval
timeIntervals []alertingNotify.TimeInterval
templates []alertingTemplates.TemplateDefinition
rawAlertmanagerConfig []byte
configHash [16]byte
receivers []*alertingNotify.APIReceiver
receiverIntegrationsFunc func(r *alertingNotify.APIReceiver, tmpl *alertingTemplates.Template) ([]*alertingNotify.Integration, error)
@ -108,15 +46,15 @@ func (a AlertingConfiguration) DispatcherLimits() alertingNotify.DispatcherLimit
}
func (a AlertingConfiguration) InhibitRules() []alertingNotify.InhibitRule {
return a.alertmanagerConfig.InhibitRules
return a.inhibitRules
}
func (a AlertingConfiguration) MuteTimeIntervals() []alertingNotify.MuteTimeInterval {
return a.alertmanagerConfig.MuteTimeIntervals
return a.muteTimeIntervals
}
func (a AlertingConfiguration) TimeIntervals() []alertingNotify.TimeInterval {
return a.alertmanagerConfig.TimeIntervals
return a.timeIntervals
}
func (a AlertingConfiguration) Receivers() []*alertingNotify.APIReceiver {
@ -124,15 +62,15 @@ func (a AlertingConfiguration) Receivers() []*alertingNotify.APIReceiver {
}
func (a AlertingConfiguration) RoutingTree() *alertingNotify.Route {
return a.alertmanagerConfig.Route.AsAMRoute()
return a.route
}
func (a AlertingConfiguration) Templates() []string {
return a.alertmanagerConfig.Templates
func (a AlertingConfiguration) Templates() []alertingTemplates.TemplateDefinition {
return a.templates
}
func (a AlertingConfiguration) Hash() [16]byte {
return md5.Sum(a.rawAlertmanagerConfig)
return a.configHash
}
func (a AlertingConfiguration) Raw() []byte {

View File

@ -2,110 +2,12 @@ package notifier
import (
"errors"
"os"
"path/filepath"
"testing"
"github.com/grafana/grafana/pkg/infra/log/logtest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
api "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
)
// TestPersistTemplates runs PersistTemplates against a temporary directory
// that is pre-populated with each case's existing templates. It then checks
// the returned error, paths and change flag and, when no error is expected,
// that the directory holds exactly the requested templates.
func TestPersistTemplates(t *testing.T) {
	type testCase struct {
		name              string
		templates         map[string]string
		existingTemplates map[string]string
		expectedPaths     []string
		expectedError     error
		expectedChange    bool
	}

	// snapshot reads every regular entry of dir into a name -> content map.
	snapshot := func(t *testing.T, dir string) map[string]string {
		contents := map[string]string{}
		entries, err := os.ReadDir(dir)
		require.NoError(t, err)
		for _, entry := range entries {
			if !entry.IsDir() && entry.Name() != "" {
				// Safe to disable, this is a test.
				// nolint:gosec
				raw, err := os.ReadFile(filepath.Join(dir, entry.Name()))
				require.NoError(t, err)
				contents[entry.Name()] = string(raw)
			}
		}
		return contents
	}

	cases := []testCase{
		{
			name:           "With valid templates file names, it persists successfully",
			templates:      map[string]string{"email.template": "a perfectly fine template"},
			expectedPaths:  []string{"email.template"},
			expectedError:  nil,
			expectedChange: true,
		},
		{
			name:          "With a invalid filename, it fails",
			templates:     map[string]string{"adirectory/email.template": "a perfectly fine template"},
			expectedError: errors.New("template file name 'adirectory/email.template' is not valid"),
		},
		{
			name:              "with a template that has the same name but different content to an existing one",
			templates:         map[string]string{"email.template": "a completely different content"},
			existingTemplates: map[string]string{"email.template": "a perfectly fine template"},
			expectedPaths:     []string{"email.template"},
			expectedError:     nil,
			expectedChange:    true,
		},
		{
			name:              "with a template that has the same name and the same content as an existing one",
			templates:         map[string]string{"email.template": "a perfectly fine template"},
			existingTemplates: map[string]string{"email.template": "a perfectly fine template"},
			expectedPaths:     []string{"email.template"},
			expectedError:     nil,
			expectedChange:    false,
		},
		{
			name:              "with two new template files, it changes the template tree",
			templates:         map[string]string{"slack.template": "a perfectly fine template", "webhook.template": "a webhook template"},
			existingTemplates: map[string]string{"email.template": "a perfectly fine template"},
			expectedPaths:     []string{"slack.template", "webhook.template"},
			expectedError:     nil,
			expectedChange:    true,
		},
		{
			name:              "when we remove a template file from the list, it changes the template tree",
			templates:         map[string]string{"slack.template": "a perfectly fine template"},
			existingTemplates: map[string]string{"slack.template": "a perfectly fine template", "webhook.template": "a webhook template"},
			expectedPaths:     []string{"slack.template"},
			expectedError:     nil,
			expectedChange:    true,
		},
	}

	for _, test := range cases {
		t.Run(test.name, func(t *testing.T) {
			workDir := t.TempDir()

			// Seed the directory with the "existing files".
			for fileName, body := range test.existingTemplates {
				seedErr := os.WriteFile(filepath.Join(workDir, fileName), []byte(body), 0644)
				require.NoError(t, seedErr)
			}

			userConfig := &api.PostableUserConfig{TemplateFiles: test.templates}
			fakeLogger := logtest.Fake{}
			gotPaths, gotChanged, gotErr := PersistTemplates(&fakeLogger, userConfig, workDir)

			onDisk := snapshot(t, workDir)

			require.Equal(t, test.expectedError, gotErr)
			require.ElementsMatch(t, test.expectedPaths, gotPaths)
			require.Equal(t, test.expectedChange, gotChanged)
			if test.expectedError == nil {
				require.Equal(t, test.templates, onDisk)
			}
		})
	}
}
func TestLoad(t *testing.T) {
tc := []struct {
name string

View File

@ -8,15 +8,15 @@ import (
// TODO: We no longer do apimodels at this layer, move it to the API.
func (am *alertmanager) GetStatus() apimodels.GettableStatus {
config := &apimodels.PostableApiAlertingConfig{}
status := am.Base.GetStatus() // TODO: This should return a GettableStatus, for now it returns PostableApiAlertingConfig.
config := &apimodels.PostableUserConfig{}
status := am.Base.GetStatus() // TODO: This should return a GettableStatus, for now it returns PostableUserConfig.
if status == nil {
return *apimodels.NewGettableStatus(config)
return *apimodels.NewGettableStatus(&config.AlertmanagerConfig)
}
if err := json.Unmarshal(status, config); err != nil {
am.logger.Error("Unable to unmarshall alertmanager config", "Err", err)
}
return *apimodels.NewGettableStatus(config)
return *apimodels.NewGettableStatus(&config.AlertmanagerConfig)
}

View File

@ -63,11 +63,6 @@ func (t *TemplateService) SetTemplate(ctx context.Context, orgID int64, tmpl def
revision.cfg.TemplateFiles = map[string]string{}
}
revision.cfg.TemplateFiles[tmpl.Name] = tmpl.Template
tmpls := make([]string, 0, len(revision.cfg.TemplateFiles))
for name := range revision.cfg.TemplateFiles {
tmpls = append(tmpls, name)
}
revision.cfg.AlertmanagerConfig.Templates = tmpls
err = t.xact.InTransaction(ctx, func(ctx context.Context) error {
if err := t.configStore.Save(ctx, revision, orgID); err != nil {