2021-08-24 05:28:09 -05:00
package notifier
import (
2021-09-14 06:55:01 -05:00
"bytes"
2021-08-24 05:28:09 -05:00
"context"
"testing"
"time"
2024-04-09 12:39:34 -05:00
"github.com/prometheus/alertmanager/types"
2023-01-30 02:55:35 -06:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
2024-05-03 14:32:30 -05:00
alertingNotify "github.com/grafana/alerting/notify"
2021-09-16 09:33:51 -05:00
"github.com/grafana/grafana/pkg/infra/log"
2024-02-15 08:45:10 -06:00
"github.com/grafana/grafana/pkg/services/featuremgmt"
2021-09-14 06:55:01 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
2021-08-24 05:28:09 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/models"
2023-04-05 13:10:03 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/store"
2023-10-12 07:43:10 -05:00
ngfakes "github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
2021-11-04 11:47:21 -05:00
"github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
2021-08-24 05:28:09 -05:00
"github.com/grafana/grafana/pkg/setting"
)
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs ( t * testing . T ) {
2022-03-22 09:43:29 -05:00
tmpDir := t . TempDir ( )
2021-09-16 09:33:51 -05:00
cfg := & setting . Cfg {
2021-09-29 09:16:40 -05:00
DataPath : tmpDir ,
UnifiedAlerting : setting . UnifiedAlertingSettings {
AlertmanagerConfigPollInterval : 3 * time . Minute ,
DefaultConfiguration : setting . GetAlertmanagerDefaultConfiguration ( ) ,
DisabledOrgs : map [ int64 ] struct { } { 5 : { } } ,
} , // do not poll in tests.
2021-09-16 09:33:51 -05:00
}
2024-04-09 12:39:34 -05:00
mam := setupMam ( t , cfg )
reg := mam . metrics . Registerer . ( * prometheus . Registry )
orgStore := mam . orgStore . ( * FakeOrgStore )
2021-08-24 05:28:09 -05:00
ctx := context . Background ( )
// Ensure that one Alertmanager is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 3
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 3
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2023-02-02 11:45:17 -06:00
// Configurations should be marked as successfully applied.
for _ , org := range orgStore . orgs {
2024-04-09 12:39:34 -05:00
configs , err := mam . configStore . GetAppliedConfigurations ( ctx , org , 10 )
2023-03-31 15:43:04 -05:00
require . NoError ( t , err )
2023-02-02 11:45:17 -06:00
require . Len ( t , configs , 1 )
}
2021-08-24 05:28:09 -05:00
}
// When an org is removed, it should detect it.
{
orgStore . orgs = [ ] int64 { 1 , 3 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 2 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 2
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 2
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2021-08-24 05:28:09 -05:00
}
// if the org comes back, it should detect it.
{
orgStore . orgs = [ ] int64 { 1 , 2 , 3 , 4 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 4 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 4
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 4
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2021-08-24 05:28:09 -05:00
}
2021-09-29 09:16:40 -05:00
// if the disabled org comes back, it should not detect it.
{
orgStore . orgs = [ ] int64 { 1 , 2 , 3 , 4 , 5 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 4 )
}
2021-10-12 05:05:02 -05:00
2021-10-14 05:04:00 -05:00
// Orphaned state should be removed.
2021-10-12 05:05:02 -05:00
{
2021-10-14 05:04:00 -05:00
orgID := int64 ( 6 )
2021-10-12 05:05:02 -05:00
2024-03-22 17:37:33 -05:00
// Populate the kvstore with orphaned records.
2024-04-09 12:39:34 -05:00
err := mam . kvStore . Set ( ctx , orgID , KVNamespace , SilencesFilename , "file_1" )
2021-10-14 05:04:00 -05:00
require . NoError ( t , err )
2024-04-09 12:39:34 -05:00
err = mam . kvStore . Set ( ctx , orgID , KVNamespace , NotificationLogFilename , "file_1" )
2021-10-14 05:04:00 -05:00
require . NoError ( t , err )
2021-10-12 05:05:02 -05:00
// Now re run the sync job once.
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
2021-10-14 05:04:00 -05:00
// The organization kvstore records should be gone by now.
2024-04-09 12:39:34 -05:00
_ , exists , _ := mam . kvStore . Get ( ctx , orgID , KVNamespace , SilencesFilename )
2021-10-14 05:04:00 -05:00
require . False ( t , exists )
2024-04-09 12:39:34 -05:00
_ , exists , _ = mam . kvStore . Get ( ctx , orgID , KVNamespace , NotificationLogFilename )
2021-10-14 05:04:00 -05:00
require . False ( t , exists )
2021-10-12 05:05:02 -05:00
}
2021-08-24 05:28:09 -05:00
}
2021-10-12 12:10:08 -05:00
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures ( t * testing . T ) {
2024-04-09 12:39:34 -05:00
mam := setupMam ( t , nil )
ctx := context . Background ( )
2021-10-12 12:10:08 -05:00
// Include a broken configuration for organization 2.
2023-02-02 11:45:17 -06:00
var orgWithBadConfig int64 = 2
2024-04-09 12:39:34 -05:00
mam . configStore = NewFakeConfigStore ( t , map [ int64 ] * models . AlertConfiguration {
2023-02-02 11:45:17 -06:00
2 : { AlertmanagerConfiguration : brokenConfig , OrgID : orgWithBadConfig } ,
} )
2024-04-09 12:39:34 -05:00
orgs , err := mam . orgStore . GetOrgs ( ctx )
2021-10-12 12:10:08 -05:00
require . NoError ( t , err )
2023-02-02 11:45:17 -06:00
// No successfully applied configurations should be found at first.
{
for _ , org := range orgs {
2024-04-09 12:39:34 -05:00
configs , err := mam . configStore . GetAppliedConfigurations ( ctx , org , 10 )
2023-03-31 15:43:04 -05:00
require . NoError ( t , err )
require . Len ( t , configs , 0 )
2023-02-02 11:45:17 -06:00
}
}
2021-10-12 12:10:08 -05:00
// When you sync the first time, the alertmanager is created but is doesn't become ready until you have a configuration applied.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
2023-01-13 10:54:38 -06:00
require . True ( t , mam . alertmanagers [ 1 ] . Ready ( ) )
require . False ( t , mam . alertmanagers [ 2 ] . Ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . Ready ( ) )
2023-02-02 11:45:17 -06:00
// Configurations should be marked as successfully applied for all orgs except for org 2.
for _ , org := range orgs {
2024-04-09 12:39:34 -05:00
configs , err := mam . configStore . GetAppliedConfigurations ( ctx , org , 10 )
2023-03-31 15:43:04 -05:00
require . NoError ( t , err )
2023-02-02 11:45:17 -06:00
if org == orgWithBadConfig {
2023-03-31 15:43:04 -05:00
require . Len ( t , configs , 0 )
2023-02-02 11:45:17 -06:00
} else {
require . Len ( t , configs , 1 )
}
}
2021-10-12 12:10:08 -05:00
}
// On the next sync, it never panics and alertmanager is still not ready.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
2023-01-13 10:54:38 -06:00
require . True ( t , mam . alertmanagers [ 1 ] . Ready ( ) )
require . False ( t , mam . alertmanagers [ 2 ] . Ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . Ready ( ) )
2023-02-02 11:45:17 -06:00
// The configuration should still be marked as successfully applied for all orgs except for org 2.
for _ , org := range orgs {
2024-04-09 12:39:34 -05:00
configs , err := mam . configStore . GetAppliedConfigurations ( ctx , org , 10 )
2023-03-31 15:43:04 -05:00
require . NoError ( t , err )
2023-02-02 11:45:17 -06:00
if org == orgWithBadConfig {
2023-03-31 15:43:04 -05:00
require . Len ( t , configs , 0 )
2023-02-02 11:45:17 -06:00
} else {
require . Len ( t , configs , 1 )
}
}
2021-10-12 12:10:08 -05:00
}
// If we fix the configuration, it becomes ready.
{
2024-04-09 12:39:34 -05:00
mam . configStore . ( * fakeConfigStore ) . configs = map [ int64 ] * models . AlertConfiguration { } // It'll apply the default config.
2021-10-12 12:10:08 -05:00
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
2023-01-13 10:54:38 -06:00
require . True ( t , mam . alertmanagers [ 1 ] . Ready ( ) )
require . True ( t , mam . alertmanagers [ 2 ] . Ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . Ready ( ) )
2023-02-02 11:45:17 -06:00
// All configurations should be marked as successfully applied.
for _ , org := range orgs {
2024-04-09 12:39:34 -05:00
configs , err := mam . configStore . GetAppliedConfigurations ( ctx , org , 10 )
2023-03-31 15:43:04 -05:00
require . NoError ( t , err )
require . NotEqual ( t , 0 , len ( configs ) )
2023-02-02 11:45:17 -06:00
}
2021-10-12 12:10:08 -05:00
}
}
2021-08-24 05:28:09 -05:00
func TestMultiOrgAlertmanager_AlertmanagerFor ( t * testing . T ) {
2024-04-09 12:39:34 -05:00
mam := setupMam ( t , nil )
2021-08-24 05:28:09 -05:00
ctx := context . Background ( )
// Ensure that one Alertmanagers is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
}
// First, let's try to request an Alertmanager from an org that doesn't exist.
{
2024-05-03 14:32:30 -05:00
_ , err := mam . alertmanagerForOrg ( 5 )
require . ErrorIs ( t , err , ErrAlertmanagerNotFound )
2021-08-24 05:28:09 -05:00
}
// With an Alertmanager that exists, it responds correctly.
{
2024-05-03 14:32:30 -05:00
am , err := mam . alertmanagerForOrg ( 2 )
2021-08-24 05:28:09 -05:00
require . NoError ( t , err )
2023-10-31 04:58:47 -05:00
internalAm , ok := am . ( * alertmanager )
require . True ( t , ok )
2024-05-03 06:59:02 -05:00
status , err := am . GetStatus ( ctx )
require . NoError ( t , err )
require . Equal ( t , "N/A" , * status . VersionInfo . Version )
2023-10-31 04:58:47 -05:00
require . Equal ( t , int64 ( 2 ) , internalAm . orgID )
2021-08-24 05:28:09 -05:00
}
// Let's now remove the previous queried organization.
2024-04-09 12:39:34 -05:00
mam . orgStore . ( * FakeOrgStore ) . orgs = [ ] int64 { 1 , 3 }
2021-08-24 05:28:09 -05:00
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
{
2024-05-03 14:32:30 -05:00
_ , err := mam . alertmanagerForOrg ( 2 )
require . ErrorIs ( t , err , ErrAlertmanagerNotFound )
2021-08-24 05:28:09 -05:00
}
}
2021-09-15 16:48:52 -05:00
2023-04-05 13:10:03 -05:00
func TestMultiOrgAlertmanager_ActivateHistoricalConfiguration ( t * testing . T ) {
2024-04-09 12:39:34 -05:00
mam := setupMam ( t , nil )
2023-04-05 13:10:03 -05:00
ctx := context . Background ( )
// Ensure that one Alertmanager is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
}
// First, let's confirm the default configs are active.
cfgs , err := mam . getLatestConfigs ( ctx )
require . NoError ( t , err )
require . Equal ( t , defaultConfig , cfgs [ 1 ] . AlertmanagerConfiguration )
require . Equal ( t , defaultConfig , cfgs [ 2 ] . AlertmanagerConfiguration )
// Store id for later use.
originalId := cfgs [ 2 ] . ID
require . Equal ( t , defaultConfig , cfgs [ 3 ] . AlertmanagerConfiguration )
// Now let's save a new config for org 2.
newConfig := ` { "template_files":null,"alertmanager_config": { "route": { "receiver":"grafana-default-email","group_by":["grafana_folder","alertname"]},"templates":null,"receivers":[ { "name":"grafana-default-email","grafana_managed_receiver_configs":[ { "uid":"","name":"some other name","type":"email","disableResolveMessage":false,"settings": { "addresses":"\u003cexample@email.com\u003e"},"secureSettings":null}]}]}} `
2024-05-03 14:32:30 -05:00
am , err := mam . alertmanagerForOrg ( 2 )
2023-04-05 13:10:03 -05:00
require . NoError ( t , err )
postable , err := Load ( [ ] byte ( newConfig ) )
require . NoError ( t , err )
err = am . SaveAndApplyConfig ( ctx , postable )
require . NoError ( t , err )
// Verify that the org has the new config.
cfgs , err = mam . getLatestConfigs ( ctx )
require . NoError ( t , err )
require . Equal ( t , newConfig , cfgs [ 2 ] . AlertmanagerConfiguration )
// First, let's try to activate a historical alertmanager config that doesn't exist.
{
err := mam . ActivateHistoricalConfiguration ( ctx , 1 , 42 )
require . Error ( t , err , store . ErrNoAlertmanagerConfiguration )
}
// Finally, we activate the default config for org 2.
{
err := mam . ActivateHistoricalConfiguration ( ctx , 2 , originalId )
require . NoError ( t , err )
}
// Verify that the org has the old default config.
cfgs , err = mam . getLatestConfigs ( ctx )
require . NoError ( t , err )
2024-04-09 12:39:34 -05:00
require . JSONEq ( t , defaultConfig , cfgs [ 2 ] . AlertmanagerConfiguration )
2023-04-05 13:10:03 -05:00
}
2024-04-09 12:39:34 -05:00
func TestMultiOrgAlertmanager_Silences ( t * testing . T ) {
mam := setupMam ( t , nil )
ctx := context . Background ( )
// Ensure that one Alertmanager is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
}
2024-05-03 14:32:30 -05:00
am , err := mam . alertmanagerForOrg ( 1 )
2024-04-09 12:39:34 -05:00
require . NoError ( t , err )
// Confirm no silences.
silences , err := am . ListSilences ( ctx , [ ] string { } )
require . NoError ( t , err )
require . Len ( t , silences , 0 )
// Confirm empty state.
state , err := am . SilenceState ( ctx )
require . NoError ( t , err )
require . Len ( t , state , 0 )
// Confirm empty kvstore.
v , ok , err := mam . kvStore . Get ( ctx , 1 , KVNamespace , SilencesFilename )
require . NoError ( t , err )
require . False ( t , ok )
require . Empty ( t , v )
// Create 2 silences.
2024-05-03 14:32:30 -05:00
gen := models . SilenceGen ( models . SilenceMuts . WithEmptyId ( ) )
sid , err := mam . CreateSilence ( ctx , 1 , gen ( ) )
2024-04-09 12:39:34 -05:00
require . NoError ( t , err )
require . NotEmpty ( t , sid )
2024-05-03 14:32:30 -05:00
sid2 , err := mam . CreateSilence ( ctx , 1 , gen ( ) )
2024-04-09 12:39:34 -05:00
require . NoError ( t , err )
require . NotEmpty ( t , sid2 )
// Confirm 2 silences.
silences , err = am . ListSilences ( ctx , [ ] string { } )
require . NoError ( t , err )
require . Len ( t , silences , 2 )
// Confirm 2 states.
state , err = am . SilenceState ( ctx )
require . NoError ( t , err )
require . Len ( t , state , 2 )
// Confirm 2 silences in the kvstore.
v , ok , err = mam . kvStore . Get ( ctx , 1 , KVNamespace , SilencesFilename )
require . NoError ( t , err )
require . True ( t , ok )
decoded , err := decode ( v )
require . NoError ( t , err )
state , err = alertingNotify . DecodeState ( bytes . NewReader ( decoded ) )
require . NoError ( t , err )
require . Len ( t , state , 2 )
// Delete silence.
err = mam . DeleteSilence ( ctx , 1 , sid )
require . NoError ( t , err )
// Confirm silence is expired in memory.
silence , err := am . GetSilence ( ctx , sid )
require . NoError ( t , err )
require . EqualValues ( t , types . SilenceStateExpired , * silence . Status . State )
// Confirm silence is expired in kvstore.
v , ok , err = mam . kvStore . Get ( ctx , 1 , KVNamespace , SilencesFilename )
require . NoError ( t , err )
require . True ( t , ok )
decoded , err = decode ( v )
require . NoError ( t , err )
state , err = alertingNotify . DecodeState ( bytes . NewReader ( decoded ) )
require . NoError ( t , err )
require . True ( t , time . Now ( ) . After ( state [ sid ] . Silence . EndsAt ) ) // Expired.
}
// setupMam builds a MultiOrgAlertmanager for tests, wired to in-memory fakes.
//
// When cfg is nil, a configuration is created that stores data in a per-test
// temporary directory and uses defaultConfig as the default Alertmanager
// configuration. The returned instance uses an empty fake config store, a fake
// org store containing orgs 1, 2 and 3, a fake kvstore, a fake provisioning
// store, a test secrets service, and metrics registered on a fresh pedantic
// Prometheus registry. Construction errors fail the test immediately.
func setupMam(t *testing.T, cfg *setting.Cfg) *MultiOrgAlertmanager {
	// Attribute assertion failures in here to the calling test's line.
	t.Helper()

	if cfg == nil {
		tmpDir := t.TempDir()
		cfg = &setting.Cfg{
			DataPath:        tmpDir,
			UnifiedAlerting: setting.UnifiedAlertingSettings{AlertmanagerConfigPollInterval: 3 * time.Minute, DefaultConfiguration: defaultConfig}, // do not poll in tests.
		}
	}

	cs := NewFakeConfigStore(t, map[int64]*models.AlertConfiguration{})
	orgStore := &FakeOrgStore{
		orgs: []int64{1, 2, 3},
	}
	kvStore := ngfakes.NewFakeKVStore(t)
	provStore := ngfakes.NewFakeProvisioningStore()
	secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
	decryptFn := secretsService.GetDecryptedValue
	reg := prometheus.NewPedanticRegistry()
	m := metrics.NewNGAlert(reg)

	mam, err := NewMultiOrgAlertmanager(cfg, cs, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, featuremgmt.WithFeatures())
	require.NoError(t, err)
	return mam
}
// defaultConfig is the Alertmanager configuration JSON that setupMam installs
// as the default configuration when the caller passes no settings. It defines a
// single route to the "grafana-default-email" receiver, which has one email
// integration named "email receiver". Tests compare stored configurations
// against this value.
var defaultConfig = `
{
"template_files" : null ,
"alertmanager_config" : {
"route" : {
"receiver" : "grafana-default-email" ,
"group_by" : [
"grafana_folder" ,
"alertname"
]
} ,
"templates" : null ,
"receivers" : [
{
"name" : "grafana-default-email" ,
"grafana_managed_receiver_configs" : [
{
"uid" : "" ,
"name" : "email receiver" ,
"type" : "email" ,
"disableResolveMessage" : false ,
"settings" : {
"addresses" : "\u003cexample@email.com\u003e" } ,
"secureSettings" : null
}
]
}
]
}
} `
2021-10-12 12:10:08 -05:00
// brokenConfig is an intentionally malformed Alertmanager configuration used to
// exercise the failure path of the sync: the document has no opening brace,
// lacks a comma after the "addresses" entry, and carries garbage bytes in the
// "url" value. TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures
// stores it for org 2 and expects that org's Alertmanager to stay not ready.
// The content must not be "repaired" or reformatted.
var brokenConfig = `
"alertmanager_config" : {
"route" : {
"receiver" : "grafana-default-email"
} ,
"receivers" : [ {
"name" : "grafana-default-email" ,
"grafana_managed_receiver_configs" : [ {
"uid" : "" ,
"name" : "slack receiver" ,
"type" : "slack" ,
"settings" : {
"addresses" : "<example@email.com>"
"url" : "<22> r_<72> <5F> q/b<> <62> <EFBFBD> <EFBFBD> <EFBFBD> p@ⱎȏ =<3D> <> @ӹtd>Rú<52> H<EFBFBD> <48> <20> ;<3B> @Uf<55> <66> 0<EFBFBD> \k2*jh<6A> }Íu<C38D> )" 2 <EFBFBD> F6 ] <EFBFBD> } r <EFBFBD> <EFBFBD> R <EFBFBD> b <EFBFBD> d <EFBFBD> J ; <EFBFBD> <EFBFBD> S퓧 <EFBFBD> <EFBFBD> $ <EFBFBD> <EFBFBD> " ,
"recipient" : "#graphana-metrics" ,
}
} ]
} ]
}
} `