2021-08-24 05:28:09 -05:00
package notifier
import (
2021-09-14 06:55:01 -05:00
"bytes"
2021-08-24 05:28:09 -05:00
"context"
2021-10-12 05:05:02 -05:00
"errors"
"io/fs"
2021-09-15 16:48:52 -05:00
"os"
2021-10-12 05:05:02 -05:00
"path/filepath"
2021-08-24 05:28:09 -05:00
"testing"
"time"
2021-09-16 09:33:51 -05:00
"github.com/grafana/grafana/pkg/infra/log"
2021-09-14 06:55:01 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
2021-08-24 05:28:09 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/models"
2022-04-22 11:57:56 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
2021-11-04 11:47:21 -05:00
"github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
2021-08-24 05:28:09 -05:00
"github.com/grafana/grafana/pkg/setting"
2021-11-04 11:47:21 -05:00
2021-09-14 06:55:01 -05:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
2021-08-24 05:28:09 -05:00
"github.com/stretchr/testify/require"
)
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs ( t * testing . T ) {
configStore := & FakeConfigStore {
configs : map [ int64 ] * models . AlertConfiguration { } ,
}
orgStore := & FakeOrgStore {
orgs : [ ] int64 { 1 , 2 , 3 } ,
}
2021-09-15 16:48:52 -05:00
2022-03-22 09:43:29 -05:00
tmpDir := t . TempDir ( )
2021-12-27 17:01:17 -06:00
kvStore := NewFakeKVStore ( t )
2022-04-22 11:57:56 -05:00
provStore := provisioning . NewFakeProvisioningStore ( )
2021-11-04 11:47:21 -05:00
secretsService := secretsManager . SetupTestService ( t , fakes . NewFakeSecretsStore ( ) )
decryptFn := secretsService . GetDecryptedValue
2021-09-14 06:55:01 -05:00
reg := prometheus . NewPedanticRegistry ( )
m := metrics . NewNGAlert ( reg )
2021-09-16 09:33:51 -05:00
cfg := & setting . Cfg {
2021-09-29 09:16:40 -05:00
DataPath : tmpDir ,
UnifiedAlerting : setting . UnifiedAlertingSettings {
AlertmanagerConfigPollInterval : 3 * time . Minute ,
DefaultConfiguration : setting . GetAlertmanagerDefaultConfiguration ( ) ,
DisabledOrgs : map [ int64 ] struct { } { 5 : { } } ,
} , // do not poll in tests.
2021-09-16 09:33:51 -05:00
}
2022-04-22 11:57:56 -05:00
mam , err := NewMultiOrgAlertmanager ( cfg , configStore , orgStore , kvStore , provStore , decryptFn , m . GetMultiOrgAlertmanagerMetrics ( ) , nil , log . New ( "testlogger" ) , secretsService )
2021-09-16 09:33:51 -05:00
require . NoError ( t , err )
2021-08-24 05:28:09 -05:00
ctx := context . Background ( )
// Ensure that one Alertmanager is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 3
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 3
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2021-08-24 05:28:09 -05:00
}
// When an org is removed, it should detect it.
{
orgStore . orgs = [ ] int64 { 1 , 3 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 2 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 2
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 2
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2021-08-24 05:28:09 -05:00
}
// if the org comes back, it should detect it.
{
orgStore . orgs = [ ] int64 { 1 , 2 , 3 , 4 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 4 )
2021-09-14 06:55:01 -05:00
require . NoError ( t , testutil . GatherAndCompare ( reg , bytes . NewBufferString ( `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations .
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 4
# HELP grafana_alerting_discovered_configurations The number of organizations we ' ve discovered that require an Alertmanager configuration .
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 4
` ) , "grafana_alerting_discovered_configurations" , "grafana_alerting_active_configurations" ) )
2021-08-24 05:28:09 -05:00
}
2021-09-29 09:16:40 -05:00
// if the disabled org comes back, it should not detect it.
{
orgStore . orgs = [ ] int64 { 1 , 2 , 3 , 4 , 5 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 4 )
}
2021-10-12 05:05:02 -05:00
2021-10-14 05:04:00 -05:00
// Orphaned state should be removed.
2021-10-12 05:05:02 -05:00
{
2021-10-14 05:04:00 -05:00
orgID := int64 ( 6 )
2021-10-12 05:05:02 -05:00
// First we create a directory and two files for an ograniztation that
// is not existing in the current state.
orphanDir := filepath . Join ( tmpDir , "alerting" , "6" )
err := os . Mkdir ( orphanDir , 0750 )
require . NoError ( t , err )
silencesPath := filepath . Join ( orphanDir , silencesFilename )
err = os . WriteFile ( silencesPath , [ ] byte ( "file_1" ) , 0644 )
require . NoError ( t , err )
notificationPath := filepath . Join ( orphanDir , notificationLogFilename )
err = os . WriteFile ( notificationPath , [ ] byte ( "file_2" ) , 0644 )
require . NoError ( t , err )
// We make sure that both files are on disk.
info , err := os . Stat ( silencesPath )
require . NoError ( t , err )
require . Equal ( t , info . Name ( ) , silencesFilename )
info , err = os . Stat ( notificationPath )
require . NoError ( t , err )
require . Equal ( t , info . Name ( ) , notificationLogFilename )
2021-10-14 05:04:00 -05:00
// We also populate the kvstore with orphaned records.
err = kvStore . Set ( ctx , orgID , KVNamespace , silencesFilename , "file_1" )
require . NoError ( t , err )
err = kvStore . Set ( ctx , orgID , KVNamespace , notificationLogFilename , "file_1" )
require . NoError ( t , err )
2021-10-12 05:05:02 -05:00
// Now re run the sync job once.
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
// The organization directory should be gone by now.
_ , err = os . Stat ( orphanDir )
require . True ( t , errors . Is ( err , fs . ErrNotExist ) )
2021-10-14 05:04:00 -05:00
// The organization kvstore records should be gone by now.
_ , exists , _ := kvStore . Get ( ctx , orgID , KVNamespace , silencesFilename )
require . False ( t , exists )
_ , exists , _ = kvStore . Get ( ctx , orgID , KVNamespace , notificationLogFilename )
require . False ( t , exists )
2021-10-12 05:05:02 -05:00
}
2021-08-24 05:28:09 -05:00
}
2021-10-12 12:10:08 -05:00
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures ( t * testing . T ) {
// Include a broken configuration for organization 2.
configStore := & FakeConfigStore {
configs : map [ int64 ] * models . AlertConfiguration {
2 : { AlertmanagerConfiguration : brokenConfig , OrgID : 2 } ,
} ,
}
orgStore := & FakeOrgStore {
orgs : [ ] int64 { 1 , 2 , 3 } ,
}
2022-03-22 09:43:29 -05:00
tmpDir := t . TempDir ( )
2021-12-27 17:01:17 -06:00
kvStore := NewFakeKVStore ( t )
2022-04-22 11:57:56 -05:00
provStore := provisioning . NewFakeProvisioningStore ( )
2021-11-04 11:47:21 -05:00
secretsService := secretsManager . SetupTestService ( t , fakes . NewFakeSecretsStore ( ) )
decryptFn := secretsService . GetDecryptedValue
2021-10-12 12:10:08 -05:00
reg := prometheus . NewPedanticRegistry ( )
m := metrics . NewNGAlert ( reg )
cfg := & setting . Cfg {
DataPath : tmpDir ,
UnifiedAlerting : setting . UnifiedAlertingSettings {
AlertmanagerConfigPollInterval : 10 * time . Minute ,
DefaultConfiguration : setting . GetAlertmanagerDefaultConfiguration ( ) ,
} , // do not poll in tests.
}
2022-04-22 11:57:56 -05:00
mam , err := NewMultiOrgAlertmanager ( cfg , configStore , orgStore , kvStore , provStore , decryptFn , m . GetMultiOrgAlertmanagerMetrics ( ) , nil , log . New ( "testlogger" ) , secretsService )
2021-10-12 12:10:08 -05:00
require . NoError ( t , err )
ctx := context . Background ( )
// When you sync the first time, the alertmanager is created but is doesn't become ready until you have a configuration applied.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
require . True ( t , mam . alertmanagers [ 1 ] . ready ( ) )
require . False ( t , mam . alertmanagers [ 2 ] . ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . ready ( ) )
}
// On the next sync, it never panics and alertmanager is still not ready.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
require . True ( t , mam . alertmanagers [ 1 ] . ready ( ) )
require . False ( t , mam . alertmanagers [ 2 ] . ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . ready ( ) )
}
// If we fix the configuration, it becomes ready.
{
configStore . configs = map [ int64 ] * models . AlertConfiguration { } // It'll apply the default config.
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
require . True ( t , mam . alertmanagers [ 1 ] . ready ( ) )
require . True ( t , mam . alertmanagers [ 2 ] . ready ( ) )
require . True ( t , mam . alertmanagers [ 3 ] . ready ( ) )
}
}
2021-08-24 05:28:09 -05:00
func TestMultiOrgAlertmanager_AlertmanagerFor ( t * testing . T ) {
configStore := & FakeConfigStore {
configs : map [ int64 ] * models . AlertConfiguration { } ,
}
orgStore := & FakeOrgStore {
orgs : [ ] int64 { 1 , 2 , 3 } ,
}
2022-03-22 09:43:29 -05:00
tmpDir := t . TempDir ( )
2021-09-16 09:33:51 -05:00
cfg := & setting . Cfg {
2021-09-20 02:12:21 -05:00
DataPath : tmpDir ,
2021-09-28 05:00:16 -05:00
UnifiedAlerting : setting . UnifiedAlertingSettings { AlertmanagerConfigPollInterval : 3 * time . Minute , DefaultConfiguration : setting . GetAlertmanagerDefaultConfiguration ( ) } , // do not poll in tests.
2021-09-16 09:33:51 -05:00
}
2021-12-27 17:01:17 -06:00
kvStore := NewFakeKVStore ( t )
2022-04-22 11:57:56 -05:00
provStore := provisioning . NewFakeProvisioningStore ( )
2021-11-04 11:47:21 -05:00
secretsService := secretsManager . SetupTestService ( t , fakes . NewFakeSecretsStore ( ) )
decryptFn := secretsService . GetDecryptedValue
2021-09-14 06:55:01 -05:00
reg := prometheus . NewPedanticRegistry ( )
m := metrics . NewNGAlert ( reg )
2022-04-22 11:57:56 -05:00
mam , err := NewMultiOrgAlertmanager ( cfg , configStore , orgStore , kvStore , provStore , decryptFn , m . GetMultiOrgAlertmanagerMetrics ( ) , nil , log . New ( "testlogger" ) , secretsService )
2021-09-16 09:33:51 -05:00
require . NoError ( t , err )
2021-08-24 05:28:09 -05:00
ctx := context . Background ( )
// Ensure that one Alertmanagers is created per org.
{
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
require . Len ( t , mam . alertmanagers , 3 )
}
// First, let's try to request an Alertmanager from an org that doesn't exist.
{
_ , err := mam . AlertmanagerFor ( 5 )
require . EqualError ( t , err , ErrNoAlertmanagerForOrg . Error ( ) )
}
// Now, let's try to request an Alertmanager that is not ready.
{
// let's delete its "running config" to make it non-ready
mam . alertmanagers [ 1 ] . config = nil
2021-12-27 17:01:17 -06:00
am , err := mam . AlertmanagerFor ( 1 )
require . NotNil ( t , am )
require . False ( t , am . Ready ( ) )
2021-08-24 05:28:09 -05:00
require . EqualError ( t , err , ErrAlertmanagerNotReady . Error ( ) )
}
// With an Alertmanager that exists, it responds correctly.
{
am , err := mam . AlertmanagerFor ( 2 )
require . NoError ( t , err )
require . Equal ( t , * am . GetStatus ( ) . VersionInfo . Version , "N/A" )
require . Equal ( t , am . orgID , int64 ( 2 ) )
require . NotNil ( t , am . config )
}
// Let's now remove the previous queried organization.
orgStore . orgs = [ ] int64 { 1 , 3 }
require . NoError ( t , mam . LoadAndSyncAlertmanagersForOrgs ( ctx ) )
{
_ , err := mam . AlertmanagerFor ( 2 )
require . EqualError ( t , err , ErrNoAlertmanagerForOrg . Error ( ) )
}
}
2021-09-15 16:48:52 -05:00
2021-10-12 12:10:08 -05:00
// brokenConfig is an intentionally malformed Alertmanager configuration
// payload. TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures stores
// it as organization 2's configuration to exercise the failure path of the
// sync. Among other problems, it has no enclosing object, lacks a comma after
// the "addresses" entry, and carries non-text residue in the "url" value.
// Do not "repair" this string: the test depends on it being unusable.
var brokenConfig = `
"alertmanager_config" : {
"route" : {
"receiver" : "grafana-default-email"
} ,
"receivers" : [ {
"name" : "grafana-default-email" ,
"grafana_managed_receiver_configs" : [ {
"uid" : "" ,
"name" : "slack receiver" ,
"type" : "slack" ,
"isDefault" : true ,
"settings" : {
"addresses" : "<example@email.com>"
"url" : "<22> r_<72> <5F> q/b<> <62> <EFBFBD> <EFBFBD> <EFBFBD> p@ⱎȏ =<3D> <> @ӹtd>Rú<52> H<EFBFBD> <48> <20> ;<3B> @Uf<55> <66> 0<EFBFBD> \k2*jh<6A> }Íu<C38D> )" 2 <EFBFBD> F6 ] <EFBFBD> } r <EFBFBD> <EFBFBD> R <EFBFBD> b <EFBFBD> d <EFBFBD> J ; <EFBFBD> <EFBFBD> S퓧 <EFBFBD> <EFBFBD> $ <EFBFBD> <EFBFBD> " ,
"recipient" : "#graphana-metrics" ,
}
} ]
} ]
}
} `