Alerting: Split Scheduler and AlertRouter tests (#52416)

* move FakeExternalAlertmanager to the sender package
* move tests from the scheduler to the router
* make all fields of the alerts router private
* update scheduler tests to use the sender mock (see the sketch below)
Yuriy Tseretyan 2022-07-19 09:32:54 -04:00 committed by GitHub
parent c60487fdbf
commit 054fe54b03
9 changed files with 747 additions and 500 deletions
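In short: scheduler tests no longer spin up a real AlertsRouter against a fake external Alertmanager; they inject an AlertsSenderMock and assert on the PostableAlerts handed to it, while the routing behaviour gets its own tests in the sender package. A minimal sketch of the new scheduler-side pattern, reusing the package-local helpers introduced in this change (setupScheduler, CreateTestAlertRule, AlertsSenderMock) and the imports already present in the scheduler test file; the test itself is illustrative and not part of the diff:

```go
func TestSchedulerSendsViaMock(t *testing.T) {
	ruleStore := store.NewFakeRuleStore(t)
	instanceStore := &store.FakeInstanceStore{}

	// Inject the generated sender mock instead of a real AlertsRouter.
	senderMock := &AlertsSenderMock{}

	sch, mockedClock := setupScheduler(t, ruleStore, instanceStore, nil, senderMock)

	// A firing rule with a one-second interval should produce alerts for the sender.
	rule := CreateTestAlertRule(t, ruleStore, 1, 1, eval.Alerting)
	senderMock.EXPECT().Send(rule.GetKey(), mock.Anything).Return()

	ctx, cancel := context.WithCancel(context.Background())
	t.Cleanup(cancel)
	go func() { _ = sch.Run(ctx) }()

	// Advance the mocked clock so at least one evaluation happens.
	mockedClock.Add(2 * time.Second)

	require.Eventually(t, func() bool { return len(senderMock.Calls) > 0 }, 5*time.Second, 100*time.Millisecond)
	senderMock.AssertExpectations(t)

	// The second argument to Send carries the alerts that would have been routed.
	alerts, ok := senderMock.Calls[0].Arguments[1].(definitions.PostableAlerts)
	require.True(t, ok)
	require.NotEmpty(t, alerts.PostableAlerts)
}
```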


@ -185,7 +185,15 @@ func GenerateAlertRules(count int, f func() *AlertRule) []*AlertRule {
return result
}
// GenerateGroupKey generates many random alert rules. Does not guarantee that rules are unique (by UID)
// GenerateRuleKey generates a random alert rule key
func GenerateRuleKey(orgID int64) AlertRuleKey {
return AlertRuleKey{
OrgID: orgID,
UID: util.GenerateShortUID(),
}
}
// GenerateGroupKey generates a random group key
func GenerateGroupKey(orgID int64) AlertRuleGroupKey {
return AlertRuleGroupKey{
OrgID: orgID,


@ -16,6 +16,9 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
prometheusModel "github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
busmock "github.com/grafana/grafana/pkg/bus/mock"
@ -23,13 +26,11 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/annotations"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/sender"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/secrets/fakes"
@ -38,317 +39,20 @@ import (
"github.com/grafana/grafana/pkg/util"
)
func TestSendingToExternalAlertmanager(t *testing.T) {
fakeAM := store.NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeRuleStore := store.NewFakeRuleStore(t)
fakeInstanceStore := &store.FakeInstanceStore{}
fakeAdminConfigStore := store.NewFakeAdminConfigStore(t)
// create alert rule with one second interval
alertRule := CreateTestAlertRule(t, fakeRuleStore, 1, 1, eval.Alerting)
// First, let's create an admin configuration that holds an alertmanager.
adminConfig := &models.AdminConfiguration{OrgID: 1, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers}
cmd := store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
sched, mockedClock, alertsRouter := setupScheduler(t, fakeRuleStore, fakeInstanceStore, fakeAdminConfigStore, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 1, len(alertsRouter.Senders))
require.Equal(t, 1, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we've discovered the Alertmanager.
require.Eventually(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 1 && len(alertsRouter.DroppedAlertmanagersFor(1)) == 0
}, 10*time.Second, 200*time.Millisecond)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() {
cancel()
})
go func() {
err := sched.Run(ctx)
require.NoError(t, err)
}()
// With everything up and running, let's advance the time to make sure we get at least one alert iteration.
mockedClock.Add(2 * time.Second)
// Eventually, our Alertmanager should have received at least one alert.
require.Eventually(t, func() bool {
return fakeAM.AlertsCount() >= 1 && fakeAM.AlertNamesCompare([]string{alertRule.Title})
}, 10*time.Second, 200*time.Millisecond)
// Now, let's remove the Alertmanager from the admin configuration.
adminConfig.Alertmanagers = []string{}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// Again, make sure we sync and verify the senders.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 0, len(alertsRouter.Senders))
require.Equal(t, 0, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we've dropped the Alertmanager.
require.Eventually(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 0 && len(alertsRouter.DroppedAlertmanagersFor(1)) == 0
}, 10*time.Second, 200*time.Millisecond)
}
func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
fakeAM := store.NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeRuleStore := store.NewFakeRuleStore(t)
fakeInstanceStore := &store.FakeInstanceStore{}
fakeAdminConfigStore := store.NewFakeAdminConfigStore(t)
// First, let's create an admin configuration that holds an alertmanager.
adminConfig := &models.AdminConfiguration{OrgID: 1, Alertmanagers: []string{fakeAM.Server.URL}}
cmd := store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
sched, mockedClock, alertsRouter := setupScheduler(t, fakeRuleStore, fakeInstanceStore, fakeAdminConfigStore, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 1, len(alertsRouter.Senders))
require.Equal(t, 1, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we've discovered the Alertmanager.
require.Eventuallyf(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 1 && len(alertsRouter.DroppedAlertmanagersFor(1)) == 0
}, 10*time.Second, 200*time.Millisecond, "Alertmanager for org 1 was never discovered")
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() {
cancel()
})
go func() {
err := sched.Run(ctx)
require.NoError(t, err)
}()
// 1. Now, let's assume a new org comes along.
adminConfig2 := &models.AdminConfiguration{OrgID: 2, Alertmanagers: []string{fakeAM.Server.URL}}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig2}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// If we sync again, new senders must have spawned.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 2, len(alertsRouter.Senders))
require.Equal(t, 2, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we've discovered the Alertmanager for the new organization.
require.Eventuallyf(t, func() bool {
return len(alertsRouter.AlertmanagersFor(2)) == 1 && len(alertsRouter.DroppedAlertmanagersFor(2)) == 0
}, 10*time.Second, 200*time.Millisecond, "Alertmanager for org 2 was never discovered")
// With everything up and running, let's advance the time to make sure we get at least one alert iteration.
mockedClock.Add(10 * time.Second)
// TODO(gotjosh): Disabling this assertion as for some reason even after advancing the clock the alert is not being delivered.
// the check previous to this assertion would ensure that the sender is up and running before sending the notification.
// However, sometimes this does not happen.
// Create two alert rules with one second interval.
// alertRuleOrgOne := CreateTestAlertRule(t, FakeRuleStore, 1, 1)
// alertRuleOrgTwo := CreateTestAlertRule(t, FakeRuleStore, 1, 2)
// Eventually, our Alertmanager should have received at least two alerts.
// var count int
// require.Eventuallyf(t, func() bool {
// count := fakeAM.AlertsCount()
// return count == 2 && fakeAM.AlertNamesCompare([]string{alertRuleOrgOne.Title, alertRuleOrgTwo.Title})
// }, 20*time.Second, 200*time.Millisecond, "Alertmanager never received an '%s' from org 1 or '%s' from org 2, the alert count was: %d", alertRuleOrgOne.Title, alertRuleOrgTwo.Title, count)
// 2. Next, let's modify the configuration of an organization by adding an extra alertmanager.
fakeAM2 := store.NewFakeExternalAlertmanager(t)
adminConfig2 = &models.AdminConfiguration{OrgID: 2, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig2}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// Before we sync, let's grab the existing hash of this particular org.
alertsRouter.AdminConfigMtx.Lock()
currentHash := alertsRouter.SendersCfgHash[2]
alertsRouter.AdminConfigMtx.Unlock()
// Now, sync again.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
// The hash for org two should not be the same and we should still have two senders.
alertsRouter.AdminConfigMtx.Lock()
require.NotEqual(t, alertsRouter.SendersCfgHash[2], currentHash)
require.Equal(t, 2, len(alertsRouter.Senders))
require.Equal(t, 2, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Wait for the discovery of the new Alertmanager for orgID = 2.
require.Eventuallyf(t, func() bool {
return len(alertsRouter.AlertmanagersFor(2)) == 2 && len(alertsRouter.DroppedAlertmanagersFor(2)) == 0
}, 10*time.Second, 200*time.Millisecond, "Alertmanager for org 2 was never re-discovered after fix")
// 3. Now, let's provide a configuration that fails for OrgID = 1.
adminConfig2 = &models.AdminConfiguration{OrgID: 1, Alertmanagers: []string{"123://invalid.org"}}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig2}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// Before we sync, let's get the current config hash.
alertsRouter.AdminConfigMtx.Lock()
currentHash = alertsRouter.SendersCfgHash[1]
alertsRouter.AdminConfigMtx.Unlock()
// Now, sync again.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
// The old configuration should still be running.
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, alertsRouter.SendersCfgHash[1], currentHash)
alertsRouter.AdminConfigMtx.Unlock()
require.Equal(t, 1, len(alertsRouter.AlertmanagersFor(1)))
// If we fix it - it should be applied.
adminConfig2 = &models.AdminConfiguration{OrgID: 1, Alertmanagers: []string{"notarealalertmanager:3030"}}
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig2}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.NotEqual(t, alertsRouter.SendersCfgHash[1], currentHash)
alertsRouter.AdminConfigMtx.Unlock()
// Finally, remove everything.
require.NoError(t, fakeAdminConfigStore.DeleteAdminConfiguration(1))
require.NoError(t, fakeAdminConfigStore.DeleteAdminConfiguration(2))
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 0, len(alertsRouter.Senders))
require.Equal(t, 0, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
require.Eventuallyf(t, func() bool {
NoAlertmanagerOrgOne := len(alertsRouter.AlertmanagersFor(1)) == 0 && len(alertsRouter.DroppedAlertmanagersFor(1)) == 0
NoAlertmanagerOrgTwo := len(alertsRouter.AlertmanagersFor(2)) == 0 && len(alertsRouter.DroppedAlertmanagersFor(2)) == 0
return NoAlertmanagerOrgOne && NoAlertmanagerOrgTwo
}, 10*time.Second, 200*time.Millisecond, "Alertmanager for org 1 and 2 were never removed")
}
func TestChangingAlertmanagersChoice(t *testing.T) {
fakeAM := store.NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeRuleStore := store.NewFakeRuleStore(t)
fakeInstanceStore := &store.FakeInstanceStore{}
fakeAdminConfigStore := store.NewFakeAdminConfigStore(t)
// create alert rule with one second interval and an Alertmanagers choice.
alertRule := CreateTestAlertRule(t, fakeRuleStore, 1, 1, eval.Alerting)
// First, let's create an admin configuration that holds an alertmanager
// and sends alerts to both internal and external alertmanagers (default).
adminConfig := &models.AdminConfiguration{OrgID: 1, Alertmanagers: []string{fakeAM.Server.URL}}
cmd := store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
sched, mockedClock, alertsRouter := setupScheduler(t, fakeRuleStore, fakeInstanceStore, fakeAdminConfigStore, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 1, len(alertsRouter.Senders))
require.Equal(t, 1, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we've discovered the Alertmanager and the Alertmanagers choice is correct.
require.Eventually(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 1 &&
len(alertsRouter.DroppedAlertmanagersFor(1)) == 0 &&
alertsRouter.SendAlertsTo[1] == adminConfig.SendAlertsTo
}, 10*time.Second, 200*time.Millisecond)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() {
cancel()
})
go func() {
err := sched.Run(ctx)
require.NoError(t, err)
}()
// With everything up and running, let's advance the time to make sure we get at least one alert iteration.
mockedClock.Add(2 * time.Second)
// Eventually, our Alertmanager should have received alerts.
require.Eventuallyf(t, func() bool {
return fakeAM.AlertsCount() >= 1 &&
fakeAM.AlertNamesCompare([]string{alertRule.Title})
}, 10*time.Second, 200*time.Millisecond, "expected at least one alert to be received and the title of the first one to be '%s'. but got [%d]: [%v]", alertRule.Title, fakeAM.AlertsCount(), fakeAM.Alerts())
// Now, let's change the Alertmanagers choice to send only to the external Alertmanager.
adminConfig.SendAlertsTo = models.ExternalAlertmanagers
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// Again, make sure we sync and verify the senders.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 1, len(alertsRouter.Senders))
require.Equal(t, 1, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure we still have the Alertmanager but the Alertmanagers choice has changed.
require.Eventually(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 1 &&
len(alertsRouter.DroppedAlertmanagersFor(1)) == 0 &&
alertsRouter.SendAlertsTo[1] == adminConfig.SendAlertsTo
}, 10*time.Second, 200*time.Millisecond)
// Finally, let's change the Alertmanagers choice to send only to the internal Alertmanager.
adminConfig.SendAlertsTo = models.InternalAlertmanager
cmd = store.UpdateAdminConfigurationCmd{AdminConfiguration: adminConfig}
require.NoError(t, fakeAdminConfigStore.UpdateAdminConfiguration(cmd))
// Again, make sure we sync and verify the senders.
// Senders should be running even though alerts are being handled externally.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
alertsRouter.AdminConfigMtx.Lock()
require.Equal(t, 1, len(alertsRouter.Senders))
require.Equal(t, 1, len(alertsRouter.SendersCfgHash))
alertsRouter.AdminConfigMtx.Unlock()
// Then, ensure the Alertmanager is still listed and the Alertmanagers choice has changed.
require.Eventually(t, func() bool {
return len(alertsRouter.AlertmanagersFor(1)) == 1 &&
len(alertsRouter.DroppedAlertmanagersFor(1)) == 0 &&
alertsRouter.SendAlertsTo[1] == adminConfig.SendAlertsTo
}, 10*time.Second, 200*time.Millisecond)
}
func TestSchedule_ruleRoutine(t *testing.T) {
createSchedule := func(
evalAppliedChan chan time.Time,
) (*schedule, *store.FakeRuleStore, *store.FakeInstanceStore, *store.FakeAdminConfigStore, prometheus.Gatherer, *sender.AlertsRouter) {
senderMock *AlertsSenderMock,
) (*schedule, *store.FakeRuleStore, *store.FakeInstanceStore, prometheus.Gatherer) {
ruleStore := store.NewFakeRuleStore(t)
instanceStore := &store.FakeInstanceStore{}
adminConfigStore := store.NewFakeAdminConfigStore(t)
registry := prometheus.NewPedanticRegistry()
sch, _, alertsRouter := setupScheduler(t, ruleStore, instanceStore, adminConfigStore, registry)
sch, _ := setupScheduler(t, ruleStore, instanceStore, registry, senderMock)
sch.evalAppliedFunc = func(key models.AlertRuleKey, t time.Time) {
evalAppliedChan <- t
}
return sch, ruleStore, instanceStore, adminConfigStore, registry, alertsRouter
return sch, ruleStore, instanceStore, registry
}
// normal states do not include NoData and Error because currently it is not possible to perform any sensible test
@ -364,7 +68,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
t.Run(fmt.Sprintf("when rule evaluation happens (evaluation state %s)", evalState), func(t *testing.T) {
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sch, ruleStore, instanceStore, _, reg, _ := createSchedule(evalAppliedChan)
sch, ruleStore, instanceStore, reg := createSchedule(evalAppliedChan, nil)
rule := CreateTestAlertRule(t, ruleStore, 10, rand.Int63(), evalState)
@ -483,7 +187,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
t.Run("should exit", func(t *testing.T) {
t.Run("when context is cancelled", func(t *testing.T) {
stoppedChan := make(chan error)
sch, _, _, _, _, _ := createSchedule(make(chan time.Time))
sch, _, _, _ := createSchedule(make(chan time.Time), nil)
ctx, cancel := context.WithCancel(context.Background())
go func() {
@ -502,7 +206,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
evalAppliedChan := make(chan time.Time)
ctx := context.Background()
sch, ruleStore, _, _, _, _ := createSchedule(evalAppliedChan)
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, nil)
rule := CreateTestAlertRule(t, ruleStore, 10, rand.Int63(), randomNormalState())
@ -554,7 +258,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sch, ruleStore, _, _, _, _ := createSchedule(evalAppliedChan)
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, nil)
rule := CreateTestAlertRule(t, ruleStore, 10, rand.Int63(), randomNormalState())
@ -606,7 +310,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
evalAppliedChan := make(chan time.Time)
updateChan := make(chan ruleVersion)
sch, ruleStore, _, _, _, _ := createSchedule(evalAppliedChan)
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, nil)
rule := CreateTestAlertRule(t, ruleStore, 10, rand.Int63(), eval.Alerting) // we want the alert to fire
@ -649,7 +353,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
evalAppliedChan := make(chan time.Time)
updateChan := make(chan ruleVersion)
sch, ruleStore, _, _, _, _ := createSchedule(evalAppliedChan)
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, nil)
sch.maxAttempts = rand.Int63n(4) + 1
rule := CreateTestAlertRule(t, ruleStore, 10, rand.Int63(), randomNormalState())
@ -681,32 +385,20 @@ func TestSchedule_ruleRoutine(t *testing.T) {
t.Run("when rule version is updated", func(t *testing.T) {
t.Run("should clear the state and expire firing alerts", func(t *testing.T) {
fakeAM := store.NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
orgID := rand.Int63()
s, err := sender.New()
require.NoError(t, err)
adminConfig := &models.AdminConfiguration{OrgID: orgID, Alertmanagers: []string{fakeAM.Server.URL}}
err = s.ApplyConfig(adminConfig)
require.NoError(t, err)
s.Run()
defer s.Stop()
require.Eventuallyf(t, func() bool {
return len(s.Alertmanagers()) == 1
}, 20*time.Second, 200*time.Millisecond, "external Alertmanager was not discovered.")
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
updateChan := make(chan ruleVersion)
ctx := context.Background()
sch, ruleStore, _, _, _, alertsRouter := createSchedule(evalAppliedChan)
alertsRouter.Senders[orgID] = s
sender := AlertsSenderMock{}
var rulePtr = CreateTestAlertRule(t, ruleStore, 10, orgID, eval.Alerting) // we want the alert to fire
var rule = *rulePtr
ctx := context.Background()
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, &sender)
var rule = CreateTestAlertRule(t, ruleStore, 10, orgID, eval.Alerting) // we want the alert to fire
sender.EXPECT().Send(rule.GetKey(), mock.Anything)
// define some state
states := make([]*state.State, 0, len(allStates))
@ -754,26 +446,23 @@ func TestSchedule_ruleRoutine(t *testing.T) {
updateChan <- ruleVersion(rule.Version)
wg.Wait()
newRule := rule
newRule := models.CopyRule(rule)
newRule.Version++
ruleStore.PutRule(ctx, &newRule)
ruleStore.PutRule(ctx, newRule)
wg.Add(1)
updateChan <- ruleVersion(newRule.Version)
wg.Wait()
require.Eventually(t, func() bool {
return len(sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)) == 0
return len(sender.Calls) > 0
}, 5*time.Second, 100*time.Millisecond)
var count int
require.Eventuallyf(t, func() bool {
count = fakeAM.AlertsCount()
return count == expectedToBeSent
}, 20*time.Second, 200*time.Millisecond, "Alertmanager was expected to receive %d alerts, but received only %d", expectedToBeSent, count)
require.Empty(t, sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID))
for _, alert := range fakeAM.Alerts() {
require.Equalf(t, sch.clock.Now().UTC(), time.Time(alert.EndsAt).UTC(), "Alert received by Alertmanager should be expired as of now")
}
sender.AssertExpectations(t)
args, ok := sender.Calls[0].Arguments[1].(definitions.PostableAlerts)
require.Truef(t, ok, fmt.Sprintf("expected argument of function was supposed to be 'definitions.PostableAlerts' but got %T", sender.Calls[0].Arguments[1]))
require.Len(t, args.PostableAlerts, expectedToBeSent)
})
})
@ -789,34 +478,21 @@ func TestSchedule_ruleRoutine(t *testing.T) {
})
t.Run("when there are alerts that should be firing", func(t *testing.T) {
t.Run("it should send to local alertmanager if configured for organization", func(t *testing.T) {
// TODO figure out how to simulate multiorg alertmanager
t.Skip()
})
t.Run("it should send to external alertmanager if configured for organization", func(t *testing.T) {
fakeAM := store.NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
t.Run("it should call sender", func(t *testing.T) {
orgID := rand.Int63()
s, err := sender.New()
require.NoError(t, err)
adminConfig := &models.AdminConfiguration{OrgID: orgID, Alertmanagers: []string{fakeAM.Server.URL}}
err = s.ApplyConfig(adminConfig)
require.NoError(t, err)
s.Run()
defer s.Stop()
require.Eventuallyf(t, func() bool {
return len(s.Alertmanagers()) == 1
}, 20*time.Second, 200*time.Millisecond, "external Alertmanager was not discovered.")
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sch, ruleStore, _, _, _, alertsRouter := createSchedule(evalAppliedChan)
alertsRouter.Senders[orgID] = s
sender := AlertsSenderMock{}
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, &sender)
// eval.Alerting makes state manager to create notifications for alertmanagers
rule := CreateTestAlertRule(t, ruleStore, 10, orgID, eval.Alerting)
folder, _ := ruleStore.GetNamespaceByUID(context.Background(), rule.NamespaceUID, orgID, nil)
sender.EXPECT().Send(rule.GetKey(), mock.Anything).Return()
go func() {
ctx, cancel := context.WithCancel(context.Background())
@ -828,13 +504,22 @@ func TestSchedule_ruleRoutine(t *testing.T) {
scheduledAt: time.Now(),
version: rule.Version,
}
waitForTimeChannel(t, evalAppliedChan)
var count int
require.Eventuallyf(t, func() bool {
count = fakeAM.AlertsCount()
return count == 1 && fakeAM.AlertNamesCompare([]string{rule.Title})
}, 20*time.Second, 200*time.Millisecond, "Alertmanager never received an '%s', received alerts count: %d", rule.Title, count)
sender.AssertExpectations(t)
args, ok := sender.Calls[0].Arguments[1].(definitions.PostableAlerts)
require.Truef(t, ok, fmt.Sprintf("expected argument of function was supposed to be 'definitions.PostableAlerts' but got %T", sender.Calls[0].Arguments[1]))
require.Len(t, args.PostableAlerts, 1)
t.Run("should add extra labels", func(t *testing.T) {
alert := args.PostableAlerts[0]
assert.Equal(t, rule.UID, alert.Labels[models.RuleUIDLabel])
assert.Equal(t, rule.NamespaceUID, alert.Labels[models.NamespaceUIDLabel])
assert.Equal(t, rule.Title, alert.Labels[prometheusModel.AlertNameLabel])
assert.Equal(t, folder.Title, alert.Labels[models.FolderTitleLabel])
})
})
})
@ -925,12 +610,11 @@ func setupSchedulerWithFakeStores(t *testing.T) *schedule {
t.Helper()
ruleStore := store.NewFakeRuleStore(t)
instanceStore := &store.FakeInstanceStore{}
adminConfigStore := store.NewFakeAdminConfigStore(t)
sch, _, _ := setupScheduler(t, ruleStore, instanceStore, adminConfigStore, nil)
sch, _ := setupScheduler(t, ruleStore, instanceStore, nil, nil)
return sch
}
func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, acs store.AdminConfigurationStore, registry *prometheus.Registry) (*schedule, *clock.Mock, *sender.AlertsRouter) {
func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, registry *prometheus.Registry, senderMock *AlertsSenderMock) (*schedule, *clock.Mock) {
t.Helper()
fakeAnnoRepo := store.NewFakeAnnotationsRepo()
@ -942,16 +626,16 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
}
m := metrics.NewNGAlert(registry)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
moa, err := notifier.NewMultiOrgAlertmanager(&setting.Cfg{}, &notifier.FakeConfigStore{}, &notifier.FakeOrgStore{}, &notifier.FakeKVStore{}, provisioning.NewFakeProvisioningStore(), decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
require.NoError(t, err)
appUrl := &url.URL{
Scheme: "http",
Host: "localhost",
}
alertsRouter := sender.NewAlertsRouter(moa, acs, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute) // do not poll in unit tests.
if senderMock == nil {
senderMock = &AlertsSenderMock{}
senderMock.EXPECT().Send(mock.Anything, mock.Anything).Return()
}
cfg := setting.UnifiedAlertingSettings{
BaseInterval: time.Second,
@ -967,10 +651,10 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
InstanceStore: is,
Logger: logger,
Metrics: m.GetSchedulerMetrics(),
AlertSender: alertsRouter,
AlertSender: senderMock,
}
st := state.NewManager(schedCfg.Logger, m.GetStateMetrics(), nil, rs, is, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clock.NewMock())
return NewScheduler(schedCfg, appUrl, st, busmock.New()), mockedClock, alertsRouter
return NewScheduler(schedCfg, appUrl, st, busmock.New()), mockedClock
}
// createTestAlertRule creates a dummy alert definition to be used by the tests.


@ -27,13 +27,13 @@ type AlertsRouter struct {
clock clock.Clock
adminConfigStore store.AdminConfigurationStore
// Senders help us send alerts to external Alertmanagers.
AdminConfigMtx sync.RWMutex
SendAlertsTo map[int64]models.AlertmanagersChoice
Senders map[int64]*Sender
SendersCfgHash map[int64]string
// senders help us send alerts to external Alertmanagers.
adminConfigMtx sync.RWMutex
sendAlertsTo map[int64]models.AlertmanagersChoice
senders map[int64]*Sender
sendersCfgHash map[int64]string
MultiOrgNotifier *notifier.MultiOrgAlertmanager
multiOrgNotifier *notifier.MultiOrgAlertmanager
appURL *url.URL
disabledOrgs map[int64]struct{}
@ -46,12 +46,12 @@ func NewAlertsRouter(multiOrgNotifier *notifier.MultiOrgAlertmanager, store stor
clock: clk,
adminConfigStore: store,
AdminConfigMtx: sync.RWMutex{},
Senders: map[int64]*Sender{},
SendersCfgHash: map[int64]string{},
SendAlertsTo: map[int64]models.AlertmanagersChoice{},
adminConfigMtx: sync.RWMutex{},
senders: map[int64]*Sender{},
sendersCfgHash: map[int64]string{},
sendAlertsTo: map[int64]models.AlertmanagersChoice{},
MultiOrgNotifier: multiOrgNotifier,
multiOrgNotifier: multiOrgNotifier,
appURL: appURL,
disabledOrgs: disabledOrgs,
@ -72,7 +72,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
d.logger.Debug("found admin configurations", "count", len(cfgs))
orgsFound := make(map[int64]struct{}, len(cfgs))
d.AdminConfigMtx.Lock()
d.adminConfigMtx.Lock()
for _, cfg := range cfgs {
_, isDisabledOrg := d.disabledOrgs[cfg.OrgID]
if isDisabledOrg {
@ -81,11 +81,11 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
}
// Update the Alertmanagers choice for the organization.
d.SendAlertsTo[cfg.OrgID] = cfg.SendAlertsTo
d.sendAlertsTo[cfg.OrgID] = cfg.SendAlertsTo
orgsFound[cfg.OrgID] = struct{}{} // keep track of which senders we need to keep.
existing, ok := d.Senders[cfg.OrgID]
existing, ok := d.senders[cfg.OrgID]
// We have no running sender and no Alertmanager(s) configured, no-op.
if !ok && len(cfg.Alertmanagers) == 0 {
@ -107,7 +107,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
// We have a running sender, check if we need to apply a new config.
if ok {
if d.SendersCfgHash[cfg.OrgID] == cfg.AsSHA256() {
if d.sendersCfgHash[cfg.OrgID] == cfg.AsSHA256() {
d.logger.Debug("sender configuration is the same as the one running, no-op", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
continue
}
@ -118,7 +118,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
d.logger.Error("failed to apply configuration", "err", err, "org", cfg.OrgID)
continue
}
d.SendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
d.sendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
continue
}
@ -130,7 +130,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
continue
}
d.Senders[cfg.OrgID] = s
d.senders[cfg.OrgID] = s
s.Run()
err = s.ApplyConfig(cfg)
@ -139,19 +139,19 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
continue
}
d.SendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
d.sendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
}
sendersToStop := map[int64]*Sender{}
for orgID, s := range d.Senders {
for orgID, s := range d.senders {
if _, exists := orgsFound[orgID]; !exists {
sendersToStop[orgID] = s
delete(d.Senders, orgID)
delete(d.SendersCfgHash, orgID)
delete(d.senders, orgID)
delete(d.sendersCfgHash, orgID)
}
}
d.AdminConfigMtx.Unlock()
d.adminConfigMtx.Unlock()
// We can now stop these senders w/o having to hold a lock.
for orgID, s := range sendersToStop {
@ -174,11 +174,11 @@ func (d *AlertsRouter) Send(key models.AlertRuleKey, alerts definitions.Postable
// Send alerts to local notifier if they need to be handled internally
// or if no external AMs have been discovered yet.
var localNotifierExist, externalNotifierExist bool
if d.SendAlertsTo[key.OrgID] == models.ExternalAlertmanagers && len(d.AlertmanagersFor(key.OrgID)) > 0 {
if d.sendAlertsTo[key.OrgID] == models.ExternalAlertmanagers && len(d.AlertmanagersFor(key.OrgID)) > 0 {
logger.Debug("no alerts to put in the notifier")
} else {
logger.Debug("sending alerts to local notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
n, err := d.MultiOrgNotifier.AlertmanagerFor(key.OrgID)
n, err := d.multiOrgNotifier.AlertmanagerFor(key.OrgID)
if err == nil {
localNotifierExist = true
if err := n.PutAlerts(alerts); err != nil {
@ -195,10 +195,10 @@ func (d *AlertsRouter) Send(key models.AlertRuleKey, alerts definitions.Postable
// Send alerts to external Alertmanager(s) if we have a sender for this organization
// and alerts are not being handled just internally.
d.AdminConfigMtx.RLock()
defer d.AdminConfigMtx.RUnlock()
s, ok := d.Senders[key.OrgID]
if ok && d.SendAlertsTo[key.OrgID] != models.InternalAlertmanager {
d.adminConfigMtx.RLock()
defer d.adminConfigMtx.RUnlock()
s, ok := d.senders[key.OrgID]
if ok && d.sendAlertsTo[key.OrgID] != models.InternalAlertmanager {
logger.Debug("sending alerts to external notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
s.SendAlerts(alerts)
externalNotifierExist = true
@ -211,9 +211,9 @@ func (d *AlertsRouter) Send(key models.AlertRuleKey, alerts definitions.Postable
// AlertmanagersFor returns all the discovered Alertmanager(s) for a particular organization.
func (d *AlertsRouter) AlertmanagersFor(orgID int64) []*url.URL {
d.AdminConfigMtx.RLock()
defer d.AdminConfigMtx.RUnlock()
s, ok := d.Senders[orgID]
d.adminConfigMtx.RLock()
defer d.adminConfigMtx.RUnlock()
s, ok := d.senders[orgID]
if !ok {
return []*url.URL{}
}
@ -222,9 +222,9 @@ func (d *AlertsRouter) AlertmanagersFor(orgID int64) []*url.URL {
// DroppedAlertmanagersFor returns all the dropped Alertmanager(s) for a particular organization.
func (d *AlertsRouter) DroppedAlertmanagersFor(orgID int64) []*url.URL {
d.AdminConfigMtx.RLock()
defer d.AdminConfigMtx.RUnlock()
s, ok := d.Senders[orgID]
d.adminConfigMtx.RLock()
defer d.adminConfigMtx.RUnlock()
s, ok := d.senders[orgID]
if !ok {
return []*url.URL{}
}
@ -242,12 +242,12 @@ func (d *AlertsRouter) Run(ctx context.Context) error {
}
case <-ctx.Done():
// Stop sending alerts to all external Alertmanager(s).
d.AdminConfigMtx.Lock()
for orgID, s := range d.Senders {
delete(d.Senders, orgID) // delete before we stop to make sure we don't accept any more alerts.
d.adminConfigMtx.Lock()
for orgID, s := range d.senders {
delete(d.senders, orgID) // delete before we stop to make sure we don't accept any more alerts.
s.Stop()
}
d.AdminConfigMtx.Unlock()
d.adminConfigMtx.Unlock()
return nil
}


@ -0,0 +1,362 @@
package sender
import (
"context"
"fmt"
"math/rand"
"net/url"
"testing"
"time"
"github.com/benbjohnson/clock"
"github.com/go-openapi/strfmt"
models2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
func TestSendingToExternalAlertmanager(t *testing.T) {
ruleKey := models.GenerateRuleKey(1)
fakeAM := NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeAdminConfigStore := &store.AdminConfigurationStoreMock{}
mockedGetAdminConfigurations := fakeAdminConfigStore.EXPECT().GetAdminConfigurations()
mockedClock := clock.NewMock()
moa := createMultiOrgAlertmanager(t, []int64{1})
appUrl := &url.URL{
Scheme: "http",
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
}, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 1, len(alertsRouter.senders))
require.Equal(t, 1, len(alertsRouter.sendersCfgHash))
// Then, ensure we've discovered the Alertmanager.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey.OrgID, 1, 0)
var expected []*models2.PostableAlert
alerts := definitions.PostableAlerts{}
for i := 0; i < rand.Intn(5)+1; i++ {
alert := generatePostableAlert(t, mockedClock)
expected = append(expected, &alert)
alerts.PostableAlerts = append(alerts.PostableAlerts, alert)
}
alertsRouter.Send(ruleKey, alerts)
// Eventually, our Alertmanager should have received at least one alert.
assertAlertsDelivered(t, fakeAM, expected)
// Now, let's remove the Alertmanager from the admin configuration.
mockedGetAdminConfigurations.Return(nil, nil)
// Again, make sure we sync and verify the senders.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 0, len(alertsRouter.senders))
require.Equal(t, 0, len(alertsRouter.sendersCfgHash))
// Then, ensure we've dropped the Alertmanager.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey.OrgID, 0, 0)
}
func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
ruleKey1 := models.GenerateRuleKey(1)
ruleKey2 := models.GenerateRuleKey(2)
fakeAM := NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeAdminConfigStore := &store.AdminConfigurationStoreMock{}
mockedGetAdminConfigurations := fakeAdminConfigStore.EXPECT().GetAdminConfigurations()
mockedClock := clock.NewMock()
moa := createMultiOrgAlertmanager(t, []int64{1, 2})
appUrl := &url.URL{
Scheme: "http",
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
}, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 1, len(alertsRouter.senders))
require.Equal(t, 1, len(alertsRouter.sendersCfgHash))
// Then, ensure we've discovered the Alertmanager.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey1.OrgID, 1, 0)
// 1. Now, let's assume a new org comes along.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL}},
}, nil)
// If we sync again, new senders must have spawned.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 2, len(alertsRouter.senders))
require.Equal(t, 2, len(alertsRouter.sendersCfgHash))
// Then, ensure we've discovered the Alertmanager for the new organization.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey1.OrgID, 1, 0)
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey2.OrgID, 1, 0)
var expected []*models2.PostableAlert
alerts1 := definitions.PostableAlerts{}
for i := 0; i < rand.Intn(5)+1; i++ {
alert := generatePostableAlert(t, mockedClock)
expected = append(expected, &alert)
alerts1.PostableAlerts = append(alerts1.PostableAlerts, alert)
}
alerts2 := definitions.PostableAlerts{}
for i := 0; i < rand.Intn(5)+1; i++ {
alert := generatePostableAlert(t, mockedClock)
expected = append(expected, &alert)
alerts2.PostableAlerts = append(alerts2.PostableAlerts, alert)
}
alertsRouter.Send(ruleKey1, alerts1)
alertsRouter.Send(ruleKey2, alerts2)
assertAlertsDelivered(t, fakeAM, expected)
// 2. Next, let's modify the configuration of an organization by adding an extra alertmanager.
fakeAM2 := NewFakeExternalAlertmanager(t)
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
}, nil)
// Before we sync, let's grab the existing hash of this particular org.
currentHash := alertsRouter.sendersCfgHash[ruleKey2.OrgID]
// Now, sync again.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
// The hash for org two should not be the same and we should still have two senders.
require.NotEqual(t, alertsRouter.sendersCfgHash[ruleKey2.OrgID], currentHash)
require.Equal(t, 2, len(alertsRouter.senders))
require.Equal(t, 2, len(alertsRouter.sendersCfgHash))
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey2.OrgID, 2, 0)
// 3. Now, let's provide a configuration that fails for OrgID = 1.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{"123://invalid.org"}, SendAlertsTo: models.AllAlertmanagers},
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
}, nil)
// Before we sync, let's get the current config hash.
currentHash = alertsRouter.sendersCfgHash[ruleKey1.OrgID]
// Now, sync again.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
// The old configuration should still be running.
require.Equal(t, alertsRouter.sendersCfgHash[ruleKey1.OrgID], currentHash)
require.Equal(t, 1, len(alertsRouter.AlertmanagersFor(ruleKey1.OrgID)))
// If we fix it - it should be applied.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{"notarealalertmanager:3030"}, SendAlertsTo: models.AllAlertmanagers},
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
}, nil)
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.NotEqual(t, alertsRouter.sendersCfgHash[ruleKey1.OrgID], currentHash)
// Finally, remove everything.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{}, nil)
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 0, len(alertsRouter.senders))
require.Equal(t, 0, len(alertsRouter.sendersCfgHash))
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey1.OrgID, 0, 0)
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey2.OrgID, 0, 0)
}
func TestChangingAlertmanagersChoice(t *testing.T) {
ruleKey := models.GenerateRuleKey(1)
fakeAM := NewFakeExternalAlertmanager(t)
defer fakeAM.Close()
fakeAdminConfigStore := &store.AdminConfigurationStoreMock{}
mockedGetAdminConfigurations := fakeAdminConfigStore.EXPECT().GetAdminConfigurations()
mockedClock := clock.NewMock()
mockedClock.Set(time.Now())
moa := createMultiOrgAlertmanager(t, []int64{1})
appUrl := &url.URL{
Scheme: "http",
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
}, nil)
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
// when the first alert triggers.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 1, len(alertsRouter.senders))
require.Equal(t, 1, len(alertsRouter.sendersCfgHash))
require.Equal(t, models.AllAlertmanagers, alertsRouter.sendAlertsTo[ruleKey.OrgID])
// Then, ensure we've discovered the Alertmanager.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey.OrgID, 1, 0)
var expected []*models2.PostableAlert
alerts := definitions.PostableAlerts{}
for i := 0; i < rand.Intn(5)+1; i++ {
alert := generatePostableAlert(t, mockedClock)
expected = append(expected, &alert)
alerts.PostableAlerts = append(alerts.PostableAlerts, alert)
}
alertsRouter.Send(ruleKey, alerts)
// Eventually, our Alertmanager should have received at least one alert.
assertAlertsDelivered(t, fakeAM, expected)
// Now, let's change the Alertmanagers choice to send only to the external Alertmanager.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.ExternalAlertmanagers},
}, nil)
// Again, make sure we sync and verify the senders.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 1, len(alertsRouter.senders))
require.Equal(t, 1, len(alertsRouter.sendersCfgHash))
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey.OrgID, 1, 0)
require.Equal(t, models.ExternalAlertmanagers, alertsRouter.sendAlertsTo[ruleKey.OrgID])
// Finally, let's change the Alertmanagers choice to send only to the internal Alertmanager.
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.InternalAlertmanager},
}, nil)
// Again, make sure we sync and verify the senders.
// senders should be running even though alerts are being handled externally.
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
require.Equal(t, 1, len(alertsRouter.senders))
require.Equal(t, 1, len(alertsRouter.sendersCfgHash))
// Then, ensure the Alertmanager is still listed and the Alertmanagers choice has changed.
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey.OrgID, 1, 0)
require.Equal(t, models.InternalAlertmanager, alertsRouter.sendAlertsTo[ruleKey.OrgID])
alertsRouter.Send(ruleKey, alerts)
am, err := moa.AlertmanagerFor(ruleKey.OrgID)
require.NoError(t, err)
actualAlerts, err := am.GetAlerts(true, true, true, nil, "")
require.NoError(t, err)
require.Len(t, actualAlerts, len(expected))
}
func assertAlertmanagersStatusForOrg(t *testing.T, alertsRouter *AlertsRouter, orgID int64, active, dropped int) {
t.Helper()
require.Eventuallyf(t, func() bool {
return len(alertsRouter.AlertmanagersFor(orgID)) == active && len(alertsRouter.DroppedAlertmanagersFor(orgID)) == dropped
}, 10*time.Second, 200*time.Millisecond,
fmt.Sprintf("expected %d active Alertmanagers and %d dropped ones but got %d active and %d dropped", active, dropped, len(alertsRouter.AlertmanagersFor(orgID)), len(alertsRouter.DroppedAlertmanagersFor(orgID))))
}
func assertAlertsDelivered(t *testing.T, fakeAM *FakeExternalAlertmanager, expectedAlerts []*models2.PostableAlert) {
t.Helper()
require.Eventuallyf(t, func() bool {
return fakeAM.AlertsCount() == len(expectedAlerts)
}, 10*time.Second, 200*time.Millisecond, fmt.Sprintf("expected %d alerts to be delivered to remote Alertmanager but only %d was delivered", len(expectedAlerts), fakeAM.AlertsCount()))
require.Len(t, fakeAM.Alerts(), len(expectedAlerts))
}
func generatePostableAlert(t *testing.T, clk clock.Clock) models2.PostableAlert {
t.Helper()
u := url.URL{
Scheme: "http",
Host: "localhost",
RawPath: "/" + util.GenerateShortUID(),
}
return models2.PostableAlert{
Annotations: models2.LabelSet(models.GenerateAlertLabels(5, "ann-")),
EndsAt: strfmt.DateTime(clk.Now().Add(1 * time.Minute)),
StartsAt: strfmt.DateTime(clk.Now()),
Alert: models2.Alert{
GeneratorURL: strfmt.URI(u.String()),
Labels: models2.LabelSet(models.GenerateAlertLabels(5, "lbl-")),
},
}
}
func createMultiOrgAlertmanager(t *testing.T, orgs []int64) *notifier.MultiOrgAlertmanager {
t.Helper()
tmpDir := t.TempDir()
orgStore := notifier.NewFakeOrgStore(t, orgs)
cfg := &setting.Cfg{
DataPath: tmpDir,
UnifiedAlerting: setting.UnifiedAlertingSettings{
AlertmanagerConfigPollInterval: 3 * time.Minute,
DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(),
DisabledOrgs: map[int64]struct{}{},
}, // do not poll in tests.
}
cfgStore := notifier.NewFakeConfigStore(t, make(map[int64]*models.AlertConfiguration))
kvStore := notifier.NewFakeKVStore(t)
registry := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(registry)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
moa, err := notifier.NewMultiOrgAlertmanager(cfg, &cfgStore, &orgStore, kvStore, provisioning.NewFakeProvisioningStore(), decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
require.NoError(t, err)
require.NoError(t, moa.LoadAndSyncAlertmanagersForOrgs(context.Background()))
require.Eventually(t, func() bool {
for _, org := range orgs {
_, err := moa.AlertmanagerFor(org)
if err != nil {
return false
}
}
return true
}, 10*time.Second, 100*time.Millisecond)
return moa
}


@ -0,0 +1,88 @@
package sender
import (
"encoding/json"
"io/ioutil"
"net/http"
"net/http/httptest"
"sync"
"testing"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type FakeExternalAlertmanager struct {
t *testing.T
mtx sync.Mutex
alerts amv2.PostableAlerts
Server *httptest.Server
}
func NewFakeExternalAlertmanager(t *testing.T) *FakeExternalAlertmanager {
t.Helper()
am := &FakeExternalAlertmanager{
t: t,
alerts: amv2.PostableAlerts{},
}
am.Server = httptest.NewServer(http.HandlerFunc(am.Handler()))
return am
}
func (am *FakeExternalAlertmanager) URL() string {
return am.Server.URL
}
func (am *FakeExternalAlertmanager) AlertNamesCompare(expected []string) bool {
n := []string{}
alerts := am.Alerts()
if len(expected) != len(alerts) {
return false
}
for _, a := range am.Alerts() {
for k, v := range a.Alert.Labels {
if k == model.AlertNameLabel {
n = append(n, v)
}
}
}
return assert.ObjectsAreEqual(expected, n)
}
func (am *FakeExternalAlertmanager) AlertsCount() int {
am.mtx.Lock()
defer am.mtx.Unlock()
return len(am.alerts)
}
func (am *FakeExternalAlertmanager) Alerts() amv2.PostableAlerts {
am.mtx.Lock()
defer am.mtx.Unlock()
return am.alerts
}
func (am *FakeExternalAlertmanager) Handler() func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
b, err := ioutil.ReadAll(r.Body)
require.NoError(am.t, err)
a := amv2.PostableAlerts{}
require.NoError(am.t, json.Unmarshal(b, &a))
am.mtx.Lock()
am.alerts = append(am.alerts, a...)
am.mtx.Unlock()
}
}
func (am *FakeExternalAlertmanager) Close() {
am.Server.Close()
}


@ -17,6 +17,7 @@ type UpdateAdminConfigurationCmd struct {
AdminConfiguration *ngmodels.AdminConfiguration
}
//go:generate mockery --name AdminConfigurationStore --structname AdminConfigurationStoreMock --inpackage --filename admin_configuration_store_mock.go --with-expecter
type AdminConfigurationStore interface {
GetAdminConfiguration(orgID int64) (*ngmodels.AdminConfiguration, error)
GetAdminConfigurations() ([]*ngmodels.AdminConfiguration, error)
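The mockery-generated expecter API gives the router tests a typed way to stub this store without a hand-written fake. A short fragment showing how a consuming test (as in the sender tests above) might drive it; the URL and org ID are placeholders:

```go
// Stub the admin-configuration store with one org pointing at an external Alertmanager.
configStore := &store.AdminConfigurationStoreMock{}
getConfigs := configStore.EXPECT().GetAdminConfigurations()
getConfigs.Return([]*models.AdminConfiguration{
	{OrgID: 1, Alertmanagers: []string{"http://localhost:9093"}, SendAlertsTo: models.AllAlertmanagers},
}, nil)

// Calling Return again on the saved expectation overrides the previous values,
// which is how the router tests simulate configuration changes between syncs.
getConfigs.Return(nil, nil)
```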


@ -0,0 +1,186 @@
// Code generated by mockery v2.10.0. DO NOT EDIT.
package store
import (
models "github.com/grafana/grafana/pkg/services/ngalert/models"
mock "github.com/stretchr/testify/mock"
)
// AdminConfigurationStoreMock is an autogenerated mock type for the AdminConfigurationStore type
type AdminConfigurationStoreMock struct {
mock.Mock
}
type AdminConfigurationStoreMock_Expecter struct {
mock *mock.Mock
}
func (_m *AdminConfigurationStoreMock) EXPECT() *AdminConfigurationStoreMock_Expecter {
return &AdminConfigurationStoreMock_Expecter{mock: &_m.Mock}
}
// DeleteAdminConfiguration provides a mock function with given fields: orgID
func (_m *AdminConfigurationStoreMock) DeleteAdminConfiguration(orgID int64) error {
ret := _m.Called(orgID)
var r0 error
if rf, ok := ret.Get(0).(func(int64) error); ok {
r0 = rf(orgID)
} else {
r0 = ret.Error(0)
}
return r0
}
// AdminConfigurationStoreMock_DeleteAdminConfiguration_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteAdminConfiguration'
type AdminConfigurationStoreMock_DeleteAdminConfiguration_Call struct {
*mock.Call
}
// DeleteAdminConfiguration is a helper method to define mock.On call
// - orgID int64
func (_e *AdminConfigurationStoreMock_Expecter) DeleteAdminConfiguration(orgID interface{}) *AdminConfigurationStoreMock_DeleteAdminConfiguration_Call {
return &AdminConfigurationStoreMock_DeleteAdminConfiguration_Call{Call: _e.mock.On("DeleteAdminConfiguration", orgID)}
}
func (_c *AdminConfigurationStoreMock_DeleteAdminConfiguration_Call) Run(run func(orgID int64)) *AdminConfigurationStoreMock_DeleteAdminConfiguration_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(int64))
})
return _c
}
func (_c *AdminConfigurationStoreMock_DeleteAdminConfiguration_Call) Return(_a0 error) *AdminConfigurationStoreMock_DeleteAdminConfiguration_Call {
_c.Call.Return(_a0)
return _c
}
// GetAdminConfiguration provides a mock function with given fields: orgID
func (_m *AdminConfigurationStoreMock) GetAdminConfiguration(orgID int64) (*models.AdminConfiguration, error) {
ret := _m.Called(orgID)
var r0 *models.AdminConfiguration
if rf, ok := ret.Get(0).(func(int64) *models.AdminConfiguration); ok {
r0 = rf(orgID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*models.AdminConfiguration)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(int64) error); ok {
r1 = rf(orgID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// AdminConfigurationStoreMock_GetAdminConfiguration_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetAdminConfiguration'
type AdminConfigurationStoreMock_GetAdminConfiguration_Call struct {
*mock.Call
}
// GetAdminConfiguration is a helper method to define mock.On call
// - orgID int64
func (_e *AdminConfigurationStoreMock_Expecter) GetAdminConfiguration(orgID interface{}) *AdminConfigurationStoreMock_GetAdminConfiguration_Call {
return &AdminConfigurationStoreMock_GetAdminConfiguration_Call{Call: _e.mock.On("GetAdminConfiguration", orgID)}
}
func (_c *AdminConfigurationStoreMock_GetAdminConfiguration_Call) Run(run func(orgID int64)) *AdminConfigurationStoreMock_GetAdminConfiguration_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(int64))
})
return _c
}
func (_c *AdminConfigurationStoreMock_GetAdminConfiguration_Call) Return(_a0 *models.AdminConfiguration, _a1 error) *AdminConfigurationStoreMock_GetAdminConfiguration_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// GetAdminConfigurations provides a mock function with given fields:
func (_m *AdminConfigurationStoreMock) GetAdminConfigurations() ([]*models.AdminConfiguration, error) {
ret := _m.Called()
var r0 []*models.AdminConfiguration
if rf, ok := ret.Get(0).(func() []*models.AdminConfiguration); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*models.AdminConfiguration)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// AdminConfigurationStoreMock_GetAdminConfigurations_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetAdminConfigurations'
type AdminConfigurationStoreMock_GetAdminConfigurations_Call struct {
*mock.Call
}
// GetAdminConfigurations is a helper method to define mock.On call
func (_e *AdminConfigurationStoreMock_Expecter) GetAdminConfigurations() *AdminConfigurationStoreMock_GetAdminConfigurations_Call {
return &AdminConfigurationStoreMock_GetAdminConfigurations_Call{Call: _e.mock.On("GetAdminConfigurations")}
}
func (_c *AdminConfigurationStoreMock_GetAdminConfigurations_Call) Run(run func()) *AdminConfigurationStoreMock_GetAdminConfigurations_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
func (_c *AdminConfigurationStoreMock_GetAdminConfigurations_Call) Return(_a0 []*models.AdminConfiguration, _a1 error) *AdminConfigurationStoreMock_GetAdminConfigurations_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// UpdateAdminConfiguration provides a mock function with given fields: _a0
func (_m *AdminConfigurationStoreMock) UpdateAdminConfiguration(_a0 UpdateAdminConfigurationCmd) error {
ret := _m.Called(_a0)
var r0 error
if rf, ok := ret.Get(0).(func(UpdateAdminConfigurationCmd) error); ok {
r0 = rf(_a0)
} else {
r0 = ret.Error(0)
}
return r0
}
// AdminConfigurationStoreMock_UpdateAdminConfiguration_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateAdminConfiguration'
type AdminConfigurationStoreMock_UpdateAdminConfiguration_Call struct {
*mock.Call
}
// UpdateAdminConfiguration is a helper method to define mock.On call
// - _a0 UpdateAdminConfigurationCmd
func (_e *AdminConfigurationStoreMock_Expecter) UpdateAdminConfiguration(_a0 interface{}) *AdminConfigurationStoreMock_UpdateAdminConfiguration_Call {
return &AdminConfigurationStoreMock_UpdateAdminConfiguration_Call{Call: _e.mock.On("UpdateAdminConfiguration", _a0)}
}
func (_c *AdminConfigurationStoreMock_UpdateAdminConfiguration_Call) Run(run func(_a0 UpdateAdminConfigurationCmd)) *AdminConfigurationStoreMock_UpdateAdminConfiguration_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(UpdateAdminConfigurationCmd))
})
return _c
}
func (_c *AdminConfigurationStoreMock_UpdateAdminConfiguration_Call) Return(_a0 error) *AdminConfigurationStoreMock_UpdateAdminConfiguration_Call {
_c.Call.Return(_a0)
return _c
}


@ -2,12 +2,8 @@ package store
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"math/rand"
"net/http"
"net/http/httptest"
"sync"
"testing"
@ -16,11 +12,6 @@ import (
models2 "github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/ngalert/models"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func NewFakeRuleStore(t *testing.T) *FakeRuleStore {
@ -437,79 +428,6 @@ func (f *FakeAdminConfigStore) UpdateAdminConfiguration(cmd UpdateAdminConfigura
return nil
}
type FakeExternalAlertmanager struct {
t *testing.T
mtx sync.Mutex
alerts amv2.PostableAlerts
Server *httptest.Server
}
func NewFakeExternalAlertmanager(t *testing.T) *FakeExternalAlertmanager {
t.Helper()
am := &FakeExternalAlertmanager{
t: t,
alerts: amv2.PostableAlerts{},
}
am.Server = httptest.NewServer(http.HandlerFunc(am.Handler()))
return am
}
func (am *FakeExternalAlertmanager) URL() string {
return am.Server.URL
}
func (am *FakeExternalAlertmanager) AlertNamesCompare(expected []string) bool {
n := []string{}
alerts := am.Alerts()
if len(expected) != len(alerts) {
return false
}
for _, a := range am.Alerts() {
for k, v := range a.Alert.Labels {
if k == model.AlertNameLabel {
n = append(n, v)
}
}
}
return assert.ObjectsAreEqual(expected, n)
}
func (am *FakeExternalAlertmanager) AlertsCount() int {
am.mtx.Lock()
defer am.mtx.Unlock()
return len(am.alerts)
}
func (am *FakeExternalAlertmanager) Alerts() amv2.PostableAlerts {
am.mtx.Lock()
defer am.mtx.Unlock()
return am.alerts
}
func (am *FakeExternalAlertmanager) Handler() func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
b, err := ioutil.ReadAll(r.Body)
require.NoError(am.t, err)
a := amv2.PostableAlerts{}
require.NoError(am.t, json.Unmarshal(b, &a))
am.mtx.Lock()
am.alerts = append(am.alerts, a...)
am.mtx.Unlock()
}
}
func (am *FakeExternalAlertmanager) Close() {
am.Server.Close()
}
type FakeAnnotationsRepo struct {
mtx sync.Mutex
Items []*annotations.Item


@ -15,7 +15,7 @@ import (
"github.com/grafana/grafana/pkg/models"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/ngalert/sender"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/tests/testinfra"
)
@ -54,9 +54,9 @@ func TestAdminConfiguration_SendingToExternalAlertmanagers(t *testing.T) {
})
// Create a couple of "fake" Alertmanagers
fakeAM1 := store.NewFakeExternalAlertmanager(t)
fakeAM2 := store.NewFakeExternalAlertmanager(t)
fakeAM3 := store.NewFakeExternalAlertmanager(t)
fakeAM1 := sender.NewFakeExternalAlertmanager(t)
fakeAM2 := sender.NewFakeExternalAlertmanager(t)
fakeAM3 := sender.NewFakeExternalAlertmanager(t)
// Now, let's test the configuration API.
{