Alerting: Update DbStore to use disabled orgs from the config (#52156)

* Update DbStore to use the UnifiedAlerting settings struct instead of copying individual fields (sketched below)
* Remove disabled-orgs handling from the scheduler; the DB store now reads the disabled orgs from its config
* Remove the scheduler test for rules under a disabled org, since that filtering now happens in the store
Yuriy Tseretyan 2022-07-15 14:13:30 -04:00 committed by GitHub
parent 35d98104ad
commit 6e1e4a4215
13 changed files with 41 additions and 54 deletions
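At a glance, the commit replaces DBstore's individually copied settings with the whole unified-alerting settings struct. A minimal before/after sketch of the type, assembled from the hunks below (other fields such as DashboardService omitted):

    // Before: the store carried copies of individual settings.
    type DBstore struct {
        // the base scheduler tick rate; it's used for validating definition interval
        BaseInterval time.Duration
        // default alert definition interval
        DefaultInterval time.Duration
        SQLStore        *sqlstore.SQLStore
        Logger          log.Logger
        FolderService   dashboards.FolderService
    }

    // After: the store holds the whole section; BaseInterval,
    // DefaultRuleEvaluationInterval and DisabledOrgs are all read from Cfg.
    type DBstore struct {
        Cfg           setting.UnifiedAlertingSettings
        SQLStore      *sqlstore.SQLStore
        Logger        log.Logger
        FolderService dashboards.FolderService
    }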

View File

@@ -8,6 +8,10 @@ import (
"testing"
"time"
prometheus "github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/timeinterval"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log"
gfcore "github.com/grafana/grafana/pkg/models"
@@ -18,10 +22,8 @@ import (
"github.com/grafana/grafana/pkg/services/secrets"
secrets_fakes "github.com/grafana/grafana/pkg/services/secrets/fakes"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/web"
prometheus "github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/timeinterval"
"github.com/stretchr/testify/require"
)
func TestProvisioningApi(t *testing.T) {
@@ -322,8 +324,10 @@ func createTestEnv(t *testing.T) testEnvironment {
})
sqlStore := sqlstore.InitTestDB(t)
store := store.DBstore{
SQLStore: sqlStore,
BaseInterval: time.Second * 10,
SQLStore: sqlStore,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: time.Second * 10,
},
}
quotas := &provisioning.MockQuotaChecker{}
quotas.EXPECT().LimitOK()

View File

@@ -316,8 +316,6 @@ type ListAlertRulesQuery struct {
}
type GetAlertRulesForSchedulingQuery struct {
ExcludeOrgIDs []int64
Result []*SchedulableAlertRule
}

View File

@@ -104,8 +104,7 @@ func (ng *AlertNG) init() error {
var err error
store := &store.DBstore{
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
DefaultInterval: ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval,
Cfg: ng.Cfg.UnifiedAlerting,
SQLStore: ng.SQLStore,
Logger: ng.Log,
FolderService: ng.folderService,

View File

@@ -36,8 +36,10 @@ func setupAMTest(t *testing.T) *Alertmanager {
m := metrics.NewAlertmanagerMetrics(prometheus.NewRegistry())
sqlStore := sqlstore.InitTestDB(t)
s := &store.DBstore{
BaseInterval: 10 * time.Second,
DefaultInterval: 60 * time.Second,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: 10 * time.Second,
DefaultRuleEvaluationInterval: 60 * time.Second,
},
SQLStore: sqlStore,
Logger: log.New("alertmanager-test"),
DashboardService: dashboards.NewFakeDashboardService(t),

View File

@@ -10,6 +10,8 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
"github.com/stretchr/testify/require"
)
@@ -174,8 +176,10 @@ func createAlertRuleService(t *testing.T) AlertRuleService {
t.Helper()
sqlStore := sqlstore.InitTestDB(t)
store := store.DBstore{
SQLStore: sqlStore,
BaseInterval: time.Second * 10,
SQLStore: sqlStore,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: time.Second * 10,
},
}
quotas := MockQuotaChecker{}
quotas.EXPECT().LimitOK()

View File

@@ -36,16 +36,14 @@ func sortedUIDs(alertRules []*models.SchedulableAlertRule) []string {
// updateSchedulableAlertRules updates the alert rules for the scheduler.
// It returns an error if the database is unavailable or the query returned
// an error.
func (sch *schedule) updateSchedulableAlertRules(ctx context.Context, disabledOrgs []int64) error {
func (sch *schedule) updateSchedulableAlertRules(ctx context.Context) error {
start := time.Now()
defer func() {
sch.metrics.UpdateSchedulableAlertRulesDuration.Observe(
time.Since(start).Seconds())
}()
q := models.GetAlertRulesForSchedulingQuery{
ExcludeOrgIDs: disabledOrgs,
}
q := models.GetAlertRulesForSchedulingQuery{}
if err := sch.ruleStore.GetAlertRulesForScheduling(ctx, &q); err != nil {
return fmt.Errorf("failed to get alert rules: %w", err)
}
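For context, the only caller changes in step with this signature: it no longer builds a disabledOrgs slice before each refresh (this mirrors the schedulePeriodic hunk further down):

    if err := sch.updateSchedulableAlertRules(ctx); err != nil {
        sch.log.Error("scheduler failed to update alert rules", "err", err)
    }
    // The registry simply holds whatever the store returned; rules from
    // disabled orgs never reach the scheduler.
    alertRules := sch.schedulableAlertRules.all()
    sch.log.Debug("alert rules fetched", "count", len(alertRules))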

View File

@@ -90,7 +90,6 @@ type schedule struct {
metrics *metrics.Scheduler
alertsSender AlertsSender
disabledOrgs map[int64]struct{}
minRuleInterval time.Duration
// schedulableAlertRules contains the alert rules that are considered for
@@ -137,7 +136,6 @@ func NewScheduler(cfg SchedulerCfg, appURL *url.URL, stateManager *state.Manager
appURL: appURL,
disableGrafanaFolder: cfg.Cfg.ReservedLabels.IsReservedLabelDisabled(ngmodels.FolderTitleLabel),
stateManager: stateManager,
disabledOrgs: cfg.Cfg.DisabledOrgs,
minRuleInterval: cfg.Cfg.MinInterval,
schedulableAlertRules: schedulableAlertRulesRegistry{rules: make(map[ngmodels.AlertRuleKey]*ngmodels.SchedulableAlertRule)},
bus: bus,
@@ -224,17 +222,13 @@ func (sch *schedule) schedulePeriodic(ctx context.Context) error {
sch.metrics.BehindSeconds.Set(start.Sub(tick).Seconds())
tickNum := tick.Unix() / int64(sch.baseInterval.Seconds())
disabledOrgs := make([]int64, 0, len(sch.disabledOrgs))
for disabledOrg := range sch.disabledOrgs {
disabledOrgs = append(disabledOrgs, disabledOrg)
}
if err := sch.updateSchedulableAlertRules(ctx, disabledOrgs); err != nil {
if err := sch.updateSchedulableAlertRules(ctx); err != nil {
sch.log.Error("scheduler failed to update alert rules", "err", err)
}
alertRules := sch.schedulableAlertRules.all()
sch.log.Debug("alert rules fetched", "count", len(alertRules), "disabled_orgs", disabledOrgs)
sch.log.Debug("alert rules fetched", "count", len(alertRules))
// registeredDefinitions is a map used for finding deleted alert rules
// initially it is assigned to all known alert rules from the previous cycle

View File

@@ -139,8 +139,6 @@ func TestAlertingTicker(t *testing.T) {
// create alert rule under main org with one second interval
alerts = append(alerts, tests.CreateTestAlertRule(t, ctx, dbstore, 1, mainOrgID))
const disabledOrgID int64 = 3
evalAppliedCh := make(chan evalAppliedInfo, len(alerts))
stopAppliedCh := make(chan models.AlertRuleKey, len(alerts))
@@ -149,9 +147,6 @@
cfg := setting.UnifiedAlertingSettings{
BaseInterval: time.Second,
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.
DisabledOrgs: map[int64]struct{}{
disabledOrgID: {},
},
}
notifier := &schedule.AlertsSenderMock{}
@@ -243,15 +238,6 @@
tick := advanceClock(t, mockedClock)
assertEvalRun(t, evalAppliedCh, tick, expectedAlertRulesEvaluated...)
})
// create alert rule with one second interval under disabled org
alerts = append(alerts, tests.CreateTestAlertRule(t, ctx, dbstore, 1, disabledOrgID))
expectedAlertRulesEvaluated = []models.AlertRuleKey{alerts[2].GetKey()}
t.Run(fmt.Sprintf("on 8th tick alert rules: %s should be evaluated", concatenate(expectedAlertRulesEvaluated)), func(t *testing.T) {
tick := advanceClock(t, mockedClock)
assertEvalRun(t, evalAppliedCh, tick, expectedAlertRulesEvaluated...)
})
}
func assertEvalRun(t *testing.T, ch <-chan evalAppliedInfo, tick time.Time, keys ...models.AlertRuleKey) {

View File

@@ -403,9 +403,9 @@ func (st DBstore) GetAlertRulesForScheduling(ctx context.Context, query *ngmodel
return st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
alerts := make([]*ngmodels.SchedulableAlertRule, 0)
q := sess.Table("alert_rule")
if len(query.ExcludeOrgIDs) > 0 {
excludeOrgs := make([]interface{}, 0, len(query.ExcludeOrgIDs))
for _, orgID := range query.ExcludeOrgIDs {
if len(st.Cfg.DisabledOrgs) > 0 {
excludeOrgs := make([]interface{}, 0, len(st.Cfg.DisabledOrgs))
for orgID := range st.Cfg.DisabledOrgs {
excludeOrgs = append(excludeOrgs, orgID)
}
q = q.NotIn("org_id", excludeOrgs...)
@@ -449,7 +449,7 @@ func (st DBstore) validateAlertRule(alertRule ngmodels.AlertRule) error {
return fmt.Errorf("%w: title is empty", ngmodels.ErrAlertRuleFailedValidation)
}
if err := ngmodels.ValidateRuleGroupInterval(alertRule.IntervalSeconds, int64(st.BaseInterval.Seconds())); err != nil {
if err := ngmodels.ValidateRuleGroupInterval(alertRule.IntervalSeconds, int64(st.Cfg.BaseInterval.Seconds())); err != nil {
return err
}
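Taken together, the org exclusion is now driven entirely by the store's config. A minimal, hypothetical caller-side sketch (sqlStore and ctx are assumed to be in scope; org 3 stands in for a disabled org):

    st := store.DBstore{
        SQLStore: sqlStore,
        Cfg: setting.UnifiedAlertingSettings{
            BaseInterval: 10 * time.Second,
            DisabledOrgs: map[int64]struct{}{3: {}}, // orgs listed here are filtered out of the query
        },
    }
    q := models.GetAlertRulesForSchedulingQuery{}
    if err := st.GetAlertRulesForScheduling(ctx, &q); err != nil {
        return fmt.Errorf("failed to get alert rules: %w", err)
    }
    // q.Result now contains schedulable rules from every org except those in DisabledOrgs.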

View File

@@ -12,18 +12,21 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
func TestUpdateAlertRules(t *testing.T) {
sqlStore := sqlstore.InitTestDB(t)
store := DBstore{
SQLStore: sqlStore,
BaseInterval: time.Duration(rand.Int63n(100)) * time.Second,
SQLStore: sqlStore,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: time.Duration(rand.Int63n(100)) * time.Second,
},
}
createRule := func(t *testing.T) *models.AlertRule {
t.Helper()
rule := models.AlertRuleGen(withIntervalMatching(store.BaseInterval))()
rule := models.AlertRuleGen(withIntervalMatching(store.Cfg.BaseInterval))()
err := sqlStore.WithDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
_, err := sess.Table(models.AlertRule{}).InsertOne(rule)
if err != nil {

View File

@@ -9,6 +9,7 @@ import (
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
)
// TimeNow makes it possible to test usage of time
@@ -28,10 +29,7 @@ type AlertingStore interface {
// DBstore stores the alert definitions and instances in the database.
type DBstore struct {
// the base scheduler tick rate; it's used for validating definition interval
BaseInterval time.Duration
// default alert definition interval
DefaultInterval time.Duration
Cfg setting.UnifiedAlertingSettings
SQLStore *sqlstore.SQLStore
Logger log.Logger
FolderService dashboards.FolderService
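Production wiring changes the same way (compare the ngalert init and provisioning hunks in this commit): callers hand the whole config section to the store instead of copying fields out of it. A rough sketch, with cfg, sqlStore, logger and folderService assumed to be in scope:

    st := store.DBstore{
        Cfg:           cfg.UnifiedAlerting, // carries BaseInterval, DefaultRuleEvaluationInterval and DisabledOrgs
        SQLStore:      sqlStore,
        Logger:        logger,
        FolderService: folderService,
    }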

View File

@@ -68,8 +68,10 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
)
require.NoError(t, err)
return ng, &store.DBstore{
SQLStore: ng.SQLStore,
BaseInterval: baseInterval * time.Second,
SQLStore: ng.SQLStore,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: baseInterval * time.Second,
},
Logger: log.New("ngalert-test"),
DashboardService: dashboardService,
}

View File

@@ -247,8 +247,7 @@ func (ps *ProvisioningServiceImpl) ProvisionDashboards(ctx context.Context) erro
func (ps *ProvisioningServiceImpl) ProvisionAlertRules(ctx context.Context) error {
alertRulesPath := filepath.Join(ps.Cfg.ProvisioningPath, "alerting")
st := store.DBstore{
BaseInterval: ps.Cfg.UnifiedAlerting.BaseInterval,
DefaultInterval: ps.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval,
Cfg: ps.Cfg.UnifiedAlerting,
SQLStore: ps.SQLStore,
Logger: ps.log,
FolderService: nil, // we don't use it yet