diff --git a/pkg/services/ngalert/schedule/fetcher.go b/pkg/services/ngalert/schedule/fetcher.go index 1db61e9ad9a..fba2831d48f 100644 --- a/pkg/services/ngalert/schedule/fetcher.go +++ b/pkg/services/ngalert/schedule/fetcher.go @@ -3,36 +3,11 @@ package schedule import ( "context" "fmt" - "hash/fnv" - "sort" "time" "github.com/grafana/grafana/pkg/services/ngalert/models" ) -// hashUIDs returns a fnv64 hash of the UIDs for all alert rules. -// The order of the alert rules does not matter as hashUIDs sorts -// the UIDs in increasing order. -func hashUIDs(alertRules []*models.AlertRule) uint64 { - h := fnv.New64() - for _, uid := range sortedUIDs(alertRules) { - // We can ignore err as fnv64 does not return an error - // nolint:errcheck,gosec - h.Write([]byte(uid)) - } - return h.Sum64() -} - -// sortedUIDs returns a slice of sorted UIDs. -func sortedUIDs(alertRules []*models.AlertRule) []string { - uids := make([]string, 0, len(alertRules)) - for _, alertRule := range alertRules { - uids = append(uids, alertRule.UID) - } - sort.Strings(uids) - return uids -} - // updateSchedulableAlertRules updates the alert rules for the scheduler. // It returns diff that contains rule keys that were updated since the last poll, // and an error if the database query encountered problems. diff --git a/pkg/services/ngalert/schedule/metrics.go b/pkg/services/ngalert/schedule/metrics.go new file mode 100644 index 00000000000..72a7f455691 --- /dev/null +++ b/pkg/services/ngalert/schedule/metrics.go @@ -0,0 +1,68 @@ +package schedule + +import ( + "fmt" + "hash/fnv" + "sort" + + "github.com/grafana/grafana/pkg/services/ngalert/metrics" + models "github.com/grafana/grafana/pkg/services/ngalert/models" +) + +// hashUIDs returns a fnv64 hash of the UIDs for all alert rules. +// The order of the alert rules does not matter as hashUIDs sorts +// the UIDs in increasing order. +func hashUIDs(alertRules []*models.AlertRule) uint64 { + h := fnv.New64() + for _, uid := range sortedUIDs(alertRules) { + // We can ignore err as fnv64 does not return an error + // nolint:errcheck,gosec + h.Write([]byte(uid)) + } + return h.Sum64() +} + +// sortedUIDs returns a slice of sorted UIDs. +func sortedUIDs(alertRules []*models.AlertRule) []string { + uids := make([]string, 0, len(alertRules)) + for _, alertRule := range alertRules { + uids = append(uids, alertRule.UID) + } + sort.Strings(uids) + return uids +} + +func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { + rulesPerOrg := make(map[int64]int64) // orgID -> count + orgsPaused := make(map[int64]int64) // orgID -> count + groupsPerOrg := make(map[int64]map[string]struct{}) // orgID -> set of groups + for _, rule := range alertRules { + rulesPerOrg[rule.OrgID]++ + + if rule.IsPaused { + orgsPaused[rule.OrgID]++ + } + + orgGroups, ok := groupsPerOrg[rule.OrgID] + if !ok { + orgGroups = make(map[string]struct{}) + groupsPerOrg[rule.OrgID] = orgGroups + } + orgGroups[rule.RuleGroup] = struct{}{} + } + + for orgID, numRules := range rulesPerOrg { + numRulesPaused := orgsPaused[orgID] + sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRuleActiveLabelValue).Set(float64(numRules - numRulesPaused)) + sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRulePausedLabelValue).Set(float64(numRulesPaused)) + } + + for orgID, groups := range groupsPerOrg { + sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups))) + } + + // While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be + // scheduled as rules could be removed before we get a chance to evaluate them. + sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules))) + sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules))) +} diff --git a/pkg/services/ngalert/schedule/fetcher_test.go b/pkg/services/ngalert/schedule/metrics_test.go similarity index 92% rename from pkg/services/ngalert/schedule/fetcher_test.go rename to pkg/services/ngalert/schedule/metrics_test.go index 6d93c4237af..6f924535cd5 100644 --- a/pkg/services/ngalert/schedule/fetcher_test.go +++ b/pkg/services/ngalert/schedule/metrics_test.go @@ -3,9 +3,8 @@ package schedule import ( "testing" + models "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/stretchr/testify/assert" - - "github.com/grafana/grafana/pkg/services/ngalert/models" ) func TestHashUIDs(t *testing.T) { diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go index 08dcd88e08f..d6a87775eac 100644 --- a/pkg/services/ngalert/schedule/schedule.go +++ b/pkg/services/ngalert/schedule/schedule.go @@ -213,41 +213,6 @@ type readyToRunItem struct { evaluation } -func (sch *schedule) updateRulesMetrics(alertRules []*ngmodels.AlertRule) { - rulesPerOrg := make(map[int64]int64) // orgID -> count - orgsPaused := make(map[int64]int64) // orgID -> count - groupsPerOrg := make(map[int64]map[string]struct{}) // orgID -> set of groups - for _, rule := range alertRules { - rulesPerOrg[rule.OrgID]++ - - if rule.IsPaused { - orgsPaused[rule.OrgID]++ - } - - orgGroups, ok := groupsPerOrg[rule.OrgID] - if !ok { - orgGroups = make(map[string]struct{}) - groupsPerOrg[rule.OrgID] = orgGroups - } - orgGroups[rule.RuleGroup] = struct{}{} - } - - for orgID, numRules := range rulesPerOrg { - numRulesPaused := orgsPaused[orgID] - sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRuleActiveLabelValue).Set(float64(numRules - numRulesPaused)) - sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRulePausedLabelValue).Set(float64(numRulesPaused)) - } - - for orgID, groups := range groupsPerOrg { - sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups))) - } - - // While these are the rules that we iterate over, at the moment there's no 100% guarantee that they'll be - // scheduled as rules could be removed before we get a chance to evaluate them. - sch.metrics.SchedulableAlertRules.Set(float64(len(alertRules))) - sch.metrics.SchedulableAlertRulesHash.Set(float64(hashUIDs(alertRules))) -} - // TODO refactor to accept a callback for tests that will be called with things that are returned currently, and return nothing. // Returns a slice of rules that were scheduled for evaluation, map of stopped rules, and a slice of updated rules func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.Group, tick time.Time) ([]readyToRunItem, map[ngmodels.AlertRuleKey]struct{}, []ngmodels.AlertRuleKeyWithVersion) {