Alerting: Move rule evaluation status logic out of prometheus API and into scheduler (#89141)

* Add health fields to rules and an aggregator method to the scheduler

* Move health, last error, and last eval time together to minimize state processing

* Wire up a readonly scheduler to prom api

* Extract to exported function

* Use health in api_prometheus and fix up tests

* Rename health struct to status

* Fix tests one more time

* Several new tests

* Handle inactive rules

* Push state mapping into state manager

* rename to StatusReader

* Rectify cyclo complexity rebase

* Convert existing package local status implementation to models one

* fix tests

* undo RuleDefs rename
This commit is contained in:
Alexander Weaver
2024-09-30 16:52:49 -05:00
committed by GitHub
parent 6a3eb276ef
commit 393faa8732
13 changed files with 213 additions and 23 deletions

View File

@@ -40,6 +40,8 @@ type Rule interface {
Update(lastVersion RuleVersionAndPauseStatus) bool
// Type gives the type of the rule.
Type() ngmodels.RuleType
// Status indicates the status of the evaluating rule.
Status() ngmodels.RuleStatus
}
// ruleFactoryFunc is the signature of a function that takes a context and an
// alert rule definition and returns a Rule.
// NOTE(review): presumably used to build the Rule implementation that matches
// the given rule definition — confirm against the callers.
type ruleFactoryFunc func(context.Context, *ngmodels.AlertRule) Rule
@@ -180,6 +182,10 @@ func (a *alertRule) Type() ngmodels.RuleType {
return ngmodels.RuleTypeAlerting
}
// Status returns the status that the state manager reports for this rule,
// looked up by the organization ID and rule UID stored in the rule's key.
func (a *alertRule) Status() ngmodels.RuleStatus {
	orgID, ruleUID := a.key.OrgID, a.key.UID
	return a.stateManager.GetStatusForRuleUID(orgID, ruleUID)
}
// eval signals the rule evaluation routine to perform the evaluation of the rule. Does nothing if the loop is stopped.
// Before sending a message into the channel, it does a non-blocking read to make sure that there is no concurrent send operation.
// Returns a tuple where first element is