grafana/pkg/services/ngalert/ngalert.go
Sofia Papagiannaki 3cac10e598
AlertingNG: Create a scheduler to evaluate alert definitions (#29305)
* Always use cache: stop passing skipCache among ngalert functions

* Add updated column

* Scheduler initial draft

* Add retry on failure

* Allow settting/updating alert definition interval

Set default interval if no interval is provided during alert definition creation.
Keep existing alert definition interval if no interval is provided during alert definition update.

* Parameterise alerting.Ticker to run on custom interval

* Allow updating alert definition interval without having to provide the queries and expressions

* Add schedule tests

* Use xorm tags for having initialisms with consistent case in Go

* Add ability to pause/unpause the scheduler

* Add alert definition versioning

* Optimise scheduler to fetch alert definition only when it's necessary

* Change MySQL data column to mediumtext

* Delete alert definition versions

* Increase default scheduler interval to 10 seconds

* Fix setting OrgID on updates

* Add validation for alert definition name length

* Recreate tables
2020-12-17 16:00:09 +02:00

98 lines
2.8 KiB
Go

package ngalert
import (
"context"
"time"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/api/routing"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/registry"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
"github.com/grafana/grafana/pkg/setting"
)
const (
maxAttempts int64 = 3
// scheduler interval
// changing this value is discouraged
// because this could cause existing alert definition
// with intervals that are not exactly divided by this number
// not to be evaluated
baseIntervalSeconds = 10
// default alert definiiton interval
defaultIntervalSeconds int64 = 6 * baseIntervalSeconds
)
// AlertNG is the service for evaluating the condition of an alert definition.
type AlertNG struct {
Cfg *setting.Cfg `inject:""`
DatasourceCache datasources.CacheService `inject:""`
RouteRegister routing.RouteRegister `inject:""`
SQLStore *sqlstore.SQLStore `inject:""`
log log.Logger
schedule *schedule
}
func init() {
registry.RegisterService(&AlertNG{})
}
// Init initializes the AlertingService.
func (ng *AlertNG) Init() error {
ng.log = log.New("ngalert")
ng.registerAPIEndpoints()
ng.schedule = newScheduler(clock.New(), baseIntervalSeconds*time.Second, ng.log, nil)
return nil
}
// Run starts the scheduler
func (ng *AlertNG) Run(ctx context.Context) error {
ng.log.Debug("ngalert starting")
return ng.alertingTicker(ctx)
}
// IsDisabled returns true if the alerting service is disable for this instance.
func (ng *AlertNG) IsDisabled() bool {
if ng.Cfg == nil {
return true
}
// Check also about expressions?
return !ng.Cfg.IsNgAlertEnabled()
}
// AddMigration defines database migrations.
// If Alerting NG is not enabled does nothing.
func (ng *AlertNG) AddMigration(mg *migrator.Migrator) {
if ng.IsDisabled() {
return
}
addAlertDefinitionMigrations(mg)
addAlertDefinitionVersionMigrations(mg)
}
// LoadAlertCondition returns a Condition object for the given alertDefinitionID.
func (ng *AlertNG) LoadAlertCondition(alertDefinitionID int64) (*eval.Condition, error) {
getAlertDefinitionByIDQuery := getAlertDefinitionByIDQuery{ID: alertDefinitionID}
if err := ng.getAlertDefinitionByID(&getAlertDefinitionByIDQuery); err != nil {
return nil, err
}
alertDefinition := getAlertDefinitionByIDQuery.Result
err := ng.validateAlertDefinition(alertDefinition, true)
if err != nil {
return nil, err
}
return &eval.Condition{
RefID: alertDefinition.Condition,
OrgID: alertDefinition.OrgID,
QueriesAndExpressions: alertDefinition.Data,
}, nil
}