grafana/pkg/services/ngalert/api/api_ruler.go
Alex Moreno 53945afedf
Alerting: Allow alert rule pausing from API (#62326)
* Add is_paused attr to the POST alert rule group endpoint

* Add is_paused to alerting API POST alert rule group

* Fixed tests

* Add is_paused to alerting gettable endpoints

* Fix integration tests

* Alerting: allow to pause existing rules (#62401)

* Display Pause Rule switch in Editing Rule form

* add isPaused property to form interface and dto

* map isPaused prop with is_paused value from DTO

Also update test snapshots

* Append '(Paused)' text on alert list state column when appropriate

* Change Switch styles according to discussion with UX

Also adding a tooltip with info what this means

* Adjust styles

* Fix alignment and isPaused type definition

Co-authored-by: gillesdemey <gilles.de.mey@gmail.com>

* Fix test

* Fix test

* Fix RuleList test

---------

Co-authored-by: gillesdemey <gilles.de.mey@gmail.com>

* wip

* Fix tests and add comments to clarify AlertRuleWithOptionals

* Fix one more test

* Fix tests

* Fix typo in comment

* Fix alert rule(s) cannot be paused via API

* Add integration tests for alerting api pausing flow

* Remove duplicated integration test

---------

Co-authored-by: Virginia Cepeda <virginia.cepeda@grafana.com>
Co-authored-by: gillesdemey <gilles.de.mey@gmail.com>
Co-authored-by: George Robinson <george.robinson@grafana.com>
2023-02-01 13:15:03 +01:00

529 lines
20 KiB
Go

package api
import (
"context"
"errors"
"fmt"
"net/http"
"strings"
"time"
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/api/apierrors"
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/accesscontrol"
contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model"
"github.com/grafana/grafana/pkg/services/dashboards"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/quota"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
type ConditionValidator interface {
// Validate validates that the condition is correct. Returns nil if the condition is correct. Otherwise, error that describes the failure
Validate(ctx eval.EvaluationContext, condition ngmodels.Condition) error
}
type RulerSrv struct {
xactManager provisioning.TransactionManager
provenanceStore provisioning.ProvisioningStore
store RuleStore
QuotaService quota.Service
scheduleService schedule.ScheduleService
log log.Logger
cfg *setting.UnifiedAlertingSettings
ac accesscontrol.AccessControl
conditionValidator ConditionValidator
}
var (
errProvisionedResource = errors.New("request affects resources created via provisioning API")
)
// RouteDeleteAlertRules deletes all alert rules the user is authorized to access in the given namespace
// or, if non-empty, a specific group of rules in the namespace.
// Returns http.StatusUnauthorized if user does not have access to any of the rules that match the filter.
// Returns http.StatusBadRequest if all rules that match the filter and the user is authorized to delete are provisioned.
func (srv RulerSrv) RouteDeleteAlertRules(c *contextmodel.ReqContext, namespaceTitle string, group string) response.Response {
namespace, err := srv.store.GetNamespaceByTitle(c.Req.Context(), namespaceTitle, c.SignedInUser.OrgID, c.SignedInUser, true)
if err != nil {
return toNamespaceErrorResponse(err)
}
var loggerCtx = []interface{}{
"namespace",
namespace.Title,
}
var ruleGroup string
if group != "" {
ruleGroup = group
loggerCtx = append(loggerCtx, "group", group)
}
logger := srv.log.New(loggerCtx...)
hasAccess := func(evaluator accesscontrol.Evaluator) bool {
return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqOrgAdminOrEditor, evaluator)
}
provenances, err := srv.provenanceStore.GetProvenances(c.Req.Context(), c.SignedInUser.OrgID, (&ngmodels.AlertRule{}).ResourceType())
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to fetch provenances of alert rules")
}
deletedGroups := make(map[ngmodels.AlertRuleGroupKey][]ngmodels.AlertRuleKey)
err = srv.xactManager.InTransaction(c.Req.Context(), func(ctx context.Context) error {
unauthz, provisioned := false, false
q := ngmodels.ListAlertRulesQuery{
OrgID: c.SignedInUser.OrgID,
NamespaceUIDs: []string{namespace.UID},
RuleGroup: ruleGroup,
}
if err = srv.store.ListAlertRules(ctx, &q); err != nil {
return err
}
if len(q.Result) == 0 {
logger.Debug("no alert rules to delete from namespace/group")
return nil
}
var deletionCandidates = make(map[ngmodels.AlertRuleGroupKey][]*ngmodels.AlertRule)
for _, rule := range q.Result {
key := rule.GetGroupKey()
deletionCandidates[key] = append(deletionCandidates[key], rule)
}
rulesToDelete := make([]string, 0, len(q.Result))
for groupKey, rules := range deletionCandidates {
if !authorizeAccessToRuleGroup(rules, hasAccess) {
unauthz = true
continue
}
if containsProvisionedAlerts(provenances, rules) {
provisioned = true
continue
}
uid := make([]string, 0, len(rules))
keys := make([]ngmodels.AlertRuleKey, 0, len(rules))
for _, rule := range rules {
uid = append(uid, rule.UID)
keys = append(keys, rule.GetKey())
}
rulesToDelete = append(rulesToDelete, uid...)
deletedGroups[groupKey] = keys
}
if len(rulesToDelete) > 0 {
return srv.store.DeleteAlertRulesByUID(ctx, c.SignedInUser.OrgID, rulesToDelete...)
}
// if none rules were deleted return an error.
// Check whether provisioned check failed first because if it is true, then all rules that the user can access (actually read via GET API) are provisioned.
if provisioned {
return errProvisionedResource
}
if unauthz {
if group == "" {
return fmt.Errorf("%w to delete any existing rules in the namespace", ErrAuthorization)
}
return fmt.Errorf("%w to delete group of the rules", ErrAuthorization)
}
return nil
})
if err != nil {
if errors.Is(err, ErrAuthorization) {
return ErrResp(http.StatusUnauthorized, err, "failed to delete rule group")
}
if errors.Is(err, errProvisionedResource) {
return ErrResp(http.StatusBadRequest, err, "failed to delete rule group")
}
return ErrResp(http.StatusInternalServerError, err, "failed to delete rule group")
}
logger.Debug("rules have been deleted from the store. updating scheduler")
for _, ruleKeys := range deletedGroups {
srv.scheduleService.DeleteAlertRule(ruleKeys...)
}
return response.JSON(http.StatusAccepted, util.DynMap{"message": "rules deleted"})
}
// RouteGetNamespaceRulesConfig returns all rules in a specific folder that user has access to
func (srv RulerSrv) RouteGetNamespaceRulesConfig(c *contextmodel.ReqContext, namespaceTitle string) response.Response {
namespace, err := srv.store.GetNamespaceByTitle(c.Req.Context(), namespaceTitle, c.SignedInUser.OrgID, c.SignedInUser, false)
if err != nil {
return toNamespaceErrorResponse(err)
}
q := ngmodels.ListAlertRulesQuery{
OrgID: c.SignedInUser.OrgID,
NamespaceUIDs: []string{namespace.UID},
}
if err := srv.store.ListAlertRules(c.Req.Context(), &q); err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to update rule group")
}
result := apimodels.NamespaceConfigResponse{}
hasAccess := func(evaluator accesscontrol.Evaluator) bool {
return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqViewer, evaluator)
}
provenanceRecords, err := srv.provenanceStore.GetProvenances(c.Req.Context(), c.SignedInUser.OrgID, (&ngmodels.AlertRule{}).ResourceType())
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get provenance for rule group")
}
ruleGroups := make(map[string]ngmodels.RulesGroup)
for _, r := range q.Result {
ruleGroups[r.RuleGroup] = append(ruleGroups[r.RuleGroup], r)
}
for groupName, rules := range ruleGroups {
if !authorizeAccessToRuleGroup(rules, hasAccess) {
continue
}
result[namespaceTitle] = append(result[namespaceTitle], toGettableRuleGroupConfig(groupName, rules, namespace.ID, provenanceRecords))
}
return response.JSON(http.StatusAccepted, result)
}
// RouteGetRulesGroupConfig returns rules that belong to a specific group in a specific namespace (folder).
// If user does not have access to at least one of the rule in the group, returns status 401 Unauthorized
func (srv RulerSrv) RouteGetRulesGroupConfig(c *contextmodel.ReqContext, namespaceTitle string, ruleGroup string) response.Response {
namespace, err := srv.store.GetNamespaceByTitle(c.Req.Context(), namespaceTitle, c.SignedInUser.OrgID, c.SignedInUser, false)
if err != nil {
return toNamespaceErrorResponse(err)
}
q := ngmodels.ListAlertRulesQuery{
OrgID: c.SignedInUser.OrgID,
NamespaceUIDs: []string{namespace.UID},
RuleGroup: ruleGroup,
}
if err := srv.store.ListAlertRules(c.Req.Context(), &q); err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get group alert rules")
}
hasAccess := func(evaluator accesscontrol.Evaluator) bool {
return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqViewer, evaluator)
}
provenanceRecords, err := srv.provenanceStore.GetProvenances(c.Req.Context(), c.SignedInUser.OrgID, (&ngmodels.AlertRule{}).ResourceType())
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get group alert rules")
}
if !authorizeAccessToRuleGroup(q.Result, hasAccess) {
return ErrResp(http.StatusUnauthorized, fmt.Errorf("%w to access the group because it does not have access to one or many data sources one or many rules in the group use", ErrAuthorization), "")
}
result := apimodels.RuleGroupConfigResponse{
GettableRuleGroupConfig: toGettableRuleGroupConfig(ruleGroup, q.Result, namespace.ID, provenanceRecords),
}
return response.JSON(http.StatusAccepted, result)
}
// RouteGetRulesConfig returns all alert rules that are available to the current user
func (srv RulerSrv) RouteGetRulesConfig(c *contextmodel.ReqContext) response.Response {
namespaceMap, err := srv.store.GetUserVisibleNamespaces(c.Req.Context(), c.OrgID, c.SignedInUser)
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get namespaces visible to the user")
}
result := apimodels.NamespaceConfigResponse{}
if len(namespaceMap) == 0 {
srv.log.Debug("user has no access to any namespaces")
return response.JSON(http.StatusOK, result)
}
namespaceUIDs := make([]string, len(namespaceMap))
for k := range namespaceMap {
namespaceUIDs = append(namespaceUIDs, k)
}
dashboardUID := c.Query("dashboard_uid")
panelID, err := getPanelIDFromRequest(c.Req)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "invalid panel_id")
}
if dashboardUID == "" && panelID != 0 {
return ErrResp(http.StatusBadRequest, errors.New("panel_id must be set with dashboard_uid"), "")
}
q := ngmodels.ListAlertRulesQuery{
OrgID: c.SignedInUser.OrgID,
NamespaceUIDs: namespaceUIDs,
DashboardUID: dashboardUID,
PanelID: panelID,
}
if err := srv.store.ListAlertRules(c.Req.Context(), &q); err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get alert rules")
}
hasAccess := func(evaluator accesscontrol.Evaluator) bool {
return accesscontrol.HasAccess(srv.ac, c)(accesscontrol.ReqViewer, evaluator)
}
provenanceRecords, err := srv.provenanceStore.GetProvenances(c.Req.Context(), c.SignedInUser.OrgID, (&ngmodels.AlertRule{}).ResourceType())
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get alert rules")
}
configs := make(map[ngmodels.AlertRuleGroupKey]ngmodels.RulesGroup)
for _, r := range q.Result {
groupKey := r.GetGroupKey()
group := configs[groupKey]
group = append(group, r)
configs[groupKey] = group
}
for groupKey, rules := range configs {
folder, ok := namespaceMap[groupKey.NamespaceUID]
if !ok {
srv.log.Error("namespace not visible to the user", "user", c.SignedInUser.UserID, "namespace", groupKey.NamespaceUID)
continue
}
if !authorizeAccessToRuleGroup(rules, hasAccess) {
continue
}
namespace := folder.Title
result[namespace] = append(result[namespace], toGettableRuleGroupConfig(groupKey.RuleGroup, rules, folder.ID, provenanceRecords))
}
return response.JSON(http.StatusOK, result)
}
func (srv RulerSrv) RoutePostNameRulesConfig(c *contextmodel.ReqContext, ruleGroupConfig apimodels.PostableRuleGroupConfig, namespaceTitle string) response.Response {
namespace, err := srv.store.GetNamespaceByTitle(c.Req.Context(), namespaceTitle, c.SignedInUser.OrgID, c.SignedInUser, true)
if err != nil {
return toNamespaceErrorResponse(err)
}
rules, err := validateRuleGroup(&ruleGroupConfig, c.SignedInUser.OrgID, namespace, func(condition ngmodels.Condition) error {
return srv.conditionValidator.Validate(eval.Context(c.Req.Context(), c.SignedInUser), condition)
}, srv.cfg)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "")
}
groupKey := ngmodels.AlertRuleGroupKey{
OrgID: c.SignedInUser.OrgID,
NamespaceUID: namespace.UID,
RuleGroup: ruleGroupConfig.Name,
}
return srv.updateAlertRulesInGroup(c, groupKey, rules)
}
// updateAlertRulesInGroup calculates changes (rules to add,update,delete), verifies that the user is authorized to do the calculated changes and updates database.
// All operations are performed in a single transaction
func (srv RulerSrv) updateAlertRulesInGroup(c *contextmodel.ReqContext, groupKey ngmodels.AlertRuleGroupKey, rules []*ngmodels.AlertRuleWithOptionals) response.Response {
var finalChanges *store.GroupDelta
hasAccess := accesscontrol.HasAccess(srv.ac, c)
err := srv.xactManager.InTransaction(c.Req.Context(), func(tranCtx context.Context) error {
logger := srv.log.New("namespace_uid", groupKey.NamespaceUID, "group", groupKey.RuleGroup, "org_id", groupKey.OrgID, "user_id", c.UserID)
groupChanges, err := store.CalculateChanges(tranCtx, srv.store, groupKey, rules)
if err != nil {
return err
}
if groupChanges.IsEmpty() {
finalChanges = groupChanges
logger.Info("no changes detected in the request. Do nothing")
return nil
}
// if RBAC is disabled the permission are limited to folder access that is done upstream
if !srv.ac.IsDisabled() {
err = authorizeRuleChanges(groupChanges, func(evaluator accesscontrol.Evaluator) bool {
return hasAccess(accesscontrol.ReqOrgAdminOrEditor, evaluator)
})
if err != nil {
return err
}
}
if err := verifyProvisionedRulesNotAffected(c.Req.Context(), srv.provenanceStore, c.OrgID, groupChanges); err != nil {
return err
}
finalChanges = store.UpdateCalculatedRuleFields(groupChanges)
logger.Debug("updating database with the authorized changes", "add", len(finalChanges.New), "update", len(finalChanges.New), "delete", len(finalChanges.Delete))
if len(finalChanges.Update) > 0 || len(finalChanges.New) > 0 {
updates := make([]ngmodels.UpdateRule, 0, len(finalChanges.Update))
inserts := make([]ngmodels.AlertRule, 0, len(finalChanges.New))
for _, update := range finalChanges.Update {
logger.Debug("updating rule", "rule_uid", update.New.UID, "diff", update.Diff.String())
updates = append(updates, ngmodels.UpdateRule{
Existing: update.Existing,
New: *update.New,
})
}
for _, rule := range finalChanges.New {
inserts = append(inserts, *rule)
}
_, err = srv.store.InsertAlertRules(tranCtx, inserts)
if err != nil {
return fmt.Errorf("failed to add rules: %w", err)
}
err = srv.store.UpdateAlertRules(tranCtx, updates)
if err != nil {
return fmt.Errorf("failed to update rules: %w", err)
}
}
if len(finalChanges.Delete) > 0 {
UIDs := make([]string, 0, len(finalChanges.Delete))
for _, rule := range finalChanges.Delete {
UIDs = append(UIDs, rule.UID)
}
if err = srv.store.DeleteAlertRulesByUID(tranCtx, c.SignedInUser.OrgID, UIDs...); err != nil {
return fmt.Errorf("failed to delete rules: %w", err)
}
}
if len(finalChanges.New) > 0 {
limitReached, err := srv.QuotaService.CheckQuotaReached(tranCtx, ngmodels.QuotaTargetSrv, &quota.ScopeParameters{
OrgID: c.OrgID,
UserID: c.UserID,
}) // alert rule is table name
if err != nil {
return fmt.Errorf("failed to get alert rules quota: %w", err)
}
if limitReached {
return ngmodels.ErrQuotaReached
}
}
return nil
})
if err != nil {
if errors.Is(err, ngmodels.ErrAlertRuleNotFound) {
return ErrResp(http.StatusNotFound, err, "failed to update rule group")
} else if errors.Is(err, ngmodels.ErrAlertRuleFailedValidation) || errors.Is(err, errProvisionedResource) {
return ErrResp(http.StatusBadRequest, err, "failed to update rule group")
} else if errors.Is(err, ngmodels.ErrQuotaReached) {
return ErrResp(http.StatusForbidden, err, "")
} else if errors.Is(err, ErrAuthorization) {
return ErrResp(http.StatusUnauthorized, err, "")
} else if errors.Is(err, store.ErrOptimisticLock) {
return ErrResp(http.StatusConflict, err, "")
}
return ErrResp(http.StatusInternalServerError, err, "failed to update rule group")
}
for _, rule := range finalChanges.Update {
srv.scheduleService.UpdateAlertRule(ngmodels.AlertRuleKey{
OrgID: c.SignedInUser.OrgID,
UID: rule.Existing.UID,
}, rule.Existing.Version+1, rule.New.IsPaused)
}
if len(finalChanges.Delete) > 0 {
keys := make([]ngmodels.AlertRuleKey, 0, len(finalChanges.Delete))
for _, rule := range finalChanges.Delete {
keys = append(keys, rule.GetKey())
}
srv.scheduleService.DeleteAlertRule(keys...)
}
if finalChanges.IsEmpty() {
return response.JSON(http.StatusAccepted, util.DynMap{"message": "no changes detected in the rule group"})
}
return response.JSON(http.StatusAccepted, util.DynMap{"message": "rule group updated successfully"})
}
func toGettableRuleGroupConfig(groupName string, rules ngmodels.RulesGroup, namespaceID int64, provenanceRecords map[string]ngmodels.Provenance) apimodels.GettableRuleGroupConfig {
rules.SortByGroupIndex()
ruleNodes := make([]apimodels.GettableExtendedRuleNode, 0, len(rules))
var interval time.Duration
if len(rules) > 0 {
interval = time.Duration(rules[0].IntervalSeconds) * time.Second
}
for _, r := range rules {
ruleNodes = append(ruleNodes, toGettableExtendedRuleNode(*r, namespaceID, provenanceRecords))
}
return apimodels.GettableRuleGroupConfig{
Name: groupName,
Interval: model.Duration(interval),
Rules: ruleNodes,
}
}
func toGettableExtendedRuleNode(r ngmodels.AlertRule, namespaceID int64, provenanceRecords map[string]ngmodels.Provenance) apimodels.GettableExtendedRuleNode {
provenance := ngmodels.ProvenanceNone
if prov, exists := provenanceRecords[r.ResourceID()]; exists {
provenance = prov
}
gettableExtendedRuleNode := apimodels.GettableExtendedRuleNode{
GrafanaManagedAlert: &apimodels.GettableGrafanaRule{
ID: r.ID,
OrgID: r.OrgID,
Title: r.Title,
Condition: r.Condition,
Data: r.Data,
Updated: r.Updated,
IntervalSeconds: r.IntervalSeconds,
Version: r.Version,
UID: r.UID,
NamespaceUID: r.NamespaceUID,
NamespaceID: namespaceID,
RuleGroup: r.RuleGroup,
NoDataState: apimodels.NoDataState(r.NoDataState),
ExecErrState: apimodels.ExecutionErrorState(r.ExecErrState),
Provenance: provenance,
IsPaused: r.IsPaused,
},
}
forDuration := model.Duration(r.For)
gettableExtendedRuleNode.ApiRuleNode = &apimodels.ApiRuleNode{
For: &forDuration,
Annotations: r.Annotations,
Labels: r.Labels,
}
return gettableExtendedRuleNode
}
func toNamespaceErrorResponse(err error) response.Response {
if errors.Is(err, ngmodels.ErrCannotEditNamespace) {
return ErrResp(http.StatusForbidden, err, err.Error())
}
if errors.Is(err, dashboards.ErrDashboardIdentifierNotSet) {
return ErrResp(http.StatusBadRequest, err, err.Error())
}
return apierrors.ToFolderErrorResponse(err)
}
// verifyProvisionedRulesNotAffected check that neither of provisioned alerts are affected by changes.
// Returns errProvisionedResource if there is at least one rule in groups affected by changes that was provisioned.
func verifyProvisionedRulesNotAffected(ctx context.Context, provenanceStore provisioning.ProvisioningStore, orgID int64, ch *store.GroupDelta) error {
provenances, err := provenanceStore.GetProvenances(ctx, orgID, (&ngmodels.AlertRule{}).ResourceType())
if err != nil {
return err
}
errorMsg := strings.Builder{}
for group, alertRules := range ch.AffectedGroups {
if !containsProvisionedAlerts(provenances, alertRules) {
continue
}
if errorMsg.Len() > 0 {
errorMsg.WriteRune(',')
}
errorMsg.WriteString(group.String())
}
if errorMsg.Len() == 0 {
return nil
}
return fmt.Errorf("%w: alert rule group [%s]", errProvisionedResource, errorMsg.String())
}