Alerting: Support for optimistic locking for alert rules (#50274)

* add support for optimistic locking for alert_rule table
* return 409 in the case of opitimistic lock
This commit is contained in:
Yuriy Tseretyan 2022-06-13 12:15:28 -04:00 committed by GitHub
parent 402b5ce4c6
commit c314ce48c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 10 deletions

View File

@ -45,6 +45,7 @@ Scopes must have an order to ensure consistency and ease of search, this helps u
## Grafana Alerting - main / unreleased
- [FEATURE] use optimistic lock by version field when updating alert rules #50274
- [ENHANCEMENT] Scheduler: Drop ticks if rule evaluation is too slow and adds a metric grafana_alerting_schedule_rule_evaluations_missed_total to track missed evaluations per rule #48885
- [ENHANCEMENT] Ticker to tick at predictable time #50197

View File

@ -258,6 +258,9 @@ func (srv *ProvisioningSrv) RoutePostAlertRule(c *models.ReqContext, ar definiti
return ErrResp(http.StatusBadRequest, err, "")
}
if err != nil {
if errors.Is(err, store.ErrOptimisticLock) {
return ErrResp(http.StatusConflict, err, "")
}
return ErrResp(http.StatusInternalServerError, err, "")
}
ar.ID = createdAlertRule.ID
@ -275,6 +278,9 @@ func (srv *ProvisioningSrv) RoutePutAlertRule(c *models.ReqContext, ar definitio
return ErrResp(http.StatusBadRequest, err, "")
}
if err != nil {
if errors.Is(err, store.ErrOptimisticLock) {
return ErrResp(http.StatusConflict, err, "")
}
return ErrResp(http.StatusInternalServerError, err, "")
}
ar.Updated = updatedAlertRule.Updated
@ -295,6 +301,9 @@ func (srv *ProvisioningSrv) RoutePutAlertRuleGroup(c *models.ReqContext, ag defi
folderUID := pathParam(c, folderUIDPathParam)
err := srv.alertRules.UpdateRuleGroup(c.Req.Context(), c.OrgId, folderUID, rulegroup, ag.Interval)
if err != nil {
if errors.Is(err, store.ErrOptimisticLock) {
return ErrResp(http.StatusConflict, err, "")
}
return ErrResp(http.StatusInternalServerError, err, "")
}
return response.JSON(http.StatusOK, ag)

View File

@ -467,6 +467,8 @@ func (srv RulerSrv) updateAlertRulesInGroup(c *models.ReqContext, groupKey ngmod
return ErrResp(http.StatusForbidden, err, "")
} else if errors.Is(err, ErrAuthorization) {
return ErrResp(http.StatusUnauthorized, err, "")
} else if errors.Is(err, store.ErrOptimisticLock) {
return ErrResp(http.StatusConflict, err, "")
}
return ErrResp(http.StatusInternalServerError, err, "failed to update rule group")
}

View File

@ -111,7 +111,7 @@ type AlertRule struct {
Data []AlertQuery
Updated time.Time
IntervalSeconds int64
Version int64
Version int64 `xorm:"version"` // this tag makes xorm add optimistic lock (see https://xorm.io/docs/chapter-06/1.lock/)
UID string `xorm:"uid"`
NamespaceUID string `xorm:"namespace_uid"`
DashboardUID *string `xorm:"dashboard_uid"`

View File

@ -34,6 +34,7 @@ type UpdateRule struct {
var (
ErrAlertRuleGroupNotFound = errors.New("rulegroup not found")
ErrOptimisticLock = errors.New("version conflict while updating a record in the database with optimistic locking")
)
// RuleStore is the interface for persisting alert rules and instances
@ -93,7 +94,7 @@ func (st DBstore) DeleteAlertRulesByUID(ctx context.Context, orgID int64, ruleUI
})
}
// DeleteAlertInstanceByRuleUID is a handler for deleting alert instances by alert rule UID when a rule has been updated
// DeleteAlertInstancesByRuleUID is a handler for deleting alert instances by alert rule UID when a rule has been updated
func (st DBstore) DeleteAlertInstancesByRuleUID(ctx context.Context, orgID int64, ruleUID string) error {
return st.SQLStore.WithTransactionalDbSession(ctx, func(sess *sqlstore.DBSession) error {
_, err := sess.Exec("DELETE FROM alert_instance WHERE rule_org_id = ? AND rule_uid = ?", orgID, ruleUID)
@ -205,7 +206,7 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
for _, r := range rules {
var parentVersion int64
r.New.ID = r.Existing.ID
r.New.Version = r.Existing.Version + 1
r.New.Version = r.Existing.Version // xorm will take care of increasing it (see https://xorm.io/docs/chapter-06/1.lock/)
if err := st.validateAlertRule(r.New); err != nil {
return err
}
@ -213,11 +214,14 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
return err
}
// no way to update multiple rules at once
if _, err := sess.ID(r.Existing.ID).AllCols().Update(r.New); err != nil {
if st.SQLStore.Dialect.IsUniqueConstraintViolation(err) {
return ngmodels.ErrAlertRuleUniqueConstraintViolation
if updated, err := sess.ID(r.Existing.ID).AllCols().Update(r.New); err != nil || updated == 0 {
if err != nil {
if st.SQLStore.Dialect.IsUniqueConstraintViolation(err) {
return ngmodels.ErrAlertRuleUniqueConstraintViolation
}
return fmt.Errorf("failed to update rule [%s] %s: %w", r.New.UID, r.New.Title, err)
}
return fmt.Errorf("failed to update rule [%s] %s: %w", r.New.UID, r.New.Title, err)
return fmt.Errorf("%w: alert rule UID %s version %d", ErrOptimisticLock, r.New.UID, r.New.Version)
}
parentVersion = r.Existing.Version
ruleVersions = append(ruleVersions, ngmodels.AlertRuleVersion{
@ -226,7 +230,7 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
RuleNamespaceUID: r.New.NamespaceUID,
RuleGroup: r.New.RuleGroup,
ParentVersion: parentVersion,
Version: r.New.Version,
Version: r.New.Version + 1,
Created: r.New.Updated,
Condition: r.New.Condition,
Title: r.New.Title,
@ -248,7 +252,7 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
})
}
// GetOrgAlertRules is a handler for retrieving alert rules of specific organisation.
// ListAlertRules is a handler for retrieving alert rules of specific organisation.
func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertRulesQuery) error {
return st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
q := sess.Table("alert_rule")
@ -317,7 +321,7 @@ func (st DBstore) GetRuleGroupInterval(ctx context.Context, orgID int64, namespa
})
}
// GetNamespaces returns the folders that are visible to the user and have at least one alert in it
// GetUserVisibleNamespaces returns the folders that are visible to the user and have at least one alert in it
func (st DBstore) GetUserVisibleNamespaces(ctx context.Context, orgID int64, user *models.SignedInUser) (map[string]*models.Folder, error) {
namespaceMap := make(map[string]*models.Folder)

View File

@ -0,0 +1,91 @@
package store
import (
"context"
"errors"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
"golang.org/x/exp/rand"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/util"
)
func TestUpdateAlertRules(t *testing.T) {
sqlStore := sqlstore.InitTestDB(t)
store := DBstore{
SQLStore: sqlStore,
BaseInterval: time.Duration(rand.Int63n(100)) * time.Second,
}
createRule := func(t *testing.T) *models.AlertRule {
t.Helper()
rule := models.AlertRuleGen(withIntervalMatching(store.BaseInterval))()
err := sqlStore.WithDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
_, err := sess.Table(models.AlertRule{}).InsertOne(rule)
if err != nil {
return err
}
dbRule := &models.AlertRule{}
exist, err := sess.Table(models.AlertRule{}).ID(rule.ID).Get(dbRule)
if err != nil {
return err
}
if !exist {
return errors.New("cannot read inserted record")
}
rule = dbRule
return nil
})
require.NoError(t, err)
return rule
}
t.Run("should increase version", func(t *testing.T) {
rule := createRule(t)
newRule := models.CopyRule(rule)
newRule.Title = util.GenerateShortUID()
err := store.UpdateAlertRules(context.Background(), []UpdateRule{{
Existing: rule,
New: *newRule,
},
})
require.NoError(t, err)
dbrule := &models.AlertRule{}
err = sqlStore.WithDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
exist, err := sess.Table(models.AlertRule{}).ID(rule.ID).Get(dbrule)
require.Truef(t, exist, fmt.Sprintf("rule with ID %d does not exist", rule.ID))
return err
})
require.NoError(t, err)
require.Equal(t, rule.Version+1, dbrule.Version)
})
t.Run("should fail due to optimistic locking if version does not match", func(t *testing.T) {
rule := createRule(t)
rule.Version-- // simulate version discrepancy
newRule := models.CopyRule(rule)
newRule.Title = util.GenerateShortUID()
err := store.UpdateAlertRules(context.Background(), []UpdateRule{{
Existing: rule,
New: *newRule,
},
})
require.ErrorIs(t, err, ErrOptimisticLock)
})
}
func withIntervalMatching(baseInterval time.Duration) func(*models.AlertRule) {
return func(rule *models.AlertRule) {
rule.IntervalSeconds = int64(baseInterval.Seconds()) * rand.Int63n(10)
rule.For = time.Duration(rule.IntervalSeconds*rand.Int63n(9)+1) * time.Second
}
}