diff --git a/CHANGELOG.md b/CHANGELOG.md index f36061b2c5..abb4c40d28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ BUG FIXES: * `tofu test` now supports accessing module outputs when the module has no resources. ([#1409](https://github.com/opentofu/opentofu/pull/1409)) * Fixed support for provider functions in tests ([#1603](https://github.com/opentofu/opentofu/pull/1603)) * Added a better error message on `for_each` block with sensitive value of unsuitable type. ([#1485](https://github.com/opentofu/opentofu/pull/1485)) +* Fix race condition on locking in gcs backend ([#1342](https://github.com/opentofu/opentofu/pull/1342)) ## Previous Releases diff --git a/internal/backend/remote-state/gcs/client.go b/internal/backend/remote-state/gcs/client.go index 6d4d565fd3..d454383cc9 100644 --- a/internal/backend/remote-state/gcs/client.go +++ b/internal/backend/remote-state/gcs/client.go @@ -7,6 +7,7 @@ package gcs import ( "encoding/json" + "errors" "fmt" "io" "strconv" @@ -135,9 +136,13 @@ func (c *remoteClient) lockError(err error) *statemgr.LockError { } info, infoErr := c.lockInfo() - if infoErr != nil { + switch { + case errors.Is(infoErr, storage.ErrObjectNotExist): + // Race condition - file exists initially but then has been deleted by other process + lockErr.InconsistentRead = true + case infoErr != nil: lockErr.Err = multierror.Append(lockErr.Err, infoErr) - } else { + default: lockErr.Info = info } return lockErr diff --git a/internal/states/statemgr/locker.go b/internal/states/statemgr/locker.go index 597d4d0dfc..fde81643ef 100644 --- a/internal/states/statemgr/locker.go +++ b/internal/states/statemgr/locker.go @@ -89,9 +89,7 @@ func LockWithContext(ctx context.Context, s Locker, info *LockInfo) (string, err return "", err } - if le == nil || le.Info == nil || le.Info.ID == "" { - // If we don't have a complete LockError then there's something - // wrong with the lock. + if !le.Retriable() { return "", err } @@ -99,6 +97,11 @@ func LockWithContext(ctx context.Context, s Locker, info *LockInfo) (string, err postLockHook() } + // Lock() can be repeated without sleep + if le.RetriableWithoutDelay() { + continue + } + // there's an existing lock, wait and try again select { case <-ctx.Done(): @@ -214,6 +217,10 @@ func (l *LockInfo) String() string { type LockError struct { Info *LockInfo Err error + + // Set when writing of lock file fails because of conflict and + // then reading fails because file doesn't exist (removed by other process) + InconsistentRead bool } func (e *LockError) Error() string { @@ -227,3 +234,19 @@ func (e *LockError) Error() string { } return strings.Join(out, "\n") } + +// Retriable returns true when locking should be retried +func (e *LockError) Retriable() bool { + // If we don't have a complete LockError then there's something + // wrong with the lock. + if e == nil { + return false + } + + return e.InconsistentRead || (e.Info != nil && e.Info.ID != "") +} + +// RetriableWithoutDelay returns true when delaying can be avoided +func (e *LockError) RetriableWithoutDelay() bool { + return e != nil && e.InconsistentRead +}