make pg backend acquire schema-based global locks (#2411)

Signed-off-by: ollevche <ollevche@gmail.com>
Oleksandr Levchenkov 2025-01-31 14:21:36 +02:00 committed by GitHub
parent 2f27d7eb90
commit 2a4d81042b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 213 additions and 22 deletions

View File

@ -3,6 +3,7 @@
UPGRADE NOTES:
* Using the `ghcr.io/opentofu/opentofu` image as a base image for custom images is no longer supported. Please see https://opentofu.org/docs/intro/install/docker/ for instructions on building your own image.
* OpenTofu 1.10 with the `pg` backend must not be used in parallel with older versions; doing so may lead to unsafe state writes when the database is shared across multiple projects.
NEW FEATURES:
@ -27,6 +28,8 @@ BUG FIXES:
- Changing Go version to 1.22.11 in order to fix [CVE-2024-45336](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-45336) and [CVE-2024-45341](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-45341) ([#2438](https://github.com/opentofu/opentofu/pull/2438))
- Fix the error message when the default value of a complex variable contains a wrong type ([2394](https://github.com/opentofu/opentofu/issues/2394))
- Fix the way OpenTofu downloads a module that is sourced from a GitHub branch containing slashes in the name. ([2396](https://github.com/opentofu/opentofu/issues/2396))
- `pg` backend no longer fails on workspace creation for parallel runs when the database is shared across multiple projects. ([#2411](https://github.com/opentofu/opentofu/pull/2411))
## Previous Releases
For information on prior major and minor releases, see their changelogs:

View File

@ -160,7 +160,7 @@ func TestBackendConfig(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
defer dropSchemaByQuotedName(t, dbCleaner, schemaName)
var diags tfdiags.Diagnostics
b := New(encryption.StateEncryptionDisabled()).(*Backend)
@ -324,7 +324,7 @@ func TestBackendConfigSkipOptions(t *testing.T) {
if tc.Setup != nil {
tc.Setup(t, db, schemaName)
}
defer db.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
defer dropSchemaByQuotedName(t, db, schemaName)
b := backend.TestBackendConfig(t, New(encryption.StateEncryptionDisabled()), config).(*Backend)
@ -393,7 +393,7 @@ func TestBackendStates(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", pq.QuoteIdentifier(schemaName)))
defer dropSchema(t, dbCleaner, schemaName)
config := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
@ -418,7 +418,7 @@ func TestBackendStateLocks(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
defer dropSchema(t, dbCleaner, schemaName)
config := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
@ -448,7 +448,6 @@ func TestBackendConcurrentLock(t *testing.T) {
}
getStateMgr := func(schemaName string) (statemgr.Full, *statemgr.LockInfo) {
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
config := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
"schema_name": schemaName,
@ -470,8 +469,12 @@ func TestBackendConcurrentLock(t *testing.T) {
return stateMgr, info
}
s1, i1 := getStateMgr(fmt.Sprintf("terraform_%s_1", t.Name()))
s2, i2 := getStateMgr(fmt.Sprintf("terraform_%s_2", t.Name()))
firstSchema, secondSchema := fmt.Sprintf("terraform_%s_1", t.Name()), fmt.Sprintf("terraform_%s_2", t.Name())
defer dropSchema(t, dbCleaner, firstSchema)
defer dropSchema(t, dbCleaner, secondSchema)
s1, i1 := getStateMgr(firstSchema)
s2, i2 := getStateMgr(secondSchema)
// First we need to create the workspace as the lock for creating them is
// global
@ -524,3 +527,18 @@ func TestBackendConcurrentLock(t *testing.T) {
func getDatabaseUrl() string {
return os.Getenv("DATABASE_URL")
}
func dropSchema(t *testing.T, db *sql.DB, schemaName string) {
dropSchemaByQuotedName(t, db, pq.QuoteIdentifier(schemaName))
}
func dropSchemaByQuotedName(t *testing.T, db *sql.DB, quotedSchemaName string) {
rows, err := db.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", quotedSchemaName))
if err != nil {
t.Fatal(err)
}
defer rows.Close()
if err = rows.Err(); err != nil {
t.Fatal(err)
}
}

View File

@ -9,6 +9,7 @@ import (
"crypto/md5"
"database/sql"
"fmt"
"hash/fnv"
uuid "github.com/hashicorp/go-uuid"
_ "github.com/lib/pq"
@ -90,15 +91,20 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) {
return nil
}
// Try to acquire locks for the existing row `id` and the creation lock `-1`.
query := `SELECT %s.id, pg_try_advisory_lock(%s.id), pg_try_advisory_lock(-1) FROM %s.%s WHERE %s.name = $1`
row := c.Client.QueryRow(fmt.Sprintf(query, statesTableName, statesTableName, c.SchemaName, statesTableName, statesTableName), c.Name)
creationLockID := c.composeCreationLockID()
// Try to acquire locks for the existing row `id` and the creation lock.
//nolint:gosec // we only parameterize user passed values
query := fmt.Sprintf(`SELECT %s.id, pg_try_advisory_lock(%s.id), pg_try_advisory_lock(%s) FROM %s.%s WHERE %s.name = $1`,
statesTableName, statesTableName, creationLockID, c.SchemaName, statesTableName, statesTableName)
row := c.Client.QueryRow(query, c.Name)
var pgLockId, didLock, didLockForCreate []byte
err = row.Scan(&pgLockId, &didLock, &didLockForCreate)
switch {
case err == sql.ErrNoRows:
// No rows means we're creating the workspace. Take the creation lock.
innerRow := c.Client.QueryRow(`SELECT pg_try_advisory_lock(-1)`)
innerRow := c.Client.QueryRow(fmt.Sprintf(`SELECT pg_try_advisory_lock(%s)`, creationLockID))
var innerDidLock []byte
err := innerRow.Scan(&innerDidLock)
if err != nil {
@ -107,20 +113,20 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) {
if string(innerDidLock) == "false" {
return "", &statemgr.LockError{Info: info, Err: fmt.Errorf("Already locked for workspace creation: %s", c.Name)}
}
info.Path = "-1"
info.Path = creationLockID
case err != nil:
return "", &statemgr.LockError{Info: info, Err: err}
case string(didLock) == "false":
// Existing workspace is already locked. Release the attempted creation lock.
lockUnlock("-1")
_ = lockUnlock(creationLockID)
return "", &statemgr.LockError{Info: info, Err: fmt.Errorf("Workspace is already locked: %s", c.Name)}
case string(didLockForCreate) == "false":
// Someone has the creation lock already. Release the existing workspace because it might not be safe to touch.
lockUnlock(string(pgLockId))
_ = lockUnlock(string(pgLockId))
return "", &statemgr.LockError{Info: info, Err: fmt.Errorf("Cannot lock workspace; already locked for workspace creation: %s", c.Name)}
default:
// Existing workspace is now locked. Release the attempted creation lock.
lockUnlock("-1")
_ = lockUnlock(creationLockID)
info.Path = string(pgLockId)
}
c.info = info
@ -128,10 +134,6 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) {
return info.ID, nil
}
func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) {
return c.info, nil
}
func (c *RemoteClient) Unlock(id string) error {
if c.info != nil && c.info.Path != "" {
query := `SELECT pg_advisory_unlock(%s)`
@ -145,3 +147,9 @@ func (c *RemoteClient) Unlock(id string) error {
}
return nil
}
func (c *RemoteClient) composeCreationLockID() string {
hash := fnv.New32()
hash.Write([]byte(c.SchemaName))
return fmt.Sprintf("%d", int64(hash.Sum32())*-1)
}

View File

@ -12,10 +12,12 @@ import (
"database/sql"
"fmt"
"testing"
"time"
"github.com/opentofu/opentofu/internal/backend"
"github.com/opentofu/opentofu/internal/encryption"
"github.com/opentofu/opentofu/internal/states/remote"
"github.com/opentofu/opentofu/internal/states/statemgr"
)
func TestRemoteClient_impl(t *testing.T) {
@ -31,7 +33,7 @@ func TestRemoteClient(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
defer dropSchema(t, dbCleaner, schemaName)
config := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
@ -59,7 +61,7 @@ func TestRemoteLocks(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer dbCleaner.Query(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName))
defer dropSchema(t, dbCleaner, schemaName)
config := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
@ -80,3 +82,91 @@ func TestRemoteLocks(t *testing.T) {
remote.TestRemoteLocks(t, s1.(*remote.State).Client, s2.(*remote.State).Client)
}
// TestConcurrentCreationLocksInDifferentSchemas tests whether backends with different schemas
// affect each other while taking global workspace creation locks.
func TestConcurrentCreationLocksInDifferentSchemas(t *testing.T) {
testACC(t)
connStr := getDatabaseUrl()
dbCleaner, err := sql.Open("postgres", connStr)
if err != nil {
t.Fatal(err)
}
firstSchema := fmt.Sprintf("terraform_%s_1", t.Name())
secondSchema := fmt.Sprintf("terraform_%s_2", t.Name())
defer dropSchema(t, dbCleaner, firstSchema)
defer dropSchema(t, dbCleaner, secondSchema)
firstConfig := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
"schema_name": firstSchema,
})
secondConfig := backend.TestWrapConfig(map[string]interface{}{
"conn_str": connStr,
"schema_name": secondSchema,
})
//nolint:errcheck // this is a test, I am fine with panic here
firstBackend := backend.TestBackendConfig(t, New(encryption.StateEncryptionDisabled()), firstConfig).(*Backend)
//nolint:errcheck // this is a test, I am fine with panic here
secondBackend := backend.TestBackendConfig(t, New(encryption.StateEncryptionDisabled()), secondConfig).(*Backend)
//nolint:errcheck // this is a test, I am fine with panic here
thirdBackend := backend.TestBackendConfig(t, New(encryption.StateEncryptionDisabled()), secondConfig).(*Backend)
// We operate on remote clients instead of state managers to simulate the
// first call to backend.StateMgr(), which creates an empty state in default
// workspace.
firstClient := &RemoteClient{
Client: firstBackend.db,
Name: backend.DefaultStateName,
SchemaName: firstBackend.schemaName,
}
secondClient := &RemoteClient{
Client: secondBackend.db,
Name: backend.DefaultStateName,
SchemaName: secondBackend.schemaName,
}
thirdClient := &RemoteClient{
Client: thirdBackend.db,
Name: backend.DefaultStateName,
SchemaName: thirdBackend.schemaName,
}
// It doesn't matter what lock info to supply for workspace creation.
lock := &statemgr.LockInfo{
ID: "1",
Operation: "test",
Info: "This needs to lock for workspace creation",
Who: "me",
Version: "1",
Created: time.Date(1999, 8, 19, 0, 0, 0, 0, time.UTC),
}
// Those calls with empty database must think they are locking
// for workspace creation, both of them must succeed since they
// are operating on different schemas.
if _, err = firstClient.Lock(lock); err != nil {
t.Fatal(err)
}
if _, err = secondClient.Lock(lock); err != nil {
t.Fatal(err)
}
// This call must fail since we are trying to acquire the same
// lock as the first client. We need to make this call from a
// separate session, since advisory locks are okay to be re-acquired
// during the same session.
if _, err = thirdClient.Lock(lock); err == nil {
t.Fatal("Expected an error to be thrown on a second lock attempt")
} else if lockErr := err.(*statemgr.LockError); lockErr.Info != lock && //nolint:errcheck,errorlint // this is a test, I am fine with panic here
lockErr.Err.Error() != "Already locked for workspace creation: default" {
t.Fatalf("Unexpected error thrown on a second lock attempt: %v", err)
}
}

View File

@ -0,0 +1,57 @@
# PostgreSQL Backend: Schema-Based Locking (Workspace Creation)
Issue: https://github.com/opentofu/opentofu/issues/2218 <!-- Ideally, this issue will have the "needs-rfc" label added by the Core Team during triage -->
The PostgreSQL (`pg`) backend allows users to set a schema to be used for state storage. This makes it possible
to reuse the same database with a different schema per OpenTofu "setup" (e.g. different projects, or dev/stage/prod
environments). These setups are isolated and must be applied independently of each other,
even though they operate in the same database.
Currently, workspace creation locking is database-scoped and uses a shared static ID, which prevents parallel creation of
workspaces (including the `default` one) if the database is reused across multiple OpenTofu setups.
## Proposed Solution
Historically, the `pg` backend used transactions to handle locking, which comes with its own problems due to the
need for transaction rollbacks. That approach doesn't align with OpenTofu's locking design, so session-based
locking via `pg_advisory_lock` fits better (and is the second iteration of the `pg` locking implementation).
Also, `pg_advisory_lock` is database-scoped, so the `pg` backend needs to handle collisions between different state
IDs, even if they come from different OpenTofu setups (i.e. different schemas inside the same database). This is
handled via a single sequence in a shared schema (`public`).
However, workspace creation locking uses a static ID (`-1`), which makes all workspace creations block one another, even when
the workspaces are isolated (i.e. in different schemas). This is the problem that needs to be fixed. The proposed
solution is to replace the static `-1` ID with a schema-based ID so that workspace creation locking is scoped to a single schema.
### User Documentation
Nothing changes from the user's perspective. The only difference is that parallel `tofu apply` calls
for different setups no longer fail even if their backends use the same database (but different schemas).
### Technical Approach
Technically, we need to use a schema-based value instead of the static `-1` ID in calls to `pg_advisory_lock`. We should
use negative values, since positive ones are reserved for state IDs in `pg_advisory_lock`. The proposed solution is to
hash the name of the schema (which is unique per database) and negate the resulting integer before passing it to `pg_advisory_lock`.
This is still prone to collisions and will not solve 100% of similar issues; however, it is the easiest available
option. Additional attention needs to be paid to overflows, since `pg_advisory_lock` uses 64-bit signed integers as IDs
and the generated hash must fit into that range.
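For illustration, here is a minimal standalone sketch of that derivation (mirroring the `composeCreationLockID` helper added in this change; the function name and example schema names below are made up):

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// creationLockID hashes a schema name with FNV-32 and negates the result so it
// stays outside the positive ID range used for per-state advisory locks. A
// 32-bit hash also comfortably fits into pg_advisory_lock's signed 64-bit argument.
func creationLockID(schemaName string) int64 {
	h := fnv.New32()
	h.Write([]byte(schemaName)) // hash.Hash writes never return an error
	return -int64(h.Sum32())
}

func main() {
	// Prints non-positive lock IDs derived from the schema names; different
	// schemas yield different IDs (modulo hash collisions).
	fmt.Println(creationLockID("terraform_project_a"))
	fmt.Println(creationLockID("terraform_project_b"))
}
```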
Since `pg_advisory_lock` is session-scoped, we don't need to worry about migration or breaking changes. A problem may arise
if users run parallel `tofu apply` against the same backend with different versions of OpenTofu (i.e. one run acquires the `-1` lock
and the other acquires the hash of the same schema name). In that scenario, we end up with unsafe state writes.
### Open Questions
* Do we need an additional safeguard against the unsafe writes described above? Is the release-notes warning enough?
### Future Considerations
We may want to reduce potential hash collisions in the future.
## Potential Alternatives
Alternatively, we could introduce a new table to generate a sequential ID tied to a specific schema name. This would let us
avoid hashes and produce a unique ID for each schema. However, it could introduce additional problems with managing database state.
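For comparison, a rough sketch of that alternative, assuming a hypothetical shared `public.schema_lock_ids` table (not part of this change):

```go
package main

import (
	"database/sql"

	_ "github.com/lib/pq" // Postgres driver, as used by the pg backend
)

// getSchemaLockID is purely illustrative: it derives a per-schema lock ID from a
// hypothetical shared table instead of a hash, guaranteeing uniqueness per schema.
func getSchemaLockID(db *sql.DB, schemaName string) (int64, error) {
	_, err := db.Exec(`CREATE TABLE IF NOT EXISTS public.schema_lock_ids (
		schema_name TEXT PRIMARY KEY,
		id BIGSERIAL
	)`)
	if err != nil {
		return 0, err
	}
	var id int64
	// Insert-or-fetch the ID for this schema name; the no-op upsert makes
	// RETURNING work for both new and existing rows.
	err = db.QueryRow(`INSERT INTO public.schema_lock_ids (schema_name) VALUES ($1)
		ON CONFLICT (schema_name) DO UPDATE SET schema_name = EXCLUDED.schema_name
		RETURNING id`, schemaName).Scan(&id)
	if err != nil {
		return 0, err
	}
	// Negate to stay out of the positive range reserved for per-state locks.
	return -id, nil
}
```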

View File

@ -50,4 +50,11 @@ However, we have added instructions on building your own base image in the [cont
## Bugfixes
- Fixed an issue where an invalid provider name in the `provider_meta` block would crash OpenTofu rather than report an error ([#2347](https://github.com/opentofu/opentofu/pull/2347))
:::warning
OpenTofu 1.10 changed how workspace creation locking works. It is unsafe to
use the `pg` backend in parallel across OpenTofu 1.10 and older versions when
they share the same underlying database.
:::
- Fixed an issue where an invalid provider name in the `provider_meta` block would crash OpenTofu rather than report an error ([#2347](https://github.com/opentofu/opentofu/pull/2347))
- `pg` backend no longer fails on workspace creation for parallel runs when the database is shared across multiple projects. ([#2411](https://github.com/opentofu/opentofu/pull/2411))

View File

@ -9,6 +9,12 @@ Stores the state in a [Postgres database](https://www.postgresql.org) version 10
This backend supports [state locking](../../../language/state/locking.mdx).
:::warning
OpenTofu 1.10 changed how workspace creation locking works. It is unsafe to
use the `pg` backend in parallel across OpenTofu 1.10 and older versions when
they share the same underlying database.
:::
## Example Configuration
```hcl
@ -102,6 +108,8 @@ The table is keyed by the [workspace](../../../language/state/workspaces.mdx) na
Locking is supported using [Postgres advisory locks](https://www.postgresql.org/docs/9.5/explicit-locking.html#ADVISORY-LOCKS). [`force-unlock`](../../../cli/commands/force-unlock.mdx) is not supported, because these database-native locks will automatically unlock when the session is aborted or the connection fails. To see outstanding locks in a Postgres server, use the [`pg_locks` system view](https://www.postgresql.org/docs/9.5/view-pg-locks.html).
Advisory locks are used in two scenarios: state updates and state creation. When the state is updated, the advisory lock is acquired with the state ID. On state (and workspace) creation, it is acquired with a hash of the schema name. This way, multiple backend configurations don't affect each other when the database is shared.
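As an illustration only (not the backend's verbatim queries), the two acquisition paths roughly correspond to the following advisory-lock calls, where `stateID` is the serial id from the states table and `creationLockID` is the negated schema-name hash:

```go
package main

import "database/sql"

// tryLocks sketches the two advisory-lock scenarios: locking an existing state
// by its serial id, and locking workspace creation by the schema-derived id.
func tryLocks(db *sql.DB, stateID, creationLockID int64) (stateLocked, creationLocked bool, err error) {
	// Updating an existing state: lock on its serial id.
	if err = db.QueryRow(`SELECT pg_try_advisory_lock($1)`, stateID).Scan(&stateLocked); err != nil {
		return false, false, err
	}
	// Creating a new state/workspace: lock on the negative schema-based id.
	if err = db.QueryRow(`SELECT pg_try_advisory_lock($1)`, creationLockID).Scan(&creationLocked); err != nil {
		return false, false, err
	}
	return stateLocked, creationLocked, nil
}
```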
The **states** table contains:
- a serial integer `id`, used as the key for advisory locks