diff --git a/internal/backend/local/hook_state.go b/internal/backend/local/hook_state.go index 65a8a2b12e..d591d3644f 100644 --- a/internal/backend/local/hook_state.go +++ b/internal/backend/local/hook_state.go @@ -33,8 +33,30 @@ type StateHook struct { // and PersistInterval is ignored if this is nil. Schemas *terraform.Schemas - lastPersist time.Time - forcePersist bool + intermediatePersist IntermediateStatePersistInfo +} + +type IntermediateStatePersistInfo struct { + // RequestedPersistInterval is the persist interval requested by whatever + // instantiated the StateHook. + // + // Implementations of [IntermediateStateConditionalPersister] should ideally + // respect this, but may ignore it if they use something other than the + // passage of time to make their decision. + RequestedPersistInterval time.Duration + + // LastPersist is the time when the last intermediate state snapshot was + // persisted, or the time of the first report for Terraform Core if there + // hasn't yet been a persisted snapshot. + LastPersist time.Time + + // ForcePersist is true when Terraform CLI has receieved an interrupt + // signal and is therefore trying to create snapshots more aggressively + // in anticipation of possibly being terminated ungracefully. + // [IntermediateStateConditionalPersister] implementations should ideally + // persist every snapshot they get when this flag is set, unless they have + // some external information that implies this shouldn't be necessary. + ForcePersist bool } var _ terraform.Hook = (*StateHook)(nil) @@ -43,10 +65,12 @@ func (h *StateHook) PostStateUpdate(new *states.State) (terraform.HookAction, er h.Lock() defer h.Unlock() - if h.lastPersist.IsZero() { + h.intermediatePersist.RequestedPersistInterval = h.PersistInterval + + if h.intermediatePersist.LastPersist.IsZero() { // The first PostStateUpdate starts the clock for intermediate // calls to PersistState. - h.lastPersist = time.Now() + h.intermediatePersist.LastPersist = time.Now() } if h.StateMgr != nil { @@ -54,12 +78,14 @@ func (h *StateHook) PostStateUpdate(new *states.State) (terraform.HookAction, er return terraform.HookActionHalt, err } if mgrPersist, ok := h.StateMgr.(statemgr.Persister); ok && h.PersistInterval != 0 && h.Schemas != nil { - if h.forcePersist || time.Since(h.lastPersist) >= h.PersistInterval { + if h.shouldPersist() { err := mgrPersist.PersistState(h.Schemas) if err != nil { return terraform.HookActionHalt, err } - h.lastPersist = time.Now() + h.intermediatePersist.LastPersist = time.Now() + } else { + log.Printf("[DEBUG] State storage %T declined to persist a state snapshot", h.StateMgr) } } } @@ -78,21 +104,72 @@ func (h *StateHook) Stopping() { // do if they _do_ subsequently hard-kill Terraform during an apply. if mgrPersist, ok := h.StateMgr.(statemgr.Persister); ok && h.Schemas != nil { - err := mgrPersist.PersistState(h.Schemas) - if err != nil { - // This hook can't affect Terraform Core's ongoing behavior, - // but it's a best effort thing anyway so we'll just emit a - // log to aid with debugging. - log.Printf("[ERROR] Failed to persist state after interruption: %s", err) - } - // While we're in the stopping phase we'll try to persist every // new state update to maximize every opportunity we get to avoid // losing track of objects that have been created or updated. // Terraform Core won't start any new operations after it's been // stopped, so at most we should see one more PostStateUpdate // call per already-active request. - h.forcePersist = true + h.intermediatePersist.ForcePersist = true + + if h.shouldPersist() { + err := mgrPersist.PersistState(h.Schemas) + if err != nil { + // This hook can't affect Terraform Core's ongoing behavior, + // but it's a best effort thing anyway so we'll just emit a + // log to aid with debugging. + log.Printf("[ERROR] Failed to persist state after interruption: %s", err) + } + } else { + log.Printf("[DEBUG] State storage %T declined to persist a state snapshot", h.StateMgr) + } } } + +func (h *StateHook) shouldPersist() bool { + if m, ok := h.StateMgr.(IntermediateStateConditionalPersister); ok { + return m.ShouldPersistIntermediateState(&h.intermediatePersist) + } + return DefaultIntermediateStatePersistRule(&h.intermediatePersist) +} + +// DefaultIntermediateStatePersistRule is the default implementation of +// [IntermediateStateConditionalPersister.ShouldPersistIntermediateState] used +// when the selected state manager doesn't implement that interface. +// +// Implementers of that interface can optionally wrap a call to this function +// if they want to combine the default behavior with some logic of their own. +func DefaultIntermediateStatePersistRule(info *IntermediateStatePersistInfo) bool { + return info.ForcePersist || time.Since(info.LastPersist) >= info.RequestedPersistInterval +} + +// IntermediateStateConditionalPersister is an optional extension of +// [statemgr.Persister] that allows an implementation to tailor the rules for +// whether to create intermediate state snapshots when Terraform Core emits +// events reporting that the state might have changed. +// +// For state managers that don't implement this interface, [StateHook] uses +// a default set of rules that aim to be a good compromise between how long +// a state change can be active before it gets committed as a snapshot vs. +// how many intermediate snapshots will get created. That compromise is subject +// to change over time, but a state manager can implement this interface to +// exert full control over those rules. +type IntermediateStateConditionalPersister interface { + // ShouldPersistIntermediateState will be called each time Terraform Core + // emits an intermediate state event that is potentially eligible to be + // persisted. + // + // The implemention should return true to signal that the state snapshot + // most recently provided to the object's WriteState should be persisted, + // or false if it should not be persisted. If this function returns true + // then the receiver will see a subsequent call to + // [statemgr.Persister.PersistState] to request persistence. + // + // The implementation must not modify anything reachable through the + // arguments, and must not retain pointers to anything reachable through + // them after the function returns. However, implementers can assume that + // nothing will write to anything reachable through the arguments while + // this function is active. + ShouldPersistIntermediateState(info *IntermediateStatePersistInfo) bool +} diff --git a/internal/backend/local/hook_state_test.go b/internal/backend/local/hook_state_test.go index 2c208be865..3c850b80f5 100644 --- a/internal/backend/local/hook_state_test.go +++ b/internal/backend/local/hook_state_test.go @@ -41,7 +41,9 @@ func TestStateHookStopping(t *testing.T) { StateMgr: is, Schemas: &terraform.Schemas{}, PersistInterval: 4 * time.Hour, - lastPersist: time.Now(), + intermediatePersist: IntermediateStatePersistInfo{ + LastPersist: time.Now(), + }, } s := statemgr.TestFullInitialState() @@ -61,7 +63,7 @@ func TestStateHookStopping(t *testing.T) { // We'll now force lastPersist to be long enough ago that persisting // should be due on the next call. - hook.lastPersist = time.Now().Add(-5 * time.Hour) + hook.intermediatePersist.LastPersist = time.Now().Add(-5 * time.Hour) hook.PostStateUpdate(s) if is.Written == nil || !is.Written.Equal(s) { t.Fatalf("mismatching state written") @@ -132,6 +134,111 @@ func TestStateHookStopping(t *testing.T) { } } +func TestStateHookCustomPersistRule(t *testing.T) { + is := &testPersistentStateThatRefusesToPersist{} + hook := &StateHook{ + StateMgr: is, + Schemas: &terraform.Schemas{}, + PersistInterval: 4 * time.Hour, + intermediatePersist: IntermediateStatePersistInfo{ + LastPersist: time.Now(), + }, + } + + s := statemgr.TestFullInitialState() + action, err := hook.PostStateUpdate(s) + if err != nil { + t.Fatalf("unexpected error from PostStateUpdate: %s", err) + } + if got, want := action, terraform.HookActionContinue; got != want { + t.Fatalf("wrong hookaction %#v; want %#v", got, want) + } + if is.Written == nil || !is.Written.Equal(s) { + t.Fatalf("mismatching state written") + } + if is.Persisted != nil { + t.Fatalf("persisted too soon") + } + + // We'll now force lastPersist to be long enough ago that persisting + // should be due on the next call. + hook.intermediatePersist.LastPersist = time.Now().Add(-5 * time.Hour) + hook.PostStateUpdate(s) + if is.Written == nil || !is.Written.Equal(s) { + t.Fatalf("mismatching state written") + } + if is.Persisted != nil { + t.Fatalf("has a persisted state, but shouldn't") + } + hook.PostStateUpdate(s) + if is.Written == nil || !is.Written.Equal(s) { + t.Fatalf("mismatching state written") + } + if is.Persisted != nil { + t.Fatalf("has a persisted state, but shouldn't") + } + + gotLog := is.CallLog + wantLog := []string{ + // Initial call before we reset lastPersist + "WriteState", + "ShouldPersistIntermediateState", + // Previous call should return false, preventing a "PersistState" call + + // Write and then decline to persist + "WriteState", + "ShouldPersistIntermediateState", + // Previous call should return false, preventing a "PersistState" call + + // Final call before we start "stopping". + "WriteState", + "ShouldPersistIntermediateState", + // Previous call should return false, preventing a "PersistState" call + } + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Fatalf("wrong call log so far\n%s", diff) + } + + // We'll reset the log now before we try seeing what happens after + // we use "Stopped". + is.CallLog = is.CallLog[:0] + is.Persisted = nil + + hook.Stopping() + if is.Persisted == nil || !is.Persisted.Equal(s) { + t.Fatalf("mismatching state persisted") + } + + is.Persisted = nil + hook.PostStateUpdate(s) + if is.Persisted == nil || !is.Persisted.Equal(s) { + t.Fatalf("mismatching state persisted") + } + is.Persisted = nil + hook.PostStateUpdate(s) + if is.Persisted == nil || !is.Persisted.Equal(s) { + t.Fatalf("mismatching state persisted") + } + + gotLog = is.CallLog + wantLog = []string{ + "ShouldPersistIntermediateState", + // Previous call should return true, allowing the following "PersistState" call + "PersistState", + "WriteState", + "ShouldPersistIntermediateState", + // Previous call should return true, allowing the following "PersistState" call + "PersistState", + "WriteState", + "ShouldPersistIntermediateState", + // Previous call should return true, allowing the following "PersistState" call + "PersistState", + } + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Fatalf("wrong call log once in stopping mode\n%s", diff) + } +} + type testPersistentState struct { CallLog []string @@ -156,3 +263,35 @@ func (sm *testPersistentState) PersistState(schemas *terraform.Schemas) error { sm.Persisted = sm.Written return nil } + +type testPersistentStateThatRefusesToPersist struct { + CallLog []string + + Written *states.State + Persisted *states.State +} + +var _ statemgr.Writer = (*testPersistentStateThatRefusesToPersist)(nil) +var _ statemgr.Persister = (*testPersistentStateThatRefusesToPersist)(nil) +var _ IntermediateStateConditionalPersister = (*testPersistentStateThatRefusesToPersist)(nil) + +func (sm *testPersistentStateThatRefusesToPersist) WriteState(state *states.State) error { + sm.CallLog = append(sm.CallLog, "WriteState") + sm.Written = state + return nil +} + +func (sm *testPersistentStateThatRefusesToPersist) PersistState(schemas *terraform.Schemas) error { + if schemas == nil { + return fmt.Errorf("no schemas") + } + sm.CallLog = append(sm.CallLog, "PersistState") + sm.Persisted = sm.Written + return nil +} + +// ShouldPersistIntermediateState implements IntermediateStateConditionalPersister +func (sm *testPersistentStateThatRefusesToPersist) ShouldPersistIntermediateState(info *IntermediateStatePersistInfo) bool { + sm.CallLog = append(sm.CallLog, "ShouldPersistIntermediateState") + return info.ForcePersist +}