mirror of
https://github.com/opentofu/opentofu.git
synced 2025-01-26 16:36:26 -06:00
add the remote-exec retry function to communicator
Every provisioner that uses communicator implements its own retryFunc. Take the remote-exec implementation (since it's the most complete) and put it in the communicator package for each provisioner to use. Add a public interface `communicator.Fatal`, which can wrap an error to indicate a fatal error that should not be retried.
This commit is contained in:
parent
c2306be0fa
commit
bc90eca19f
@ -1,8 +1,11 @@
|
||||
package communicator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/terraform/communicator/remote"
|
||||
@ -51,3 +54,93 @@ func New(s *terraform.InstanceState) (Communicator, error) {
|
||||
return nil, fmt.Errorf("connection type '%s' not supported", connType)
|
||||
}
|
||||
}
|
||||
|
||||
// maxBackoffDealy is the maximum delay between retry attempts
|
||||
var maxBackoffDelay = 10 * time.Second
|
||||
var initialBackoffDelay = time.Second
|
||||
|
||||
type Fatal interface {
|
||||
FatalError() error
|
||||
}
|
||||
|
||||
func Retry(ctx context.Context, f func() error) error {
|
||||
// container for atomic error value
|
||||
type errWrap struct {
|
||||
E error
|
||||
}
|
||||
|
||||
// Try the function in a goroutine
|
||||
var errVal atomic.Value
|
||||
doneCh := make(chan struct{})
|
||||
go func() {
|
||||
defer close(doneCh)
|
||||
|
||||
delay := time.Duration(0)
|
||||
for {
|
||||
// If our context ended, we want to exit right away.
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(delay):
|
||||
}
|
||||
|
||||
// Try the function call
|
||||
err := f()
|
||||
|
||||
// return if we have no error, or a FatalError
|
||||
done := false
|
||||
switch e := err.(type) {
|
||||
case nil:
|
||||
done = true
|
||||
case Fatal:
|
||||
err = e.FatalError()
|
||||
done = true
|
||||
}
|
||||
|
||||
errVal.Store(&errWrap{err})
|
||||
|
||||
if done {
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("[WARN] retryable error: %v", err)
|
||||
|
||||
delay *= 2
|
||||
|
||||
if delay == 0 {
|
||||
delay = initialBackoffDelay
|
||||
}
|
||||
|
||||
if delay > maxBackoffDelay {
|
||||
delay = maxBackoffDelay
|
||||
}
|
||||
|
||||
log.Printf("[INFO] sleeping for %s", delay)
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for completion
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-doneCh:
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
// Check if we got an error executing
|
||||
if ev, ok := errVal.Load().(errWrap); ok {
|
||||
lastErr = ev.E
|
||||
}
|
||||
|
||||
// Check if we have a context error to check if we're interrupted or timeout
|
||||
switch ctx.Err() {
|
||||
case context.Canceled:
|
||||
return fmt.Errorf("interrupted - last error: %v", lastErr)
|
||||
case context.DeadlineExceeded:
|
||||
return fmt.Errorf("timeout - last error: %v", lastErr)
|
||||
}
|
||||
|
||||
if lastErr != nil {
|
||||
return lastErr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -1,7 +1,12 @@
|
||||
package communicator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/terraform/terraform"
|
||||
)
|
||||
@ -28,3 +33,66 @@ func TestCommunicator_new(t *testing.T) {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
func TestRetryFunc(t *testing.T) {
|
||||
origMax := maxBackoffDelay
|
||||
maxBackoffDelay = time.Second
|
||||
origStart := initialBackoffDelay
|
||||
initialBackoffDelay = 10 * time.Millisecond
|
||||
|
||||
defer func() {
|
||||
maxBackoffDelay = origMax
|
||||
initialBackoffDelay = origStart
|
||||
}()
|
||||
|
||||
// succeed on the third try
|
||||
errs := []error{io.EOF, &net.OpError{Err: errors.New("ERROR")}, nil}
|
||||
count := 0
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
err := Retry(ctx, func() error {
|
||||
if count >= len(errs) {
|
||||
return errors.New("failed to stop after nil error")
|
||||
}
|
||||
|
||||
err := errs[count]
|
||||
count++
|
||||
|
||||
return err
|
||||
})
|
||||
|
||||
if count != 3 {
|
||||
t.Fatal("retry func should have been called 3 times")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryFuncBackoff(t *testing.T) {
|
||||
origMax := maxBackoffDelay
|
||||
maxBackoffDelay = time.Second
|
||||
origStart := initialBackoffDelay
|
||||
initialBackoffDelay = 100 * time.Millisecond
|
||||
|
||||
defer func() {
|
||||
maxBackoffDelay = origMax
|
||||
initialBackoffDelay = origStart
|
||||
}()
|
||||
|
||||
count := 0
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
Retry(ctx, func() error {
|
||||
count++
|
||||
return io.EOF
|
||||
})
|
||||
|
||||
if count > 4 {
|
||||
t.Fatalf("retry func failed to backoff. called %d times", count)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user