Merge pull request #20588 from hashicorp/svh/f-retry

backend/remote: also retry on server errors
2024-12-24 16:10:46 -06:00 · 2019-03-08 18:17:29 +01:00 · 2019-03-08 18:17:29 +01:00 · e75e845804
commit e75e845804
parent ec9a4b7dae 0232d84a0d
5 changed files with 112 additions and 33 deletions
--- a/backend/remote/backend.go
+++ b/backend/remote/backend.go
@ -10,6 +10,7 @@ import (
 	"sort"
 	"strings"
 	"sync"
+	"time"

 	tfe "github.com/hashicorp/go-tfe"
 	version "github.com/hashicorp/go-version"
@ -56,6 +57,9 @@ type Remote struct {
 	// client is the remote backend API client.
 	client *tfe.Client

+	// lastRetry is set to the last time a request was retried.
+	lastRetry time.Time
+
 	// hostname of the remote backend server.
 	hostname string

@ -279,10 +283,11 @@ func (b *Remote) Configure(obj cty.Value) tfdiags.Diagnostics {
 	}

 	cfg := &tfe.Config{
-		Address:  service.String(),
-		BasePath: service.Path,
-		Token:    token,
-		Headers:  make(http.Header),
+		Address:      service.String(),
+		BasePath:     service.Path,
+		Token:        token,
+		Headers:      make(http.Header),
+		RetryLogHook: b.retryLogHook,
 	}

 	// Set the version header to the current version.
@ -324,6 +329,9 @@ func (b *Remote) Configure(obj cty.Value) tfdiags.Diagnostics {
 	b.local = backendLocal.NewWithBackend(b)
 	b.forceLocal = b.forceLocal || !entitlements.Operations

+	// Enable retries for server errors as the backend is now fully configured.
+	b.client.RetryServerErrors(true)
+
 	return diags
 }

@ -470,6 +478,31 @@ func (b *Remote) token() (string, error) {
 	return "", nil
 }

+// retryLogHook is invoked each time a request is retried, allowing the backend
+// to warn about connection issues. resp may be nil when the request itself failed.
+func (b *Remote) retryLogHook(attemptNum int, resp *http.Response) {
+	if b.CLI != nil {
+		// Ignore the first retry to make sure any delayed output will
+		// be written to the console before we start logging retries.
+		//
+		// The retry logic in the TFE client will retry both rate limited
+		// requests and server errors, but in the remote backend we only
+		// care about server errors so we ignore rate limit (429) errors.
+		if attemptNum == 0 || (resp != nil && resp.StatusCode == 429) {
+			// Remember this moment; later retries report the time elapsed since it.
+			b.lastRetry = time.Now()
+			return
+		}
+
+		if attemptNum == 1 {
+			b.CLI.Output(b.Colorize().Color(strings.TrimSpace(initialRetryError)))
+		} else {
+			b.CLI.Output(b.Colorize().Color(strings.TrimSpace(
+				fmt.Sprintf(repeatedRetryError, time.Since(b.lastRetry).Round(time.Second)))))
+		}
+	}
+}
+
 // Workspaces implements backend.Enhanced.
 func (b *Remote) Workspaces() ([]string, error) {
 	if b.prefix == "" {
@ -858,6 +891,17 @@ func checkConstraintsWarning(err error) tfdiags.Diagnostic {
 	)
 }

+// The newline before the closing [reset] survives strings.TrimSpace; presumably it is what makes this look good in the CLI.
+const initialRetryError = `
+[reset][yellow]There was an error connecting to the remote backend. Please do not exit
+Terraform to prevent data loss! Trying to restore the connection...
+[reset]
+`
+
+const repeatedRetryError = `
+[reset][yellow]Still trying to restore the connection... (%s elapsed)[reset]
+`
+
 const operationCanceled = `
 [reset][red]The remote operation was successfully cancelled.[reset]
 `
--- a/go.mod
+++ b/go.mod
@ -55,7 +55,7 @@ require (
 	github.com/hashicorp/go-retryablehttp v0.5.1
 	github.com/hashicorp/go-rootcerts v1.0.0
 	github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 // indirect
-	github.com/hashicorp/go-tfe v0.3.10
+	github.com/hashicorp/go-tfe v0.3.11
 	github.com/hashicorp/go-uuid v1.0.0
 	github.com/hashicorp/go-version v1.1.0
 	github.com/hashicorp/golang-lru v0.5.0 // indirect
--- a/go.sum
+++ b/go.sum
@ -189,8 +189,8 @@ github.com/hashicorp/go-slug v0.2.0 h1:MVdZAkTmDsUi1AT+3NQDsn8n3ssnVSIHwiM6RcUHv
 github.com/hashicorp/go-slug v0.2.0/go.mod h1:+zDycQOzGqOqMW7Kn2fp9vz/NtqpMLQlgb9JUF+0km4=
 github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 h1:7YOlAIO2YWnJZkQp7B5eFykaIY7C9JndqAFQyVV5BhM=
 github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
-github.com/hashicorp/go-tfe v0.3.10 h1:6uPnPHNPxXDe3k/Vt6fovygYTaWJ8f/7zdHc++f7NJU=
-github.com/hashicorp/go-tfe v0.3.10/go.mod h1:LHLchj07PCYgQqcyE5Sz+g4zrMNW+nALKbiSNTZedEs=
+github.com/hashicorp/go-tfe v0.3.11 h1:PHw0f1XeriVkqBikhXgZm/t65GR/fEH1iUl/d/9qKbU=
+github.com/hashicorp/go-tfe v0.3.11/go.mod h1:LHLchj07PCYgQqcyE5Sz+g4zrMNW+nALKbiSNTZedEs=
 github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM=
 github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
 github.com/hashicorp/go-version v1.1.0 h1:bPIoEKD27tNdebFGGxxYwcL4nepeY4j1QP23PFRGzg0=
--- a/vendor/github.com/hashicorp/go-tfe/tfe.go
+++ b/vendor/github.com/hashicorp/go-tfe/tfe.go
@ -48,6 +48,9 @@ var (
 	ErrResourceNotFound = errors.New("resource not found")
 )

+// RetryLogHook allows a function to run before each retry; resp may be nil when the request failed without a response.
+type RetryLogHook func(attemptNum int, resp *http.Response)
+
 // Config provides configuration details to the API client.
 type Config struct {
 	// The address of the Terraform Enterprise API.
@ -64,6 +67,9 @@ type Config struct {

 	// A custom HTTP client to use.
 	HTTPClient *http.Client
+
+	// RetryLogHook is invoked each time a request is retried.
+	RetryLogHook RetryLogHook
 }

 // DefaultConfig returns a default config structure.
@ -90,11 +96,13 @@ func DefaultConfig() *Config {
 // Client is the Terraform Enterprise API client. It provides the basic
 // connectivity and configuration for accessing the TFE API.
 type Client struct {
-	baseURL *url.URL
-	token   string
-	headers http.Header
-	http    *retryablehttp.Client
-	limiter *rate.Limiter
+	baseURL           *url.URL
+	token             string
+	headers           http.Header
+	http              *retryablehttp.Client
+	limiter           *rate.Limiter
+	retryLogHook      RetryLogHook
+	retryServerErrors bool

 	Applies               Applies
 	ConfigurationVersions ConfigurationVersions
@ -139,6 +147,9 @@ func NewClient(cfg *Config) (*Client, error) {
 		if cfg.HTTPClient != nil {
 			config.HTTPClient = cfg.HTTPClient
 		}
+		if cfg.RetryLogHook != nil {
+			config.RetryLogHook = cfg.RetryLogHook
+		}
 	}

 	// Parse the address to make sure its a valid URL.
@ -159,18 +170,20 @@ func NewClient(cfg *Config) (*Client, error) {

 	// Create the client.
 	client := &Client{
-		baseURL: baseURL,
-		token:   config.Token,
-		headers: config.Headers,
-		http: &retryablehttp.Client{
-			Backoff:      rateLimitBackoff,
-			CheckRetry:   rateLimitRetry,
-			ErrorHandler: retryablehttp.PassthroughErrorHandler,
-			HTTPClient:   config.HTTPClient,
-			RetryWaitMin: 100 * time.Millisecond,
-			RetryWaitMax: 400 * time.Millisecond,
-			RetryMax:     30,
-		},
+		baseURL:      baseURL,
+		token:        config.Token,
+		headers:      config.Headers,
+		retryLogHook: config.RetryLogHook,
+	}
+
+	client.http = &retryablehttp.Client{
+		Backoff:      client.retryHTTPBackoff,
+		CheckRetry:   client.retryHTTPCheck,
+		ErrorHandler: retryablehttp.PassthroughErrorHandler,
+		HTTPClient:   config.HTTPClient,
+		RetryWaitMin: 100 * time.Millisecond,
+		RetryWaitMax: 400 * time.Millisecond,
+		RetryMax:     30,
 	}

 	// Configure the rate limiter.
@ -203,24 +216,46 @@ func NewClient(cfg *Config) (*Client, error) {
 	return client, nil
 }

-// rateLimitRetry provides a callback for Client.CheckRetry, which will only
-// retry when receiving a 429 response which indicates being rate limited.
-func rateLimitRetry(ctx context.Context, resp *http.Response, err error) (bool, error) {
-	// Do not retry on context.Canceled or context.DeadlineExceeded.
+// RetryServerErrors sets whether the retry HTTP check also retries requests
+// that returned an unexpected error or failed with a server error (>= 500).
+func (c *Client) RetryServerErrors(retry bool) {
+	c.retryServerErrors = retry
+}
+
+// retryHTTPCheck provides a callback for Client.CheckRetry which retries rate limit
+// (429) responses and, only if retryServerErrors is set, errors and >= 500 responses.
+func (c *Client) retryHTTPCheck(ctx context.Context, resp *http.Response, err error) (bool, error) {
 	if ctx.Err() != nil {
 		return false, ctx.Err()
 	}
-	// Do not retry on any unexpected errors.
 	if err != nil {
-		return false, err
+		return c.retryServerErrors, err
 	}
-	// Only retry when we are rate limited.
-	if resp.StatusCode == 429 {
+	if resp.StatusCode == 429 || (c.retryServerErrors && resp.StatusCode >= 500) {
 		return true, nil
 	}
 	return false, nil
 }

+// retryHTTPBackoff provides a callback for Client.Backoff. It calls the retry log hook
+// (if set) and returns the wait time; resp may be nil when the request itself failed.
+func (c *Client) retryHTTPBackoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
+	if c.retryLogHook != nil {
+		c.retryLogHook(attemptNum, resp)
+	}
+
+	// Use the rate limit backoff function when we are rate limited.
+	if resp != nil && resp.StatusCode == 429 {
+		return rateLimitBackoff(min, max, attemptNum, resp)
+	}
+
+	// Set custom durations when we experience a service interruption.
+	min = 700 * time.Millisecond
+	max = 900 * time.Millisecond
+
+	return retryablehttp.LinearJitterBackoff(min, max, attemptNum, resp)
+}
+
 // rateLimitBackoff provides a callback for Client.Backoff which will use the
 // X-RateLimit_Reset header to determine the time to wait. We add some jitter
 // to prevent a thundering herd.
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -290,7 +290,7 @@ github.com/hashicorp/go-rootcerts
 github.com/hashicorp/go-safetemp
 # github.com/hashicorp/go-slug v0.2.0
 github.com/hashicorp/go-slug
-# github.com/hashicorp/go-tfe v0.3.10
+# github.com/hashicorp/go-tfe v0.3.11
 github.com/hashicorp/go-tfe
 # github.com/hashicorp/go-uuid v1.0.0
 github.com/hashicorp/go-uuid