command: Start of propagating OpenTelemetry context

Several times over the years we've considered adding tracing
instrumentation to Terraform, since even when running in isolation as a
CLI program it has a "distributed system-like" structure, with lots of
concurrent internal work and also some work delegated to provider plugins
that are essentially temporarily-running microservices.

However, it's always felt a bit overwhelming to do it because much of
Terraform predates the Go context.Context idiom and so it's tough to get
a clean chain of context.Context values all the way down the stack without
disturbing a lot of existing APIs.

This commit aims to just get that process started by establishing how a
context can propagate from "package main" into the command package,
focusing initially on "terraform init" and some other commands that share
some underlying functions with that command.

OpenTelemetry has emerged as a de-facto industry standard and so this uses
its API directly, without any attempt to hide it behind an abstraction.
The OpenTelemetry API is itself already an adapter layer, so we should be
able to swap in any backend that uses comparable concepts. For now we just
discard the tracing reports by default, and allow users to opt in to
delivering traces over OTLP by setting an environment variable when
running Terraform (the environment variable was established in an earlier
commit, so this commit builds on that.)

When tracing collection is enabled, every Terraform CLI run will generate
at least one overall span representing the command that was run. Some
commands might also create child spans, but most currently do not.
This commit is contained in:
Martin Atkins 2023-07-10 11:29:57 -07:00
parent 503896af54
commit 7bee77bdd3
13 changed files with 159 additions and 45 deletions

View File

@ -4,6 +4,7 @@
package main
import (
"context"
"fmt"
"log"
"path/filepath"
@ -11,6 +12,7 @@ import (
"github.com/hashicorp/go-checkpoint"
"github.com/hashicorp/terraform/internal/command"
"github.com/hashicorp/terraform/internal/command/cliconfig"
"go.opentelemetry.io/otel/codes"
)
func init() {
@ -21,7 +23,7 @@ var checkpointResult chan *checkpoint.CheckResponse
// runCheckpoint runs a HashiCorp Checkpoint request. You can read about
// Checkpoint here: https://github.com/hashicorp/go-checkpoint.
func runCheckpoint(c *cliconfig.Config) {
func runCheckpoint(ctx context.Context, c *cliconfig.Config) {
// If the user doesn't want checkpoint at all, then return.
if c.DisableCheckpoint {
log.Printf("[INFO] Checkpoint disabled. Not running.")
@ -29,6 +31,10 @@ func runCheckpoint(c *cliconfig.Config) {
return
}
ctx, span := tracer.Start(ctx, "HashiCorp Checkpoint")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()
configDir, err := cliconfig.ConfigDir()
if err != nil {
log.Printf("[ERR] Checkpoint setup error: %s", err)
@ -55,7 +61,10 @@ func runCheckpoint(c *cliconfig.Config) {
})
if err != nil {
log.Printf("[ERR] Checkpoint error: %s", err)
span.SetStatus(codes.Error, err.Error())
resp = nil
} else {
span.SetStatus(codes.Ok, "checkpoint request succeeded")
}
checkpointResult <- resp

View File

@ -4,15 +4,15 @@
package main
import (
"context"
"os"
"os/signal"
"github.com/mitchellh/cli"
"github.com/hashicorp/go-plugin"
svchost "github.com/hashicorp/terraform-svchost"
"github.com/hashicorp/terraform-svchost/auth"
"github.com/hashicorp/terraform-svchost/disco"
"github.com/mitchellh/cli"
"github.com/hashicorp/terraform/internal/addrs"
"github.com/hashicorp/terraform/internal/command"
@ -53,6 +53,7 @@ var HiddenCommands map[string]struct{}
var Ui cli.Ui
func initCommands(
ctx context.Context,
originalWorkingDir string,
streams *terminal.Streams,
config *cliconfig.Config,
@ -101,7 +102,8 @@ func initCommands(
PluginCacheMayBreakDependencyLockFile: config.PluginCacheMayBreakDependencyLockFile,
ShutdownCh: makeShutdownCh(),
ShutdownCh: makeShutdownCh(),
CallerContext: ctx,
ProviderSource: providerSrc,
ProviderDevOverrides: providerDevOverrides,

View File

@ -4,6 +4,7 @@
package command
import (
"context"
"fmt"
"strings"
@ -28,6 +29,10 @@ func (c *GetCommand) Run(args []string) int {
return 1
}
// Initialization can be aborted by interruption signals
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()
path, err := ModulePath(cmdFlags.Args())
if err != nil {
c.Ui.Error(err.Error())
@ -36,7 +41,7 @@ func (c *GetCommand) Run(args []string) int {
path = c.normalizePath(path)
abort, diags := getModules(&c.Meta, path, update)
abort, diags := getModules(ctx, &c.Meta, path, update)
c.showDiagnostics(diags)
if abort || diags.HasErrors() {
return 1
@ -76,10 +81,10 @@ func (c *GetCommand) Synopsis() string {
return "Install or upgrade remote Terraform modules"
}
func getModules(m *Meta, path string, upgrade bool) (abort bool, diags tfdiags.Diagnostics) {
func getModules(ctx context.Context, m *Meta, path string, upgrade bool) (abort bool, diags tfdiags.Diagnostics) {
hooks := uiModuleInstallHooks{
Ui: m.Ui,
ShowLocalPaths: true,
}
return m.installModules(path, upgrade, hooks)
return m.installModules(ctx, path, upgrade, hooks)
}

View File

@ -15,6 +15,9 @@ import (
svchost "github.com/hashicorp/terraform-svchost"
"github.com/posener/complete"
"github.com/zclconf/go-cty/cty"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"github.com/hashicorp/terraform/internal/addrs"
"github.com/hashicorp/terraform/internal/backend"
@ -107,6 +110,10 @@ func (c *InitCommand) Run(args []string) int {
return 1
}
// Initialization can be aborted by interruption signals
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()
// This will track whether we outputted anything so that we know whether
// to output a newline before the success message
var header bool
@ -134,12 +141,19 @@ func (c *InitCommand) Run(args []string) int {
ShowLocalPaths: false, // since they are in a weird location for init
}
initDirFromModuleAbort, initDirFromModuleDiags := c.initDirFromModule(path, src, hooks)
ctx, span := tracer.Start(ctx, "-from-module=...", trace.WithAttributes(
attribute.String("module_source", src),
))
initDirFromModuleAbort, initDirFromModuleDiags := c.initDirFromModule(ctx, path, src, hooks)
diags = diags.Append(initDirFromModuleDiags)
if initDirFromModuleAbort || initDirFromModuleDiags.HasErrors() {
c.showDiagnostics(diags)
span.SetStatus(codes.Error, "module installation failed")
span.End()
return 1
}
span.End()
c.Ui.Output("")
}
@ -181,12 +195,12 @@ func (c *InitCommand) Run(args []string) int {
switch {
case flagCloud && rootModEarly.CloudConfig != nil:
back, backendOutput, backDiags = c.initCloud(rootModEarly, flagConfigExtra)
back, backendOutput, backDiags = c.initCloud(ctx, rootModEarly, flagConfigExtra)
case flagBackend:
back, backendOutput, backDiags = c.initBackend(rootModEarly, flagConfigExtra)
back, backendOutput, backDiags = c.initBackend(ctx, rootModEarly, flagConfigExtra)
default:
// load the previously-stored backend config
back, backDiags = c.Meta.backendFromState()
back, backDiags = c.Meta.backendFromState(ctx)
}
if backendOutput {
header = true
@ -219,7 +233,7 @@ func (c *InitCommand) Run(args []string) int {
}
if flagGet {
modsOutput, modsAbort, modsDiags := c.getModules(path, rootModEarly, flagUpgrade)
modsOutput, modsAbort, modsDiags := c.getModules(ctx, path, rootModEarly, flagUpgrade)
diags = diags.Append(modsDiags)
if modsAbort || modsDiags.HasErrors() {
c.showDiagnostics(diags)
@ -287,7 +301,7 @@ func (c *InitCommand) Run(args []string) int {
}
// Now that we have loaded all modules, check the module tree for missing providers.
providersOutput, providersAbort, providerDiags := c.getProviders(config, state, flagUpgrade, flagPluginPath, flagLockfile)
providersOutput, providersAbort, providerDiags := c.getProviders(ctx, config, state, flagUpgrade, flagPluginPath, flagLockfile)
diags = diags.Append(providerDiags)
if providersAbort || providerDiags.HasErrors() {
c.showDiagnostics(diags)
@ -328,7 +342,7 @@ func (c *InitCommand) Run(args []string) int {
return 0
}
func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade bool) (output bool, abort bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) getModules(ctx context.Context, path string, earlyRoot *configs.Module, upgrade bool) (output bool, abort bool, diags tfdiags.Diagnostics) {
testModules := false // We can also have modules buried in test files.
for _, file := range earlyRoot.Tests {
for _, run := range file.Runs {
@ -343,6 +357,11 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
return false, false, nil
}
ctx, span := tracer.Start(ctx, "install modules", trace.WithAttributes(
attribute.Bool("upgrade", upgrade),
))
defer span.End()
if upgrade {
c.Ui.Output(c.Colorize().Color("[reset][bold]Upgrading modules..."))
} else {
@ -354,7 +373,7 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
ShowLocalPaths: true,
}
installAbort, installDiags := c.installModules(path, upgrade, hooks)
installAbort, installDiags := c.installModules(ctx, path, upgrade, hooks)
diags = diags.Append(installDiags)
// At this point, installModules may have generated error diags or been
@ -377,7 +396,11 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
return true, installAbort, diags
}
func (c *InitCommand) initCloud(root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) initCloud(ctx context.Context, root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "initialize Terraform Cloud")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()
c.Ui.Output(c.Colorize().Color("\n[reset][bold]Initializing Terraform Cloud..."))
if len(extraConfig.AllItems()) != 0 {
@ -401,7 +424,11 @@ func (c *InitCommand) initCloud(root *configs.Module, extraConfig rawFlags) (be
return back, true, diags
}
func (c *InitCommand) initBackend(root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) initBackend(ctx context.Context, root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "initialize backend")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()
c.Ui.Output(c.Colorize().Color("\n[reset][bold]Initializing the backend..."))
var backendConfig *configs.Backend
@ -482,7 +509,10 @@ the backend configuration is present and valid.
// Load the complete module tree, and fetch any missing providers.
// This method outputs its own Ui.
func (c *InitCommand) getProviders(config *configs.Config, state *states.State, upgrade bool, pluginDirs []string, flagLockfile string) (output, abort bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) getProviders(ctx context.Context, config *configs.Config, state *states.State, upgrade bool, pluginDirs []string, flagLockfile string) (output, abort bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "install providers")
defer span.End()
// Dev overrides cause the result of "terraform init" to be irrelevant for
// any overridden providers, so we'll warn about it to avoid later
// confusion when Terraform ends up using a different provider than the
@ -540,10 +570,6 @@ func (c *InitCommand) getProviders(config *configs.Config, state *states.State,
log.Printf("[DEBUG] will search for provider plugins in %s", pluginDirs)
}
// Installation can be aborted by interruption signals
ctx, done := c.InterruptibleContext()
defer done()
// We want to print out a nice warning if we don't manage to pull
// checksums for all our providers. This is tracked via callbacks
// and incomplete providers are stored here for later analysis.

View File

@ -131,6 +131,15 @@ type Meta struct {
// web browser.
BrowserLauncher webbrowser.Launcher
// A context.Context provided by the caller -- typically "package main" --
// which might be carrying telemetry-related metadata and so should be
// used when creating downstream traces, etc.
//
// This isn't guaranteed to be set, so use [Meta.CommandContext] to
// safely create a context for the entire execution of a command, which
// will be connected to this parent context if it's present.
CallerContext context.Context
// When this channel is closed, the command will be cancelled.
ShutdownCh <-chan struct{}
@ -393,12 +402,23 @@ func (m *Meta) StdinPiped() bool {
// InterruptibleContext returns a context.Context that will be cancelled
// if the process is interrupted by a platform-specific interrupt signal.
//
// The typical way to use this is to pass the result of [Meta.CommandContext]
// as the base context, but that's appropriate only if the interruptible
// context is being created directly inside the "Run" method of a particular
// command, to create a context representing the entire remaining runtime of
// that command:
//
// As usual with cancelable contexts, the caller must always call the given
// cancel function once all operations are complete in order to make sure
// that the context resources will still be freed even if there is no
// interruption.
func (m *Meta) InterruptibleContext() (context.Context, context.CancelFunc) {
base := context.Background()
//
// // This example is only for when using this function very early in
// // the "Run" method of a Command implementation. If you already have
// // an active context, pass that in as base instead.
// ctx, done := c.InterruptibleContext(c.CommandContext())
// defer done()
func (m *Meta) InterruptibleContext(base context.Context) (context.Context, context.CancelFunc) {
if m.ShutdownCh == nil {
// If we're running in a unit testing context without a shutdown
// channel populated then we'll return an uncancelable channel.
@ -417,6 +437,27 @@ func (m *Meta) InterruptibleContext() (context.Context, context.CancelFunc) {
return ctx, cancel
}
// CommandContext returns the "root context" to use in the main Run function
// of a command.
//
// This method is just a substitute for passing a context directly to the
// "Run" method of a command, which we can't do because that API is owned by
// mitchellh/cli rather than by Terraform. Use this only in situations
// comparable to the context having been passed in as an argument to Run.
//
// If the caller (e.g. "package main") provided a context when it instantiated
// the Meta then the returned context will inherit all of its values, deadlines,
// etc. If the caller did not provide a context then the result is an inert
// background context ready to be passed to other functions.
func (m *Meta) CommandContext() context.Context {
if m.CallerContext == nil {
return context.Background()
}
// We just return the caller context directly for now, since we don't
// have anything to add to it.
return m.CallerContext
}
// RunOperation executes the given operation on the given backend, blocking
// until that operation completes or is interrupted, and then returns
// the RunningOperation object representing the completed or

View File

@ -766,10 +766,10 @@ func (m *Meta) determineInitReason(previousBackendType string, currentBackendTyp
}
// backendFromState returns the initialized (not configured) backend directly
// from the state. This should be used only when a user runs `terraform init
// -backend=false`. This function returns a local backend if there is no state
// or no backend configured.
func (m *Meta) backendFromState() (backend.Backend, tfdiags.Diagnostics) {
// from the backend state. This should be used only when a user runs
// `terraform init -backend=false`. This function returns a local backend if
// there is no backend state or no backend configured.
func (m *Meta) backendFromState(ctx context.Context) (backend.Backend, tfdiags.Diagnostics) {
var diags tfdiags.Diagnostics
// Get the path to where we store a local cache of backend configuration
// if we're using a remote backend. This may not yet exist which means

View File

@ -4,6 +4,7 @@
package command
import (
"context"
"io/ioutil"
"os"
"path/filepath"
@ -1939,7 +1940,7 @@ func TestBackendFromState(t *testing.T) {
// them to match just for this test.
wd.OverrideDataDir(".")
stateBackend, diags := m.backendFromState()
stateBackend, diags := m.backendFromState(context.Background())
if diags.HasErrors() {
t.Fatal(diags.Err())
}

View File

@ -14,6 +14,8 @@ import (
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/convert"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"github.com/hashicorp/terraform/internal/configs"
"github.com/hashicorp/terraform/internal/configs/configload"
@ -181,7 +183,10 @@ func (m *Meta) loadHCLFile(filename string) (hcl.Body, tfdiags.Diagnostics) {
// can then be relayed to the end-user. The uiModuleInstallHooks type in
// this package has a reasonable implementation for displaying notifications
// via a provided cli.Ui.
func (m *Meta) installModules(rootDir string, upgrade bool, hooks initwd.ModuleInstallHooks) (abort bool, diags tfdiags.Diagnostics) {
func (m *Meta) installModules(ctx context.Context, rootDir string, upgrade bool, hooks initwd.ModuleInstallHooks) (abort bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "install modules")
defer span.End()
rootDir = m.normalizePath(rootDir)
err := os.MkdirAll(m.modulesDir(), os.ModePerm)
@ -198,10 +203,6 @@ func (m *Meta) installModules(rootDir string, upgrade bool, hooks initwd.ModuleI
inst := initwd.NewModuleInstaller(m.modulesDir(), loader, m.registryClient())
// Installation can be aborted by interruption signals
ctx, done := m.InterruptibleContext()
defer done()
_, moreDiags := inst.InstallModules(ctx, rootDir, upgrade, hooks)
diags = diags.Append(moreDiags)
@ -223,10 +224,11 @@ func (m *Meta) installModules(rootDir string, upgrade bool, hooks initwd.ModuleI
// can then be relayed to the end-user. The uiModuleInstallHooks type in
// this package has a reasonable implementation for displaying notifications
// via a provided cli.Ui.
func (m *Meta) initDirFromModule(targetDir string, addr string, hooks initwd.ModuleInstallHooks) (abort bool, diags tfdiags.Diagnostics) {
// Installation can be aborted by interruption signals
ctx, done := m.InterruptibleContext()
defer done()
func (m *Meta) initDirFromModule(ctx context.Context, targetDir string, addr string, hooks initwd.ModuleInstallHooks) (abort bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "initialize directory from module", trace.WithAttributes(
attribute.String("source_addr", addr),
))
defer span.End()
loader, err := m.initConfigLoader()
if err != nil {

View File

@ -85,6 +85,10 @@ func (c *ProvidersLockCommand) Run(args []string) int {
}
}
// Installation steps can be cancelled by SIGINT and similar.
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()
// Unlike other commands, this command ignores the installation methods
// selected in the CLI configuration and instead chooses an installation
// method based on CLI options.
@ -185,8 +189,6 @@ func (c *ProvidersLockCommand) Run(args []string) int {
// merge all of the generated locks together at the end.
updatedLocks := map[getproviders.Platform]*depsfile.Locks{}
selectedVersions := map[addrs.Provider]getproviders.Version{}
ctx, cancel := c.InterruptibleContext()
defer cancel()
for _, platform := range platforms {
tempDir, err := ioutil.TempDir("", "terraform-providers-lock")
if err != nil {

View File

@ -74,6 +74,10 @@ func (c *ProvidersMirrorCommand) Run(args []string) int {
}
}
// Installation steps can be cancelled by SIGINT and similar.
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()
config, confDiags := c.loadConfig(".")
diags = diags.Append(confDiags)
reqs, moreDiags := config.ProviderRequirements()
@ -132,8 +136,6 @@ func (c *ProvidersMirrorCommand) Run(args []string) int {
// infrequently to update a mirror, so it doesn't need to optimize away
// fetches of packages that might already be present.
ctx, cancel := c.InterruptibleContext()
defer cancel()
for provider, constraints := range reqs {
if provider.IsBuiltIn() {
c.Ui.Output(fmt.Sprintf("- Skipping %s because it is built in to Terraform CLI", provider.ForDisplay()))

View File

@ -0,0 +1,12 @@
package command
import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/trace"
)
var tracer trace.Tracer
func init() {
tracer = otel.Tracer("github.com/hashicorp/terraform/internal/command")
}

15
main.go
View File

@ -4,6 +4,7 @@
package main
import (
"context"
"encoding/json"
"fmt"
"log"
@ -13,6 +14,7 @@ import (
"runtime"
"strings"
"github.com/apparentlymart/go-shquot/shquot"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/terraform-svchost/disco"
"github.com/hashicorp/terraform/internal/addrs"
@ -26,6 +28,7 @@ import (
"github.com/mattn/go-shellwords"
"github.com/mitchellh/cli"
"github.com/mitchellh/colorstring"
"go.opentelemetry.io/otel/trace"
backendInit "github.com/hashicorp/terraform/internal/backend/init"
)
@ -75,6 +78,14 @@ func realMain() int {
Ui.Error(fmt.Sprintf("Unset environment variable %s if you don't intend to collect telemetry from Terraform.", openTelemetryExporterEnvVar))
return 1
}
var ctx context.Context
var otelSpan trace.Span
{
// At minimum we emit a span covering the entire command execution.
_, displayArgs := shquot.POSIXShellSplit(os.Args)
ctx, otelSpan = tracer.Start(context.Background(), fmt.Sprintf("terraform %s", displayArgs))
defer otelSpan.End()
}
tmpLogPath := os.Getenv(envTmpLogPath)
if tmpLogPath != "" {
@ -235,11 +246,11 @@ func realMain() int {
// in case they need to refer back to it for any special reason, though
// they should primarily be working with the override working directory
// that we've now switched to above.
initCommands(originalWd, streams, config, services, providerSrc, providerDevOverrides, unmanagedProviders)
initCommands(ctx, originalWd, streams, config, services, providerSrc, providerDevOverrides, unmanagedProviders)
}
// Run checkpoint
go runCheckpoint(config)
go runCheckpoint(ctx, config)
// Make sure we clean up any managed plugins at the end of this
defer plugin.CleanupClients()

View File

@ -42,7 +42,8 @@ func init() {
// the standard OpenTelemetry environment variable OTEL_TRACES_EXPORTER=otlp
// to enable an OTLP exporter, which is in turn configured by all of the
// standard OTLP exporter environment variables:
// https://opentelemetry.io/docs/specs/otel/protocol/exporter/#configuration-options
//
// https://opentelemetry.io/docs/specs/otel/protocol/exporter/#configuration-options
//
// We don't currently support any other telemetry export protocols, because
// OTLP has emerged as a de-facto standard and each other exporter we support