Merge pull request #26694 from hashicorp/jbardin/plugin-panics

Handle panics in plugins
This commit is contained in:
James Bardin 2020-10-26 10:32:10 -04:00 committed by GitHub
commit 12c07752f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 299 additions and 40 deletions

View File

@ -24,14 +24,22 @@ const (
envLogProvider = "TF_LOG_PROVIDER"
)
var (
// ValidLevels are the log level names that Terraform recognizes.
var ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"}
ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"}
// logger is the global hclog logger
var logger hclog.Logger
logger hclog.Logger
// logWriter is a global writer for logs, to be used with the std log package
var logWriter io.Writer
logWriter io.Writer
// initialize our cache of panic output from providers
panics = &panicRecorder{
panics: make(map[string][]string),
maxLines: 100,
}
)
func init() {
logger = newHCLogger("")
@ -98,13 +106,17 @@ func NewLogger(name string) hclog.Logger {
if name == "" {
panic("logger name required")
}
return logger.Named(name)
return &logPanicWrapper{
Logger: logger.Named(name),
}
}
// NewProviderLogger returns a logger for the provider plugin, possibly with a
// different log level from the global logger.
func NewProviderLogger(prefix string) hclog.Logger {
l := logger.Named(prefix + "provider")
l := &logPanicWrapper{
Logger: logger.Named(prefix + "provider"),
}
level := providerLogLevel()
logger.Debug("created provider logger", "level", level)

View File

@ -5,7 +5,10 @@ import (
"io"
"io/ioutil"
"os"
"strings"
"sync"
"github.com/hashicorp/go-hclog"
"github.com/mitchellh/panicwrap"
)
@ -69,3 +72,111 @@ func PanicHandler(tmpLogPath string) panicwrap.HandlerFunc {
fmt.Printf(panicOutput, f.Name())
}
}
const pluginPanicOutput = `
Stack trace from the %[1]s plugin:
%s
Error: The %[1]s plugin crashed!
This is always indicative of a bug within the plugin. It would be immensely
helpful if you could report the crash with the plugin's maintainers so that it
can be fixed. The output above should help diagnose the issue.
`
// PluginPanics returns a series of provider panics that were collected during
// execution, and formatted for output.
func PluginPanics() []string {
return panics.allPanics()
}
// panicRecorder provides a registry to check for plugin panics that may have
// happened when a plugin suddenly terminates.
type panicRecorder struct {
sync.Mutex
// panics maps the plugin name to the panic output lines received from
// the logger.
panics map[string][]string
// maxLines is the max number of lines we'll record after seeing a
// panic header. Since this is going to be printed in the UI output, we
// don't want to destroy the scrollback. In most cases, the first few lines
// of the stack trace is all that are required.
maxLines int
}
// registerPlugin returns an accumulator function which will accept lines of
// a panic stack trace to collect into an error when requested.
func (p *panicRecorder) registerPlugin(name string) func(string) {
p.Lock()
defer p.Unlock()
// In most cases we shouldn't be starting a plugin if it already
// panicked, but clear out previous entries just in case.
delete(p.panics, name)
count := 0
// this callback is used by the logger to store panic output
return func(line string) {
p.Lock()
defer p.Unlock()
// stop recording if there are too many lines.
if count > p.maxLines {
return
}
count++
p.panics[name] = append(p.panics[name], line)
}
}
func (p *panicRecorder) allPanics() []string {
p.Lock()
defer p.Unlock()
var res []string
for name, lines := range p.panics {
if len(lines) == 0 {
continue
}
res = append(res, fmt.Sprintf(pluginPanicOutput, name, strings.Join(lines, "\n")))
}
return res
}
// logPanicWrapper wraps an hclog.Logger and intercepts and records any output
// that appears to be a panic.
type logPanicWrapper struct {
hclog.Logger
panicRecorder func(string)
inPanic bool
}
// go-plugin will create a new named logger for each plugin binary.
func (l *logPanicWrapper) Named(name string) hclog.Logger {
return &logPanicWrapper{
Logger: l.Logger.Named(name),
panicRecorder: panics.registerPlugin(name),
}
}
// we only need to implement Debug, since that is the default output level used
// by go-plugin when encountering unstructured output on stderr.
func (l *logPanicWrapper) Debug(msg string, args ...interface{}) {
// We don't have access to the binary itself, so guess based on the stderr
// output if this is the start of the traceback. An occasional false
// positive shouldn't be a big deal, since this is only retrieved after an
// error of some sort.
l.inPanic = l.inPanic || strings.HasPrefix(msg, "panic: ") || strings.HasPrefix(msg, "fatal error: ")
if l.inPanic && l.panicRecorder != nil {
l.panicRecorder(msg)
}
l.Logger.Debug(msg, args...)
}

View File

@ -0,0 +1,51 @@
package logging
import (
"fmt"
"strings"
"testing"
)
func TestPanicRecorder(t *testing.T) {
rec := panics.registerPlugin("test")
output := []string{
"panic: test",
" stack info",
}
for _, line := range output {
rec(line)
}
expected := fmt.Sprintf(pluginPanicOutput, "test", strings.Join(output, "\n"))
res := PluginPanics()
if len(res) == 0 {
t.Fatal("no output")
}
if res[0] != expected {
t.Fatalf("expected: %q\ngot: %q", expected, res[0])
}
}
func TestPanicLimit(t *testing.T) {
rec := panics.registerPlugin("test")
rec("panic: test")
for i := 0; i < 200; i++ {
rec(fmt.Sprintf("LINE: %d", i))
}
res := PluginPanics()
// take the extra content into account
max := strings.Count(pluginPanicOutput, "\n") + panics.maxLines
for _, out := range res {
found := strings.Count(out, "\n")
if found > max {
t.Fatalf("expected no more than %d lines, got: %d", max, found)
}
}
}

10
main.go
View File

@ -288,6 +288,16 @@ func wrappedMain() int {
return 1
}
// if we are exiting with a non-zero code, check if it was caused by any
// plugins crashing
if exitCode != 0 {
for _, panicLog := range logging.PluginPanics() {
// we don't write this to Error, or else panicwrap will think this
// process panicked
Ui.Info(panicLog)
}
}
return exitCode
}

74
plugin/grpc_error.go Normal file
View File

@ -0,0 +1,74 @@
package plugin
import (
"fmt"
"path"
"runtime"
"github.com/hashicorp/terraform/tfdiags"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// grpcErr extracts some known error types and formats them into better
// representations for core. This must only be called from plugin methods.
// Since we don't use RPC status errors for the plugin protocol, these do not
// contain any useful details, and we can return some text that at least
// indicates the plugin call and possible error condition.
func grpcErr(err error) (diags tfdiags.Diagnostics) {
if err == nil {
return
}
// extract the method name from the caller.
pc, _, _, ok := runtime.Caller(1)
if !ok {
logger.Error("unknown grpc call", "error", err)
return diags.Append(err)
}
f := runtime.FuncForPC(pc)
// Function names will contain the full import path. Take the last
// segment, which will let users know which method was being called.
_, requestName := path.Split(f.Name())
// Here we can at least correlate the error in the logs to a particular binary.
logger.Error(requestName, "error", err)
// TODO: while this expands the error codes into somewhat better messages,
// this still does not easily link the error to an actual user-recognizable
// plugin. The grpc plugin does not know its configured name, and the
// errors are in a list of diagnostics, making it hard for the caller to
// annotate the returned errors.
switch status.Code(err) {
case codes.Unavailable:
// This case is when the plugin has stopped running for some reason,
// and is usually the result of a crash.
diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Plugin did not respond",
fmt.Sprintf("The plugin encountered an error, and failed to respond to the %s call. "+
"The plugin logs may contain more details.", requestName),
))
case codes.Canceled:
diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Request cancelled",
fmt.Sprintf("The %s request was cancelled.", requestName),
))
case codes.Unimplemented:
diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Unsupported plugin method",
fmt.Sprintf("The %s method is not supported by this plugin.", requestName),
))
default:
diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Plugin error",
fmt.Sprintf("The plugin returned an unexpected error from %s: %v", requestName, err),
))
}
return
}

View File

@ -3,12 +3,12 @@ package plugin
import (
"context"
"errors"
"log"
"sync"
"github.com/zclconf/go-cty/cty"
plugin "github.com/hashicorp/go-plugin"
"github.com/hashicorp/terraform/internal/logging"
proto "github.com/hashicorp/terraform/internal/tfplugin5"
"github.com/hashicorp/terraform/plugin/convert"
"github.com/hashicorp/terraform/providers"
@ -17,6 +17,8 @@ import (
"google.golang.org/grpc"
)
var logger = logging.HCLogger()
// GRPCProviderPlugin implements plugin.GRPCPlugin for the go-plugin package.
type GRPCProviderPlugin struct {
plugin.Plugin
@ -114,7 +116,7 @@ func (p *GRPCProvider) getProviderMetaSchema() providers.Schema {
}
func (p *GRPCProvider) GetSchema() (resp providers.GetSchemaResponse) {
log.Printf("[TRACE] GRPCProvider: GetSchema")
logger.Trace("GRPCProvider: GetSchema")
p.mu.Lock()
defer p.mu.Unlock()
@ -133,7 +135,7 @@ func (p *GRPCProvider) GetSchema() (resp providers.GetSchemaResponse) {
const maxRecvSize = 64 << 20
protoResp, err := p.client.GetSchema(p.ctx, new(proto.GetProviderSchema_Request), grpc.MaxRecvMsgSizeCallOption{MaxRecvMsgSize: maxRecvSize})
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
@ -146,7 +148,7 @@ func (p *GRPCProvider) GetSchema() (resp providers.GetSchemaResponse) {
resp.Provider = convert.ProtoToProviderSchema(protoResp.Provider)
if protoResp.ProviderMeta == nil {
log.Printf("[TRACE] No provider meta schema returned")
logger.Debug("No provider meta schema returned")
} else {
resp.ProviderMeta = convert.ProtoToProviderSchema(protoResp.ProviderMeta)
}
@ -165,7 +167,7 @@ func (p *GRPCProvider) GetSchema() (resp providers.GetSchemaResponse) {
}
func (p *GRPCProvider) PrepareProviderConfig(r providers.PrepareProviderConfigRequest) (resp providers.PrepareProviderConfigResponse) {
log.Printf("[TRACE] GRPCProvider: PrepareProviderConfig")
logger.Trace("GRPCProvider: PrepareProviderConfig")
schema := p.getSchema()
ty := schema.Provider.Block.ImpliedType()
@ -182,7 +184,7 @@ func (p *GRPCProvider) PrepareProviderConfig(r providers.PrepareProviderConfigRe
protoResp, err := p.client.PrepareProviderConfig(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
@ -198,7 +200,7 @@ func (p *GRPCProvider) PrepareProviderConfig(r providers.PrepareProviderConfigRe
}
func (p *GRPCProvider) ValidateResourceTypeConfig(r providers.ValidateResourceTypeConfigRequest) (resp providers.ValidateResourceTypeConfigResponse) {
log.Printf("[TRACE] GRPCProvider: ValidateResourceTypeConfig")
logger.Trace("GRPCProvider: ValidateResourceTypeConfig")
resourceSchema := p.getResourceSchema(r.TypeName)
mp, err := msgpack.Marshal(r.Config, resourceSchema.Block.ImpliedType())
@ -214,7 +216,7 @@ func (p *GRPCProvider) ValidateResourceTypeConfig(r providers.ValidateResourceTy
protoResp, err := p.client.ValidateResourceTypeConfig(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
@ -223,7 +225,7 @@ func (p *GRPCProvider) ValidateResourceTypeConfig(r providers.ValidateResourceTy
}
func (p *GRPCProvider) ValidateDataSourceConfig(r providers.ValidateDataSourceConfigRequest) (resp providers.ValidateDataSourceConfigResponse) {
log.Printf("[TRACE] GRPCProvider: ValidateDataSourceConfig")
logger.Trace("GRPCProvider: ValidateDataSourceConfig")
dataSchema := p.getDatasourceSchema(r.TypeName)
@ -240,7 +242,7 @@ func (p *GRPCProvider) ValidateDataSourceConfig(r providers.ValidateDataSourceCo
protoResp, err := p.client.ValidateDataSourceConfig(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -248,7 +250,7 @@ func (p *GRPCProvider) ValidateDataSourceConfig(r providers.ValidateDataSourceCo
}
func (p *GRPCProvider) UpgradeResourceState(r providers.UpgradeResourceStateRequest) (resp providers.UpgradeResourceStateResponse) {
log.Printf("[TRACE] GRPCProvider: UpgradeResourceState")
logger.Trace("GRPCProvider: UpgradeResourceState")
resSchema := p.getResourceSchema(r.TypeName)
@ -263,7 +265,7 @@ func (p *GRPCProvider) UpgradeResourceState(r providers.UpgradeResourceStateRequ
protoResp, err := p.client.UpgradeResourceState(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -285,7 +287,7 @@ func (p *GRPCProvider) UpgradeResourceState(r providers.UpgradeResourceStateRequ
}
func (p *GRPCProvider) Configure(r providers.ConfigureRequest) (resp providers.ConfigureResponse) {
log.Printf("[TRACE] GRPCProvider: Configure")
logger.Trace("GRPCProvider: Configure")
schema := p.getSchema()
@ -307,7 +309,7 @@ func (p *GRPCProvider) Configure(r providers.ConfigureRequest) (resp providers.C
protoResp, err := p.client.Configure(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -315,7 +317,7 @@ func (p *GRPCProvider) Configure(r providers.ConfigureRequest) (resp providers.C
}
func (p *GRPCProvider) Stop() error {
log.Printf("[TRACE] GRPCProvider: Stop")
logger.Trace("GRPCProvider: Stop")
resp, err := p.client.Stop(p.ctx, new(proto.Stop_Request))
if err != nil {
@ -329,7 +331,7 @@ func (p *GRPCProvider) Stop() error {
}
func (p *GRPCProvider) ReadResource(r providers.ReadResourceRequest) (resp providers.ReadResourceResponse) {
log.Printf("[TRACE] GRPCProvider: ReadResource")
logger.Trace("GRPCProvider: ReadResource")
resSchema := p.getResourceSchema(r.TypeName)
metaSchema := p.getProviderMetaSchema()
@ -357,7 +359,7 @@ func (p *GRPCProvider) ReadResource(r providers.ReadResourceRequest) (resp provi
protoResp, err := p.client.ReadResource(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -374,7 +376,7 @@ func (p *GRPCProvider) ReadResource(r providers.ReadResourceRequest) (resp provi
}
func (p *GRPCProvider) PlanResourceChange(r providers.PlanResourceChangeRequest) (resp providers.PlanResourceChangeResponse) {
log.Printf("[TRACE] GRPCProvider: PlanResourceChange")
logger.Trace("GRPCProvider: PlanResourceChange")
resSchema := p.getResourceSchema(r.TypeName)
metaSchema := p.getProviderMetaSchema()
@ -416,7 +418,7 @@ func (p *GRPCProvider) PlanResourceChange(r providers.PlanResourceChangeRequest)
protoResp, err := p.client.PlanResourceChange(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -440,7 +442,7 @@ func (p *GRPCProvider) PlanResourceChange(r providers.PlanResourceChangeRequest)
}
func (p *GRPCProvider) ApplyResourceChange(r providers.ApplyResourceChangeRequest) (resp providers.ApplyResourceChangeResponse) {
log.Printf("[TRACE] GRPCProvider: ApplyResourceChange")
logger.Trace("GRPCProvider: ApplyResourceChange")
resSchema := p.getResourceSchema(r.TypeName)
metaSchema := p.getProviderMetaSchema()
@ -480,7 +482,7 @@ func (p *GRPCProvider) ApplyResourceChange(r providers.ApplyResourceChangeReques
protoResp, err := p.client.ApplyResourceChange(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -500,7 +502,7 @@ func (p *GRPCProvider) ApplyResourceChange(r providers.ApplyResourceChangeReques
}
func (p *GRPCProvider) ImportResourceState(r providers.ImportResourceStateRequest) (resp providers.ImportResourceStateResponse) {
log.Printf("[TRACE] GRPCProvider: ImportResourceState")
logger.Trace("GRPCProvider: ImportResourceState")
protoReq := &proto.ImportResourceState_Request{
TypeName: r.TypeName,
@ -509,7 +511,7 @@ func (p *GRPCProvider) ImportResourceState(r providers.ImportResourceStateReques
protoResp, err := p.client.ImportResourceState(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -534,7 +536,7 @@ func (p *GRPCProvider) ImportResourceState(r providers.ImportResourceStateReques
}
func (p *GRPCProvider) ReadDataSource(r providers.ReadDataSourceRequest) (resp providers.ReadDataSourceResponse) {
log.Printf("[TRACE] GRPCProvider: ReadDataSource")
logger.Trace("GRPCProvider: ReadDataSource")
dataSchema := p.getDatasourceSchema(r.TypeName)
metaSchema := p.getProviderMetaSchema()
@ -563,7 +565,7 @@ func (p *GRPCProvider) ReadDataSource(r providers.ReadDataSourceRequest) (resp p
protoResp, err := p.client.ReadDataSource(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -580,7 +582,7 @@ func (p *GRPCProvider) ReadDataSource(r providers.ReadDataSourceRequest) (resp p
// closing the grpc connection is final, and terraform will call it at the end of every phase.
func (p *GRPCProvider) Close() error {
log.Printf("[TRACE] GRPCProvider: Close")
logger.Trace("GRPCProvider: Close")
// Make sure to stop the server if we're not running within go-plugin.
if p.TestServer != nil {
@ -592,7 +594,7 @@ func (p *GRPCProvider) Close() error {
// where the factory is built and is the only point with access to the
// plugin.Client.
if p.PluginClient == nil {
log.Println("[DEBUG] provider has no plugin.Client")
logger.Debug("provider has no plugin.Client")
return nil
}

View File

@ -4,7 +4,6 @@ import (
"context"
"errors"
"io"
"log"
"sync"
plugin "github.com/hashicorp/go-plugin"
@ -61,7 +60,7 @@ func (p *GRPCProvisioner) GetSchema() (resp provisioners.GetSchemaResponse) {
protoResp, err := p.client.GetSchema(p.ctx, new(proto.GetProvisionerSchema_Request))
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -96,7 +95,7 @@ func (p *GRPCProvisioner) ValidateProvisionerConfig(r provisioners.ValidateProvi
}
protoResp, err := p.client.ValidateProvisionerConfig(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
resp.Diagnostics = resp.Diagnostics.Append(convert.ProtoToDiagnostics(protoResp.Diagnostics))
@ -130,7 +129,7 @@ func (p *GRPCProvisioner) ProvisionResource(r provisioners.ProvisionResourceRequ
outputClient, err := p.client.ProvisionResource(p.ctx, protoReq)
if err != nil {
resp.Diagnostics = resp.Diagnostics.Append(err)
resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
return resp
}
@ -169,7 +168,7 @@ func (p *GRPCProvisioner) Stop() error {
func (p *GRPCProvisioner) Close() error {
// check this since it's not automatically inserted during plugin creation
if p.PluginClient == nil {
log.Println("[DEBUG] provider has no plugin.Client")
logger.Debug("provisioner has no plugin.Client")
return nil
}