From fc0e28b2b40c0fb2b3df89088bb766f9e8a0dac5 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Thu, 21 Jun 2018 19:02:27 -0700 Subject: [PATCH] configload: Don't download the same module source multiple times It is common for the same module source package to be referenced multiple times in the same configuration, either because there are literally multiple instances of the same module source or because a single package (or repository) contains multiple modules in sub-directories and many of them are referenced. To optimize this, here we introduce a simple caching behavior where the module installer will detect if it's asked to install multiple times from the same source and produce the second and subsequent directories by copying the first, rather than by downloading again over the network. This optimization is applied once all of the go-getter detection has completed and sub-directory portions have been trimmed, so it is also able to normalize differently-specified source addresses that all ultimately detect to the same resolved address. When installing, we always extract the entire specified package (or repository) and then reference the specified sub-directory, so we can safely re-use existing directories when the base package is the same, even if the sub-directory is different. However, as a result we do not yet address the fact that the same package will be stored multiple times _on disk_, which may still be problematic when referencing large repositories multiple times in disk-storage-constrained environments. We could address this in a subsequent change by investigating the use of symlinks where possible. Since the Registry installer is implemented just as an extra resolution step in front of go-getter, this optimization applies to registry modules too. This does not apply to local relative references, which will continue to just resolve into the already-prepared directory of their parent module. The cache of previously installed paths lives only for the duration of one call to InstallModules, so we will never re-use directories that were created by previous runs of "terraform init" and there is no risk that older versions will pollute the cache when attempting an upgrade from a source address that doesn't explicitly specify a version. No additional tests are added here because the existing module installer tests (when TF_ACC=1) already cover the case of installing multiple modules from the same source. --- configs/configload/getter.go | 54 ++++++++++++++----- configs/configload/loader_init_from_module.go | 3 +- configs/configload/loader_install.go | 17 +++--- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/configs/configload/getter.go b/configs/configload/getter.go index fffc48c045..4a3daceee4 100644 --- a/configs/configload/getter.go +++ b/configs/configload/getter.go @@ -1,7 +1,9 @@ package configload import ( + "fmt" "log" + "os" "path/filepath" cleanhttp "github.com/hashicorp/go-cleanhttp" @@ -56,6 +58,16 @@ var getterHTTPGetter = &getter.HttpGetter{ Netrc: true, } +// A reusingGetter is a helper for the module installer that remembers +// the final resolved addresses of all of the sources it has already been +// asked to install, and will copy from a prior installation directory if +// it has the same resolved source address. +// +// The keys in a reusingGetter are resolved and trimmed source addresses +// (with a scheme always present, and without any "subdir" component), +// and the values are the paths where each source was previously installed. +type reusingGetter map[string]string + // getWithGoGetter retrieves the package referenced in the given address // into the installation path and then returns the full path to any subdir // indicated in the address. @@ -65,7 +77,7 @@ var getterHTTPGetter = &getter.HttpGetter{ // end-user-actionable error messages. At this time we do not have any // reasonable way to improve these error messages at this layer because // the underlying errors are not separatelyr recognizable. -func getWithGoGetter(instPath, addr string) (string, error) { +func (g reusingGetter) getWithGoGetter(instPath, addr string) (string, error) { packageAddr, subDir := splitAddrSubdir(addr) log.Printf("[DEBUG] will download %q to %s", packageAddr, instPath) @@ -85,20 +97,36 @@ func getWithGoGetter(instPath, addr string) (string, error) { log.Printf("[TRACE] go-getter detectors rewrote %q to %q", packageAddr, realAddr) } - client := getter.Client{ - Src: realAddr, - Dst: instPath, - Pwd: instPath, + if prevDir, exists := g[realAddr]; exists { + log.Printf("[TRACE] copying previous install %s to %s", prevDir, instPath) + err := os.Mkdir(instPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("failed to create directory %s: %s", instPath, err) + } + err = copyDir(instPath, prevDir) + if err != nil { + return "", fmt.Errorf("failed to copy from %s to %s: %s", prevDir, instPath, err) + } + } else { + log.Printf("[TRACE] fetching %q to %q", realAddr, instPath) + client := getter.Client{ + Src: realAddr, + Dst: instPath, + Pwd: instPath, - Mode: getter.ClientModeDir, + Mode: getter.ClientModeDir, - Detectors: goGetterNoDetectors, // we already did detection above - Decompressors: goGetterDecompressors, - Getters: goGetterGetters, - } - err = client.Get() - if err != nil { - return "", err + Detectors: goGetterNoDetectors, // we already did detection above + Decompressors: goGetterDecompressors, + Getters: goGetterGetters, + } + err = client.Get() + if err != nil { + return "", err + } + // Remember where we installed this so we might reuse this directory + // on subsequent calls to avoid re-downloading. + g[realAddr] = instPath } // Our subDir string can contain wildcards until this point, so that diff --git a/configs/configload/loader_init_from_module.go b/configs/configload/loader_init_from_module.go index 4730244875..0e41ea22c3 100644 --- a/configs/configload/loader_init_from_module.go +++ b/configs/configload/loader_init_from_module.go @@ -149,7 +149,8 @@ func (l *Loader) InitDirFromModule(rootDir, sourceAddr string, hooks InstallHook wrapHooks := installHooksInitDir{ Wrapped: hooks, } - instDiags := subLoader.installDescendentModules(fakeRootModule, rootDir, true, wrapHooks) + getter := reusingGetter{} + instDiags := subLoader.installDescendentModules(fakeRootModule, rootDir, true, wrapHooks, getter) diags = append(diags, instDiags...) if instDiags.HasErrors() { return diags diff --git a/configs/configload/loader_install.go b/configs/configload/loader_install.go index b9f6be8ddd..e989861c36 100644 --- a/configs/configload/loader_install.go +++ b/configs/configload/loader_install.go @@ -53,13 +53,14 @@ func (l *Loader) InstallModules(rootDir string, upgrade bool, hooks InstallHooks return diags } - instDiags := l.installDescendentModules(rootMod, rootDir, upgrade, hooks) + getter := reusingGetter{} + instDiags := l.installDescendentModules(rootMod, rootDir, upgrade, hooks, getter) diags = append(diags, instDiags...) return diags } -func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir string, upgrade bool, hooks InstallHooks) hcl.Diagnostics { +func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir string, upgrade bool, hooks InstallHooks, getter reusingGetter) hcl.Diagnostics { var diags hcl.Diagnostics if hooks == nil { @@ -173,14 +174,14 @@ func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir strin } log.Printf("[TRACE] %s is a registry module at %s", key, addr) - mod, v, mDiags := l.installRegistryModule(req, key, instPath, addr, hooks) + mod, v, mDiags := l.installRegistryModule(req, key, instPath, addr, hooks, getter) diags = append(diags, mDiags...) return mod, v, diags default: log.Printf("[TRACE] %s address %q will be handled by go-getter", key, req.SourceAddr) - mod, mDiags := l.installGoGetterModule(req, key, instPath, hooks) + mod, mDiags := l.installGoGetterModule(req, key, instPath, hooks, getter) diags = append(diags, mDiags...) return mod, nil, diags } @@ -262,7 +263,7 @@ func (l *Loader) installLocalModule(req *configs.ModuleRequest, key string, hook return mod, diags } -func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, instPath string, addr *regsrc.Module, hooks InstallHooks) (*configs.Module, *version.Version, hcl.Diagnostics) { +func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, instPath string, addr *regsrc.Module, hooks InstallHooks, getter reusingGetter) (*configs.Module, *version.Version, hcl.Diagnostics) { var diags hcl.Diagnostics hostname, err := addr.SvcHost() @@ -406,7 +407,7 @@ func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, i log.Printf("[TRACE] %s %s %s is available at %q", key, addr, latestMatch, dlAddr) - modDir, err := getWithGoGetter(instPath, dlAddr) + modDir, err := getter.getWithGoGetter(instPath, dlAddr) if err != nil { // Errors returned by go-getter have very inconsistent quality as // end-user error messages, but for now we're accepting that because @@ -463,14 +464,14 @@ func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, i return mod, latestMatch, diags } -func (l *Loader) installGoGetterModule(req *configs.ModuleRequest, key string, instPath string, hooks InstallHooks) (*configs.Module, hcl.Diagnostics) { +func (l *Loader) installGoGetterModule(req *configs.ModuleRequest, key string, instPath string, hooks InstallHooks, getter reusingGetter) (*configs.Module, hcl.Diagnostics) { var diags hcl.Diagnostics // Report up to the caller that we're about to start downloading. packageAddr, _ := splitAddrSubdir(req.SourceAddr) hooks.Download(key, packageAddr, nil) - modDir, err := getWithGoGetter(instPath, req.SourceAddr) + modDir, err := getter.getWithGoGetter(instPath, req.SourceAddr) if err != nil { // Errors returned by go-getter have very inconsistent quality as // end-user error messages, but for now we're accepting that because