configload: Don't download the same module source multiple times

It is common for the same module source package to be referenced multiple
times in the same configuration, either because there are literally
multiple instances of the same module source or because a single package
(or repository) contains multiple modules in sub-directories and many
of them are referenced.

To optimize this, here we introduce a simple caching behavior where the
module installer will detect if it's asked to install multiple times from
the same source and produce the second and subsequent directories by
copying the first, rather than by downloading again over the network.

This optimization is applied once all of the go-getter detection has
completed and sub-directory portions have been trimmed, so it is also
able to normalize differently-specified source addresses that all
ultimately resolve to the same address. When installing, we
always extract the entire specified package (or repository) and then
reference the specified sub-directory, so we can safely re-use existing
directories when the base package is the same, even if the sub-directory
is different.

However, because reuse is implemented by copying, this change does not yet
address the fact that the same package will be stored multiple times
_on disk_, which may still be problematic when referencing large
repositories multiple times in
disk-storage-constrained environments. We could address this in a
subsequent change by investigating the use of symlinks where possible.

Since the Registry installer is implemented just as an extra resolution
step in front of go-getter, this optimization applies to registry
modules too. This does not apply to local relative references, which will
continue to just resolve into the already-prepared directory of their
parent module.

The cache of previously installed paths lives only for the duration of
one call to InstallModules, so we will never re-use directories that
were created by previous runs of "terraform init" and there is no risk
that older versions will pollute the cache when attempting an upgrade
from a source address that doesn't explicitly specify a version.

No additional tests are added here because the existing module installer
tests (when TF_ACC=1) already cover the case of installing multiple
modules from the same source.
This commit is contained in:
Martin Atkins 2018-06-21 19:02:27 -07:00
parent 8c3c0418d4
commit fc0e28b2b4
3 changed files with 52 additions and 22 deletions

View File

@ -1,7 +1,9 @@
package configload package configload
import ( import (
"fmt"
"log" "log"
"os"
"path/filepath" "path/filepath"
cleanhttp "github.com/hashicorp/go-cleanhttp" cleanhttp "github.com/hashicorp/go-cleanhttp"
@ -56,6 +58,16 @@ var getterHTTPGetter = &getter.HttpGetter{
Netrc: true, Netrc: true,
} }
// A reusingGetter is a helper for the module installer that remembers
// the final resolved addresses of all of the sources it has already been
// asked to install, and will copy from a prior installation directory if
// it has the same resolved source address.
//
// The keys in a reusingGetter are resolved and trimmed source addresses
// (with a scheme always present, and without any "subdir" component),
// and the values are the paths where each source was previously installed.
type reusingGetter map[string]string
// getWithGoGetter retrieves the package referenced in the given address // getWithGoGetter retrieves the package referenced in the given address
// into the installation path and then returns the full path to any subdir // into the installation path and then returns the full path to any subdir
// indicated in the address. // indicated in the address.
@ -65,7 +77,7 @@ var getterHTTPGetter = &getter.HttpGetter{
// end-user-actionable error messages. At this time we do not have any // end-user-actionable error messages. At this time we do not have any
// reasonable way to improve these error messages at this layer because // reasonable way to improve these error messages at this layer because
// the underlying errors are not separatelyr recognizable. // the underlying errors are not separatelyr recognizable.
func getWithGoGetter(instPath, addr string) (string, error) { func (g reusingGetter) getWithGoGetter(instPath, addr string) (string, error) {
packageAddr, subDir := splitAddrSubdir(addr) packageAddr, subDir := splitAddrSubdir(addr)
log.Printf("[DEBUG] will download %q to %s", packageAddr, instPath) log.Printf("[DEBUG] will download %q to %s", packageAddr, instPath)
@ -85,20 +97,36 @@ func getWithGoGetter(instPath, addr string) (string, error) {
log.Printf("[TRACE] go-getter detectors rewrote %q to %q", packageAddr, realAddr) log.Printf("[TRACE] go-getter detectors rewrote %q to %q", packageAddr, realAddr)
} }
client := getter.Client{ if prevDir, exists := g[realAddr]; exists {
Src: realAddr, log.Printf("[TRACE] copying previous install %s to %s", prevDir, instPath)
Dst: instPath, err := os.Mkdir(instPath, os.ModePerm)
Pwd: instPath, if err != nil {
return "", fmt.Errorf("failed to create directory %s: %s", instPath, err)
}
err = copyDir(instPath, prevDir)
if err != nil {
return "", fmt.Errorf("failed to copy from %s to %s: %s", prevDir, instPath, err)
}
} else {
log.Printf("[TRACE] fetching %q to %q", realAddr, instPath)
client := getter.Client{
Src: realAddr,
Dst: instPath,
Pwd: instPath,
Mode: getter.ClientModeDir, Mode: getter.ClientModeDir,
Detectors: goGetterNoDetectors, // we already did detection above Detectors: goGetterNoDetectors, // we already did detection above
Decompressors: goGetterDecompressors, Decompressors: goGetterDecompressors,
Getters: goGetterGetters, Getters: goGetterGetters,
} }
err = client.Get() err = client.Get()
if err != nil { if err != nil {
return "", err return "", err
}
// Remember where we installed this so we might reuse this directory
// on subsequent calls to avoid re-downloading.
g[realAddr] = instPath
} }
// Our subDir string can contain wildcards until this point, so that // Our subDir string can contain wildcards until this point, so that

View File

@ -149,7 +149,8 @@ func (l *Loader) InitDirFromModule(rootDir, sourceAddr string, hooks InstallHook
wrapHooks := installHooksInitDir{ wrapHooks := installHooksInitDir{
Wrapped: hooks, Wrapped: hooks,
} }
instDiags := subLoader.installDescendentModules(fakeRootModule, rootDir, true, wrapHooks) getter := reusingGetter{}
instDiags := subLoader.installDescendentModules(fakeRootModule, rootDir, true, wrapHooks, getter)
diags = append(diags, instDiags...) diags = append(diags, instDiags...)
if instDiags.HasErrors() { if instDiags.HasErrors() {
return diags return diags

View File

@ -53,13 +53,14 @@ func (l *Loader) InstallModules(rootDir string, upgrade bool, hooks InstallHooks
return diags return diags
} }
instDiags := l.installDescendentModules(rootMod, rootDir, upgrade, hooks) getter := reusingGetter{}
instDiags := l.installDescendentModules(rootMod, rootDir, upgrade, hooks, getter)
diags = append(diags, instDiags...) diags = append(diags, instDiags...)
return diags return diags
} }
func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir string, upgrade bool, hooks InstallHooks) hcl.Diagnostics { func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir string, upgrade bool, hooks InstallHooks, getter reusingGetter) hcl.Diagnostics {
var diags hcl.Diagnostics var diags hcl.Diagnostics
if hooks == nil { if hooks == nil {
@ -173,14 +174,14 @@ func (l *Loader) installDescendentModules(rootMod *configs.Module, rootDir strin
} }
log.Printf("[TRACE] %s is a registry module at %s", key, addr) log.Printf("[TRACE] %s is a registry module at %s", key, addr)
mod, v, mDiags := l.installRegistryModule(req, key, instPath, addr, hooks) mod, v, mDiags := l.installRegistryModule(req, key, instPath, addr, hooks, getter)
diags = append(diags, mDiags...) diags = append(diags, mDiags...)
return mod, v, diags return mod, v, diags
default: default:
log.Printf("[TRACE] %s address %q will be handled by go-getter", key, req.SourceAddr) log.Printf("[TRACE] %s address %q will be handled by go-getter", key, req.SourceAddr)
mod, mDiags := l.installGoGetterModule(req, key, instPath, hooks) mod, mDiags := l.installGoGetterModule(req, key, instPath, hooks, getter)
diags = append(diags, mDiags...) diags = append(diags, mDiags...)
return mod, nil, diags return mod, nil, diags
} }
@ -262,7 +263,7 @@ func (l *Loader) installLocalModule(req *configs.ModuleRequest, key string, hook
return mod, diags return mod, diags
} }
func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, instPath string, addr *regsrc.Module, hooks InstallHooks) (*configs.Module, *version.Version, hcl.Diagnostics) { func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, instPath string, addr *regsrc.Module, hooks InstallHooks, getter reusingGetter) (*configs.Module, *version.Version, hcl.Diagnostics) {
var diags hcl.Diagnostics var diags hcl.Diagnostics
hostname, err := addr.SvcHost() hostname, err := addr.SvcHost()
@ -406,7 +407,7 @@ func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, i
log.Printf("[TRACE] %s %s %s is available at %q", key, addr, latestMatch, dlAddr) log.Printf("[TRACE] %s %s %s is available at %q", key, addr, latestMatch, dlAddr)
modDir, err := getWithGoGetter(instPath, dlAddr) modDir, err := getter.getWithGoGetter(instPath, dlAddr)
if err != nil { if err != nil {
// Errors returned by go-getter have very inconsistent quality as // Errors returned by go-getter have very inconsistent quality as
// end-user error messages, but for now we're accepting that because // end-user error messages, but for now we're accepting that because
@ -463,14 +464,14 @@ func (l *Loader) installRegistryModule(req *configs.ModuleRequest, key string, i
return mod, latestMatch, diags return mod, latestMatch, diags
} }
func (l *Loader) installGoGetterModule(req *configs.ModuleRequest, key string, instPath string, hooks InstallHooks) (*configs.Module, hcl.Diagnostics) { func (l *Loader) installGoGetterModule(req *configs.ModuleRequest, key string, instPath string, hooks InstallHooks, getter reusingGetter) (*configs.Module, hcl.Diagnostics) {
var diags hcl.Diagnostics var diags hcl.Diagnostics
// Report up to the caller that we're about to start downloading. // Report up to the caller that we're about to start downloading.
packageAddr, _ := splitAddrSubdir(req.SourceAddr) packageAddr, _ := splitAddrSubdir(req.SourceAddr)
hooks.Download(key, packageAddr, nil) hooks.Download(key, packageAddr, nil)
modDir, err := getWithGoGetter(instPath, req.SourceAddr) modDir, err := getter.getWithGoGetter(instPath, req.SourceAddr)
if err != nil { if err != nil {
// Errors returned by go-getter have very inconsistent quality as // Errors returned by go-getter have very inconsistent quality as
// end-user error messages, but for now we're accepting that because // end-user error messages, but for now we're accepting that because