diff --git a/internal/addrs/module_package.go b/internal/addrs/module_package.go new file mode 100644 index 0000000000..e21ed6fc29 --- /dev/null +++ b/internal/addrs/module_package.go @@ -0,0 +1,30 @@ +package addrs + +// A ModulePackage represents a physical location where Terraform can retrieve +// a module package, which is an archive, repository, or other similar +// container which delivers the source code for one or more Terraform modules. +// +// A ModulePackage is a string in go-getter's address syntax. By convention, +// we use ModulePackage-typed values only for the result of successfully +// running the go-getter "detectors", which produces an address string which +// includes an explicit installation method prefix along with an address +// string in the format expected by that installation method. +// +// Note that although the "detector" phase of go-getter does do some simple +// normalization in certain cases, it isn't generally possible to compare +// two ModulePackage values to decide if they refer to the same package. Two +// equal ModulePackage values represent the same package, but there might be +// other non-equal ModulePackage values that also refer to that package, and +// there is no reliable way to determine that. +// +// Don't convert a user-provided string directly to ModulePackage. Instead, +// use ParseModuleSource with a remote module address and then access the +// ModulePackage value from the result, making sure to also handle the +// selected subdirectory if any. You should convert directly to ModulePackage +// only for a string that is hard-coded into the program (e.g. in a unit test) +// where you've ensured that it's already in the expected syntax. 
+type ModulePackage string + +func (p ModulePackage) String() string { + return string(p) +} diff --git a/internal/addrs/module_source.go b/internal/addrs/module_source.go new file mode 100644 index 0000000000..c532a5f7b4 --- /dev/null +++ b/internal/addrs/module_source.go @@ -0,0 +1,459 @@ +package addrs + +import ( + "fmt" + "path" + "regexp" + "strings" + + svchost "github.com/hashicorp/terraform-svchost" + "github.com/hashicorp/terraform/internal/getmodules" +) + +// ModuleSource is the general type for all three of the possible module source +// address types. The concrete implementations of this are ModuleSourceLocal, +// ModuleSourceRegistry, and ModuleSourceRemote. +type ModuleSource interface { + // String returns a full representation of the address, including any + // additional components that are typically implied by omission in + // user-written addresses. + // + // We typically use this longer representation in error messages, in case + // the inclusion of normally-omitted components is helpful in debugging + // unexpected behavior. + String() string + + // ForDisplay is similar to String but instead returns a representation of + // the idiomatic way to write the address in configuration, omitting + // components that are commonly just implied in addresses written by + // users. + // + // We typically use this shorter representation in informational messages, + // such as the note that we're about to start downloading a package. 
+ ForDisplay() string + + moduleSource() +} + +var _ ModuleSource = ModuleSourceLocal("") +var _ ModuleSource = ModuleSourceRegistry{} +var _ ModuleSource = ModuleSourceRemote{} + +var moduleSourceLocalPrefixes = []string{ + "./", + "../", + ".\\", + "..\\", +} + +func ParseModuleSource(raw string) (ModuleSource, error) { + for _, prefix := range moduleSourceLocalPrefixes { + if strings.HasPrefix(raw, prefix) { + return parseModuleSourceLocal(raw) + } + } + + // For historical reasons, whether an address is a registry + // address is defined only by whether it can be successfully + // parsed as one, and anything else must fall through to be + // parsed as a direct remote source, where go-getter might + // then recognize it as a filesystem path. This is odd + // but matches behavior we've had since Terraform v0.10 which + // existing modules may be relying on. + // (Notice that this means that there's never any path where + // the registry source parse error gets returned to the caller, + // which is annoying but has been true for many releases + // without it posing a serious problem in practice.) + if ret, err := parseModuleSourceRegistry(raw); err == nil { + return ret, nil + } + + // If we get down here then we treat everything else as a + // remote address. In practice there's very little that + // go-getter actually considers invalid input, so even invalid + // nonsense will probably be interpreted as _something_ here + // and then fail during installation instead. We can't + // really improve this situation for historical reasons. + return parseModuleSourceRemote(raw) +} + +// ModuleSourceLocal is a ModuleSource representing a local path reference +// from the caller's directory to the callee's directory within the same +// module package. +// +// A "module package" here means a set of modules distributed together in +// the same archive, repository, or similar. 
That's a significant distinction +// because we always download and cache entire module packages at once, +// and then create relative references within the same directory in order +// to ensure all modules in the package are looking at a consistent filesystem +// layout. We also assume that modules within a package are maintained together, +// which means that cross-cutting maintenance across all of them would be +// possible. +// +// The actual value of a ModuleSourceLocal is a normalized relative path using +// forward slashes, even on operating systems that have other conventions, +// because we're representing traversal within the logical filesystem +// represented by the containing package, not actually within the physical +// filesystem we unpacked the package into. We should typically not construct +// ModuleSourceLocal values directly, except in tests where we can ensure +// the value meets our assumptions. Use ParseModuleSource instead if the +// input string is not hard-coded in the program. +type ModuleSourceLocal string + +func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) { + // As long as we have a suitable prefix (detected by ParseModuleSource) + // there is no failure case for local paths: we just use the "path" + // package's cleaning logic to remove any redundant "./" and "../" + // sequences and any duplicate slashes and accept whatever that + // produces. + + // Although using backslashes (Windows-style) is non-idiomatic, we do + // allow it and just normalize it away, so the rest of Terraform will + // only see the forward-slash form. + if strings.Contains(raw, `\`) { + // Note: We use string replacement rather than filepath.ToSlash + // here because the filepath package behavior varies by current + // platform, but we want to interpret configured paths the same + // across all platforms: these are virtual paths within a module + // package, not physical filesystem paths. 
+ raw = strings.ReplaceAll(raw, `\`, "/") + } + + // Note that we could've historically blocked using "//" in a path here + // in order to avoid confusion with the subdir syntax in remote addresses, + // but we historically just treated that as the same as a single slash + // and so we continue to do that now for compatibility. Clean strips those + // out and reduces them to just a single slash. + clean := path.Clean(raw) + + // However, we do need to keep a single "./" on the front if it isn't + // a "../" path, or else it would be ambiguous with the registry address + // syntax. + if !strings.HasPrefix(clean, "../") { + clean = "./" + clean + } + + return ModuleSourceLocal(clean), nil +} + +func (s ModuleSourceLocal) moduleSource() {} + +func (s ModuleSourceLocal) String() string { + // We assume that our underlying string was already normalized at + // construction, so we just return it verbatim. + return string(s) +} + +func (s ModuleSourceLocal) ForDisplay() string { + return s.String() // the two string representations are identical for this address type +} + +// ModuleSourceRegistry is a ModuleSource representing a module listed in a +// Terraform module registry. +// +// A registry source isn't a direct source location but rather an indirection +// over a ModuleSourceRemote. The job of a registry is to translate the +// combination of a ModuleSourceRegistry and a module version number into +// a concrete ModuleSourceRemote that Terraform will then download and +// install. +type ModuleSourceRegistry struct { + Host svchost.Hostname + Namespace string + Name string + TargetSystem string + + // If Subdir is non-empty then it represents a sub-directory within the + // remote package that the registry address eventually resolves to. + // This will ultimately become the suffix of the Subdir of the + // ModuleSourceRemote that the registry address translates to. 
+ // + // Subdir uses a normalized forward-slash-based path syntax within the + // virtual filesystem represented by the final package. It will never + // include `../` or `./` sequences. + Subdir string +} + +// DefaultModuleRegistryHost is the hostname used for registry-based module +// source addresses that do not have an explicit hostname. +const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io") + +var moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$") +var moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$") + +func parseModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) { + var err error + + var subDir string + raw, subDir = getmodules.SplitPackageSubdir(raw) + if subDir == ".." || strings.HasPrefix(subDir, "../") { // a bare ".." leads outside the package just like a "../" prefix + return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) + } + + parts := strings.Split(raw, "/") + // A valid registry address has either three or four parts, because the + // leading hostname part is optional. + if len(parts) != 3 && len(parts) != 4 { + return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components") + } + + host := DefaultModuleRegistryHost + if len(parts) == 4 { + host, err = svchost.ForComparison(parts[0]) + if err != nil { + // The svchost library doesn't produce very good error messages to + // return to an end-user, so we'll use some custom ones here. + switch { + case strings.Contains(parts[0], "--"): + // Looks like possibly punycode, which we don't allow here + // to ensure that source addresses are written readably. 
+ return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0]) + default: + return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0]) + } + } + if !strings.Contains(host.String(), ".") { + return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot") + } + // Discard the hostname prefix now that we've processed it + parts = parts[1:] + } + + ret := ModuleSourceRegistry{ + Host: host, + + Subdir: subDir, + } + + if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") { + return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host) + } + + if ret.Namespace, err = parseModuleRegistryName(parts[0]); err != nil { + if strings.Contains(parts[0], ".") { + // Seems like the user omitted one of the latter components in + // an address with an explicit hostname. + return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system") + } + return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err) + } + if ret.Name, err = parseModuleRegistryName(parts[1]); err != nil { + return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err) + } + if ret.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil { + if strings.Contains(parts[2], "?") { + // The user was trying to include a query string, probably? + return ret, fmt.Errorf("module registry addresses may not include a query string portion") + } + return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err) + } + + return ret, nil +} + +// parseModuleRegistryName validates and normalizes a string in either the +// "namespace" or "name" position of a module registry source address. 
+func parseModuleRegistryName(given string) (string, error) { + // Similar to the names in provider source addresses, we defined these + // to be compatible with what filesystems and typical remote systems + // like GitHub allow in names. Unfortunately we didn't end up defining + // these exactly equivalently: provider names can only use dashes as + // punctuation, whereas module names can use underscores. So here we're + // using some regular expressions from the original module source + // implementation, rather than using the IDNA rules as we do in + // ParseProviderPart. + + if !moduleRegistryNamePattern.MatchString(given) { + return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix") + } + + // We also skip normalizing the name to lowercase, because we historically + // didn't do that and so existing module registries might be doing + // case-sensitive matching. + return given, nil +} + +// parseModuleRegistryTargetSystem validates and normalizes a string in the +// "target system" position of a module registry source address. This is +// what we historically called "provider" but never actually enforced as +// being a provider address, and now _cannot_ be a provider address because +// provider addresses have three slash-separated components of their own. +func parseModuleRegistryTargetSystem(given string) (string, error) { + // Similar to the names in provider source addresses, we defined these + // to be compatible with what filesystems and typical remote systems + // like GitHub allow in names. Unfortunately we didn't end up defining + // these exactly equivalently: provider names can only use dashes as + // punctuation, whereas module names can use underscores. So here we're + // using some regular expressions from the original module source + // implementation, rather than using the IDNA rules as we do in + // ParseProviderPart. 
+ + if !moduleRegistryTargetSystemPattern.MatchString(given) { + return "", fmt.Errorf("must be between one and 64 ASCII letters or digits") + } + + // No lowercase normalization is applied here: the pattern above only + // matches lowercase ASCII letters and digits, so mixed-case input is + // rejected outright and a valid value is returned exactly as given. + return given, nil +} + +func (s ModuleSourceRegistry) moduleSource() {} + +func (s ModuleSourceRegistry) String() string { + var buf strings.Builder + // Note: we're using the "display" form of the hostname here because + // for our service hostnames "for display" means something different: + // it means to render non-ASCII characters directly as Unicode + // characters, rather than using the "punycode" representation we + // use for internal processing, and so the "display" representation + // is actually what users would write in their configurations. + buf.WriteString(s.Host.ForDisplay()) + buf.WriteByte('/') + buf.WriteString(s.ForRegistryProtocol()) + if s.Subdir != "" { + buf.WriteString("//") + buf.WriteString(s.Subdir) + } + return buf.String() +} + +func (s ModuleSourceRegistry) ForDisplay() string { + var buf strings.Builder + if s.Host != DefaultModuleRegistryHost { + buf.WriteString(s.Host.ForDisplay()) + buf.WriteByte('/') + } + buf.WriteString(s.ForRegistryProtocol()) + if s.Subdir != "" { + buf.WriteString("//") + buf.WriteString(s.Subdir) + } + return buf.String() +} + +// ForRegistryProtocol returns a string representation of just the namespace, +// name, and target system portions of the address, always omitting the +// registry hostname and the subdirectory portion, if any. +// +// This is primarily intended for generating addresses to send to the +// registry in question via the registry protocol, since the protocol +// skips sending the registry its own hostname as part of identifiers. 
+func (s ModuleSourceRegistry) ForRegistryProtocol() string { + var buf strings.Builder + buf.WriteString(s.Namespace) + buf.WriteByte('/') + buf.WriteString(s.Name) + buf.WriteByte('/') + buf.WriteString(s.TargetSystem) + return buf.String() +} + +// ModuleSourceRemote is a ModuleSource representing a remote location from +// which we can retrieve a module package. +// +// A ModuleSourceRemote can optionally include a "subdirectory" path, which +// means that it's selecting a sub-directory of the given package to use as +// the entry point into the package. +type ModuleSourceRemote struct { + // PackageAddr is the address of the remote package that the requested + // module belongs to. + PackageAddr ModulePackage + + // If Subdir is non-empty then it represents a sub-directory within the + // remote package which will serve as the entry-point for the package. + // + // Subdir uses a normalized forward-slash-based path syntax within the + // virtual filesystem represented by the final package. It will never + // include `../` or `./` sequences. + Subdir string +} + +func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) { + var subDir string + raw, subDir = getmodules.SplitPackageSubdir(raw) + if subDir == ".." || strings.HasPrefix(subDir, "../") { // a bare ".." leads outside the package just like a "../" prefix + return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) + } + + // A remote source address is really just a go-getter address resulting + // from go-getter's "detect" phase, which adds on the prefix specifying + // which protocol it should use and possibly also adjusts the + // protocol-specific part into different syntax. + // + // Note that for historical reasons this can potentially do network + // requests in order to disambiguate certain address types, although + // that's a legacy thing that is only for some specific, less-commonly-used + // address types. Most just do local string manipulation. 
We should + // aim to remove the network requests over time, if possible. + norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw) + if err != nil { + return ModuleSourceRemote{}, err + } + + if moreSubDir != "" { + switch { + case subDir != "": + // The detector's own subdir goes first, because the + // subdir we were given is conceptually relative to + // the subdirectory that we just detected. + subDir = path.Join(moreSubDir, subDir) + default: + subDir = path.Clean(moreSubDir) + } + if subDir == ".." || strings.HasPrefix(subDir, "../") { // Join/Clean can also reduce the result to exactly ".." + // This would suggest a bug in a go-getter detector, but + // we'll catch it anyway to avoid doing something confusing + // downstream. + return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm) + } + } + + return ModuleSourceRemote{ + PackageAddr: ModulePackage(norm), + Subdir: subDir, + }, nil +} + +func (s ModuleSourceRemote) moduleSource() {} + +func (s ModuleSourceRemote) String() string { + if s.Subdir != "" { + return s.PackageAddr.String() + "//" + s.Subdir + } + return s.PackageAddr.String() +} + +func (s ModuleSourceRemote) ForDisplay() string { + // The two string representations are identical for this address type. + // This isn't really entirely true to the idea of "ForDisplay" since + // it'll often include some additional components added in by the + // go-getter detectors, but we don't have any function to turn a + // "detected" string back into an idiomatic shorthand the user might've + // entered. + return s.String() +} + +// FromRegistry can be called on a remote source address that was returned +// from a module registry, passing in the original registry source address +// that the registry was asked about, in order to get the effective final +// remote source address. 
+// +// Specifically, this method handles the situations where one or both of +// the two addresses contain subdirectory paths, combining both when necessary +// in order to ensure that the registry's given path and the user's +// given path are both respected. +// +// This will return nonsense if given a registry address other than the one +// that generated the receiver via a registry lookup. +func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote { + ret := s // not a pointer, so this is a shallow copy + + switch { + case s.Subdir != "" && given.Subdir != "": + ret.Subdir = path.Join(s.Subdir, given.Subdir) + case given.Subdir != "": + ret.Subdir = given.Subdir + } + + return ret +} diff --git a/internal/addrs/module_source_test.go b/internal/addrs/module_source_test.go new file mode 100644 index 0000000000..1180a7e456 --- /dev/null +++ b/internal/addrs/module_source_test.go @@ -0,0 +1,531 @@ +package addrs + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + svchost "github.com/hashicorp/terraform-svchost" +) + +func TestParseModuleSource(t *testing.T) { + tests := map[string]struct { + input string + want ModuleSource + wantErr string + }{ + // Local paths + "local in subdirectory": { + input: "./child", + want: ModuleSourceLocal("./child"), + }, + "local in subdirectory non-normalized": { + input: "./nope/../child", + want: ModuleSourceLocal("./child"), + }, + "local in sibling directory": { + input: "../sibling", + want: ModuleSourceLocal("../sibling"), + }, + "local in sibling directory non-normalized": { + input: "./nope/../../sibling", + want: ModuleSourceLocal("../sibling"), + }, + "Windows-style local in subdirectory": { + input: `.\child`, + want: ModuleSourceLocal("./child"), + }, + "Windows-style local in subdirectory non-normalized": { + input: `.\nope\..\child`, + want: ModuleSourceLocal("./child"), + }, + "Windows-style local in sibling directory": { + input: `..\sibling`, + want: 
ModuleSourceLocal("../sibling"), + }, + "Windows-style local in sibling directory non-normalized": { + input: `.\nope\..\..\sibling`, + want: ModuleSourceLocal("../sibling"), + }, + "an abominable mix of different slashes": { + input: `./nope\nope/why\./please\don't`, + want: ModuleSourceLocal("./nope/nope/why/please/don't"), + }, + + // Registry addresses + // (NOTE: There is another test function TestParseModuleSourceRegistry + // which tests this situation more exhaustively, so this is just a + // token set of cases to see that we are indeed calling into the + // registry address parser when appropriate.) + "main registry implied": { + input: "hashicorp/subnets/cidr", + want: ModuleSourceRegistry{ + Host: svchost.Hostname("registry.terraform.io"), + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + Subdir: "", + }, + }, + "main registry implied, subdir": { + input: "hashicorp/subnets/cidr//examples/foo", + want: ModuleSourceRegistry{ + Host: svchost.Hostname("registry.terraform.io"), + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + Subdir: "examples/foo", + }, + }, + "main registry implied, escaping subdir": { + input: "hashicorp/subnets/cidr//../nope", + // NOTE: This error is actually being caught by the _remote package_ + // address parser, because any registry parsing failure falls back + // to that but both of them have the same subdir validation. This + // case is here to make sure that stays true, so we keep reporting + // a suitable error when the user writes a registry-looking thing. 
+ wantErr: `subdirectory path "../nope" leads outside of the module package`, + }, + "custom registry": { + input: "example.com/awesomecorp/network/happycloud", + want: ModuleSourceRegistry{ + Host: svchost.Hostname("example.com"), + Namespace: "awesomecorp", + Name: "network", + TargetSystem: "happycloud", + Subdir: "", + }, + }, + "custom registry, subdir": { + input: "example.com/awesomecorp/network/happycloud//examples/foo", + want: ModuleSourceRegistry{ + Host: svchost.Hostname("example.com"), + Namespace: "awesomecorp", + Name: "network", + TargetSystem: "happycloud", + Subdir: "examples/foo", + }, + }, + + // Remote package addresses + "github.com shorthand": { + input: "github.com/hashicorp/terraform-cidr-subnets", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::https://github.com/hashicorp/terraform-cidr-subnets.git"), + }, + }, + "github.com shorthand, subdir": { + input: "github.com/hashicorp/terraform-cidr-subnets//example/foo", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::https://github.com/hashicorp/terraform-cidr-subnets.git"), + Subdir: "example/foo", + }, + }, + "git protocol, URL-style": { + input: "git://example.com/code/baz.git", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git://example.com/code/baz.git"), + }, + }, + "git protocol, URL-style, subdir": { + input: "git://example.com/code/baz.git//bleep/bloop", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git://example.com/code/baz.git"), + Subdir: "bleep/bloop", + }, + }, + "git over HTTPS, URL-style": { + input: "git::https://example.com/code/baz.git", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::https://example.com/code/baz.git"), + }, + }, + "git over HTTPS, URL-style, subdir": { + input: "git::https://example.com/code/baz.git//bleep/bloop", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::https://example.com/code/baz.git"), + Subdir: "bleep/bloop", + }, + }, + "git over SSH, URL-style": { + input: 
"git::ssh://git@example.com/code/baz.git", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::ssh://git@example.com/code/baz.git"), + }, + }, + "git over SSH, URL-style, subdir": { + input: "git::ssh://git@example.com/code/baz.git//bleep/bloop", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("git::ssh://git@example.com/code/baz.git"), + Subdir: "bleep/bloop", + }, + }, + "git over SSH, scp-style": { + input: "git::git@example.com:code/baz.git", + want: ModuleSourceRemote{ + // Normalized to URL-style + PackageAddr: ModulePackage("git::ssh://git@example.com/code/baz.git"), + }, + }, + "git over SSH, scp-style, subdir": { + input: "git::git@example.com:code/baz.git//bleep/bloop", + want: ModuleSourceRemote{ + // Normalized to URL-style + PackageAddr: ModulePackage("git::ssh://git@example.com/code/baz.git"), + Subdir: "bleep/bloop", + }, + }, + + // NOTE: We intentionally don't test the bitbucket.org shorthands + // here, because that detector makes direct HTTP requests to the + // Bitbucket API and thus isn't appropriate for unit testing. 
+ + "Google Cloud Storage bucket implied, path prefix": { + input: "www.googleapis.com/storage/v1/BUCKET_NAME/PATH_TO_MODULE", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH_TO_MODULE"), + }, + }, + "Google Cloud Storage bucket, path prefix": { + input: "gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH_TO_MODULE", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH_TO_MODULE"), + }, + }, + "Google Cloud Storage bucket implied, archive object": { + input: "www.googleapis.com/storage/v1/BUCKET_NAME/PATH/TO/module.zip", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH/TO/module.zip"), + }, + }, + "Google Cloud Storage bucket, archive object": { + input: "gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH/TO/module.zip", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("gcs::https://www.googleapis.com/storage/v1/BUCKET_NAME/PATH/TO/module.zip"), + }, + }, + + "Amazon S3 bucket implied, archive object": { + input: "s3-eu-west-1.amazonaws.com/examplecorp-terraform-modules/vpc.zip", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("s3::https://s3-eu-west-1.amazonaws.com/examplecorp-terraform-modules/vpc.zip"), + }, + }, + "Amazon S3 bucket, archive object": { + input: "s3::https://s3-eu-west-1.amazonaws.com/examplecorp-terraform-modules/vpc.zip", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("s3::https://s3-eu-west-1.amazonaws.com/examplecorp-terraform-modules/vpc.zip"), + }, + }, + + "HTTP URL": { + input: "http://example.com/module", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("http://example.com/module"), + }, + }, + "HTTPS URL": { + input: "https://example.com/module", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("https://example.com/module"), + }, + }, + "HTTPS URL, archive file": { + input: 
"https://example.com/module.zip", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("https://example.com/module.zip"), + }, + }, + "HTTPS URL, forced archive file": { + input: "https://example.com/module?archive=tar", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("https://example.com/module?archive=tar"), + }, + }, + "HTTPS URL, forced archive file and checksum": { + input: "https://example.com/module?archive=tar&checksum=blah", + want: ModuleSourceRemote{ + // The query string only actually gets processed when we finally + // do the get, so "checksum=blah" is accepted as valid up + // at this parsing layer. + PackageAddr: ModulePackage("https://example.com/module?archive=tar&checksum=blah"), + }, + }, + + "absolute filesystem path": { + // Although a local directory isn't really "remote", we do + // treat it as such because we still need to do all of the same + // high-level steps to work with these, even though "downloading" + // is replaced by a deep filesystem copy instead. + input: "/tmp/foo/example", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("file:///tmp/foo/example"), + }, + }, + "absolute filesystem path, subdir": { + // This is a funny situation where the user wants to use a + // directory elsewhere on their system as a package containing + // multiple modules, but the entry point is not at the root + // of that subtree, and so they can use the usual subdir + // syntax to move the package root higher in the real filesystem. 
+ input: "/tmp/foo//example", + want: ModuleSourceRemote{ + PackageAddr: ModulePackage("file:///tmp/foo"), + Subdir: "example", + }, + }, + + "subdir escaping out of package": { + // This is general logic for all subdir regardless of installation + // protocol, but we're using a filesystem path here just as an + // easy placeholder. + input: "/tmp/foo//example/../../invalid", + wantErr: `subdirectory path "../invalid" leads outside of the module package`, + }, + + "go-getter will accept all sorts of garbage": { + input: "dfgdfgsd:dgfhdfghdfghdfg/dfghdfghdfg", + want: ModuleSourceRemote{ + // Unfortunately go-getter doesn't actually reject a totally + // invalid address like this until getting time, so it's + // pretty difficult to make remote address parsing actually + // return an error in practice. + PackageAddr: ModulePackage("dfgdfgsd:dgfhdfghdfghdfg/dfghdfghdfg"), + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + addr, err := ParseModuleSource(test.input) + + if test.wantErr != "" { + switch { + case err == nil: + t.Errorf("unexpected success\nwant error: %s", test.wantErr) + case err.Error() != test.wantErr: + t.Errorf("wrong error messages\ngot: %s\nwant: %s", err.Error(), test.wantErr) + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if diff := cmp.Diff(addr, test.want); diff != "" { + t.Errorf("wrong result\n%s", diff) + } + }) + } + +} + +func TestModuleSourceRemoteFromRegistry(t *testing.T) { + t.Run("both have subdir", func(t *testing.T) { + remote := ModuleSourceRemote{ + PackageAddr: ModulePackage("boop"), + Subdir: "foo", + } + registry := ModuleSourceRegistry{ + Subdir: "bar", + } + gotAddr := remote.FromRegistry(registry) + if remote.Subdir != "foo" { + t.Errorf("FromRegistry modified the reciever; should be pure function") + } + if registry.Subdir != "bar" { + t.Errorf("FromRegistry modified the given address; should be pure function") + } + if got, want := 
gotAddr.Subdir, "foo/bar"; got != want { + t.Errorf("wrong resolved subdir\ngot: %s\nwant: %s", got, want) + } + }) + t.Run("only remote has subdir", func(t *testing.T) { + remote := ModuleSourceRemote{ + PackageAddr: ModulePackage("boop"), + Subdir: "foo", + } + registry := ModuleSourceRegistry{ + Subdir: "", + } + gotAddr := remote.FromRegistry(registry) + if remote.Subdir != "foo" { + t.Errorf("FromRegistry modified the reciever; should be pure function") + } + if registry.Subdir != "" { + t.Errorf("FromRegistry modified the given address; should be pure function") + } + if got, want := gotAddr.Subdir, "foo"; got != want { + t.Errorf("wrong resolved subdir\ngot: %s\nwant: %s", got, want) + } + }) + t.Run("only registry has subdir", func(t *testing.T) { + remote := ModuleSourceRemote{ + PackageAddr: ModulePackage("boop"), + Subdir: "", + } + registry := ModuleSourceRegistry{ + Subdir: "bar", + } + gotAddr := remote.FromRegistry(registry) + if remote.Subdir != "" { + t.Errorf("FromRegistry modified the reciever; should be pure function") + } + if registry.Subdir != "bar" { + t.Errorf("FromRegistry modified the given address; should be pure function") + } + if got, want := gotAddr.Subdir, "bar"; got != want { + t.Errorf("wrong resolved subdir\ngot: %s\nwant: %s", got, want) + } + }) +} + +func TestParseModuleSourceRegistry(t *testing.T) { + // We test parseModuleSourceRegistry alone here, in addition to testing + // it indirectly as part of TestParseModuleSource, because general + // module parsing unfortunately eats all of the error situations from + // registry parsing by falling back to trying for a direct remote package + // address. + + // Historical note: These test cases were originally derived from the + // ones in the old internal/registry/regsrc package that the + // ModuleSourceRegistry type is replacing. 
That package had the notion + // of "normalized" addresses as separate from the original user input, + // but this new implementation doesn't try to preserve the original + // user input at all, and so the main string output is always normalized. + // + // That package also had some behaviors to turn the namespace, name, and + // remote system portions into lowercase, but apparently we didn't + // actually make use of that in the end and were preserving the case + // the user provided in the input, and so for backward compatibility + // we're continuing to do that here, at the expense of now making the + // "ForDisplay" output case-preserving where its predecessor in the + // old package wasn't. The main Terraform Registry at registry.terraform.io + // is itself case-insensitive anyway, so our case-preserving here is + // entirely for the benefit of existing third-party registry + // implementations that might be case-sensitive, which we must remain + // compatible with now. + + tests := map[string]struct { + input string + wantString string + wantForDisplay string + wantForProtocol string + wantErr string + }{ + "public registry": { + input: `hashicorp/consul/aws`, + wantString: `registry.terraform.io/hashicorp/consul/aws`, + wantForDisplay: `hashicorp/consul/aws`, + wantForProtocol: `hashicorp/consul/aws`, + }, + "public registry with subdir": { + input: `hashicorp/consul/aws//foo`, + wantString: `registry.terraform.io/hashicorp/consul/aws//foo`, + wantForDisplay: `hashicorp/consul/aws//foo`, + wantForProtocol: `hashicorp/consul/aws`, + }, + "public registry using explicit hostname": { + input: `registry.terraform.io/hashicorp/consul/aws`, + wantString: `registry.terraform.io/hashicorp/consul/aws`, + wantForDisplay: `hashicorp/consul/aws`, + wantForProtocol: `hashicorp/consul/aws`, + }, + "public registry with mixed case names": { + input: `HashiCorp/Consul/aws`, + wantString: `registry.terraform.io/HashiCorp/Consul/aws`, + wantForDisplay: `HashiCorp/Consul/aws`, 
+ wantForProtocol: `HashiCorp/Consul/aws`, + }, + "private registry with non-standard port": { + input: `Example.com:1234/HashiCorp/Consul/aws`, + wantString: `example.com:1234/HashiCorp/Consul/aws`, + wantForDisplay: `example.com:1234/HashiCorp/Consul/aws`, + wantForProtocol: `HashiCorp/Consul/aws`, + }, + "private registry with IDN hostname": { + input: `Испытание.com/HashiCorp/Consul/aws`, + wantString: `испытание.com/HashiCorp/Consul/aws`, + wantForDisplay: `испытание.com/HashiCorp/Consul/aws`, + wantForProtocol: `HashiCorp/Consul/aws`, + }, + "private registry with IDN hostname and non-standard port": { + input: `Испытание.com:1234/HashiCorp/Consul/aws//Foo`, + wantString: `испытание.com:1234/HashiCorp/Consul/aws//Foo`, + wantForDisplay: `испытание.com:1234/HashiCorp/Consul/aws//Foo`, + wantForProtocol: `HashiCorp/Consul/aws`, + }, + "invalid hostname": { + input: `---.com/HashiCorp/Consul/aws`, + wantErr: `invalid module registry hostname "---.com"; internationalized domain names must be given as direct unicode characters, not in punycode`, + }, + "hostname with only one label": { + // This was historically forbidden in our initial implementation, + // so we keep it forbidden to avoid newly interpreting such + // addresses as registry addresses rather than remote source + // addresses. 
+ input: `foo/var/baz/qux`, + wantErr: `invalid module registry hostname: must contain at least one dot`, + }, + "invalid target system": { + input: `foo/var/no-no-no`, + wantErr: `invalid target system "no-no-no": must be between one and 64 ASCII letters or digits`, + }, + "invalid namespace": { + input: `boop!/var/baz`, + wantErr: `invalid namespace "boop!": must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix`, + }, + "missing part with explicit hostname": { + input: `foo.com/var/baz`, + wantErr: `source address must have three more components after the hostname: the namespace, the name, and the target system`, + }, + "errant query string": { + input: `foo/var/baz?otherthing`, + wantErr: `module registry addresses may not include a query string portion`, + }, + "github.com": { + // We don't allow using github.com like a module registry because + // that conflicts with the historically-supported shorthand for + // installing directly from GitHub-hosted git repositories. + input: `github.com/HashiCorp/Consul/aws`, + wantErr: `can't use "github.com" as a module registry host, because it's reserved for installing directly from version control repositories`, + }, + "bitbucket.org": { + // We don't allow using bitbucket.org like a module registry because + // that conflicts with the historically-supported shorthand for + // installing directly from Bitbucket-hosted git repositories. 
+ input: `bitbucket.org/HashiCorp/Consul/aws`, + wantErr: `can't use "bitbucket.org" as a module registry host, because it's reserved for installing directly from version control repositories`, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + addr, err := parseModuleSourceRegistry(test.input) + + if test.wantErr != "" { + switch { + case err == nil: + t.Errorf("unexpected success\nwant error: %s", test.wantErr) + case err.Error() != test.wantErr: + t.Errorf("wrong error messages\ngot: %s\nwant: %s", err.Error(), test.wantErr) + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if got, want := addr.String(), test.wantString; got != want { + t.Errorf("wrong String() result\ngot: %s\nwant: %s", got, want) + } + if got, want := addr.ForDisplay(), test.wantForDisplay; got != want { + t.Errorf("wrong ForDisplay() result\ngot: %s\nwant: %s", got, want) + } + if got, want := addr.ForRegistryProtocol(), test.wantForProtocol; got != want { + t.Errorf("wrong ForRegistryProtocol() result\ngot: %s\nwant: %s", got, want) + } + }) + } +}