diff --git a/internal/getmodules/git_getter.go b/internal/getmodules/git_getter.go index 535112e7ec..1b811b8fbd 100644 --- a/internal/getmodules/git_getter.go +++ b/internal/getmodules/git_getter.go @@ -80,7 +80,7 @@ func (g *gitGetter) Get(dst string, u *url.URL) error { // Extract some query parameters we use var ref, sshKey string - var depth int + depth := 0 // 0 means "not set" q := u.Query() if len(q) > 0 { ref = q.Get("ref") @@ -194,20 +194,54 @@ func (g *gitGetter) checkout(dst string, ref string) error { return getRunCommand(cmd) } +// gitCommitIDRegex is a pattern intended to match strings that seem +// "likely to be" git commit IDs, rather than named refs. This cannot be +// an exact decision because it's valid to name a branch or tag after a series +// of hexadecimal digits too. +// +// We require at least 7 digits here because that's the smallest size git +// itself will typically generate, and so it'll reduce the risk of false +// positives on short branch names that happen to also be "hex words". +var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$") + func (g *gitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error { args := []string{"clone"} + autoBranch := false if ref == "" { ref = findRemoteDefaultBranch(u) + autoBranch = true } if depth > 0 { args = append(args, "--depth", strconv.Itoa(depth)) + args = append(args, "--branch", ref) } + args = append(args, u.String(), dst) - args = append(args, "--branch", ref, u.String(), dst) cmd := exec.CommandContext(ctx, "git", args...) setupGitEnv(cmd, sshKeyFile) - return getRunCommand(cmd) + err := getRunCommand(cmd) + if err != nil { + if depth > 0 && !autoBranch { + // If we're creating a shallow clone then the given ref must be + // a named ref (branch or tag) rather than a commit directly. + // We can't accurately recognize the resulting error here without + // hard-coding assumptions about git's human-readable output, but + // we can at least try a heuristic. + if gitCommitIDRegex.MatchString(ref) { + return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err) + } + } + return err + } + + if depth < 1 && !autoBranch { + // If we didn't add --depth and --branch above then we will now be + // on the remote repository's default branch, rather than the selected + // ref, so we'll need to fix that before we return. + return g.checkout(dst, ref) + } + return nil } func (g *gitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error { diff --git a/internal/getmodules/git_getter_test.go b/internal/getmodules/git_getter_test.go index 79b3e15925..5893c9c8e1 100644 --- a/internal/getmodules/git_getter_test.go +++ b/internal/getmodules/git_getter_test.go @@ -90,6 +90,58 @@ func TestGitGetter_branch(t *testing.T) { } } +func TestGitGetter_commitID(t *testing.T) { + if !testHasGit { + t.Skip("git not found, skipping") + } + + g := new(gitGetter) + dst := tempDir(t) + + // We're going to create different content on the main branch vs. + // another branch here, so that below we can recognize if we + // correctly cloned the commit actually requested (from the + // "other branch"), not the one at HEAD. + repo := testGitRepo(t, "commit_id") + repo.git("checkout", "-b", "main-branch") + repo.commitFile("wrong.txt", "Nope") + repo.git("checkout", "-b", "other-branch") + repo.commitFile("hello.txt", "Yep") + commitID, err := repo.latestCommit() + if err != nil { + t.Fatal(err) + } + // Return to the main branch so that HEAD of this repository + // will be that, rather than "test-branch". + repo.git("checkout", "main-branch") + + q := repo.url.Query() + q.Add("ref", commitID) + repo.url.RawQuery = q.Encode() + + t.Logf("Getting %s", repo.url) + if err := g.Get(dst, repo.url); err != nil { + t.Fatalf("err: %s", err) + } + + // Verify the main file exists + mainPath := filepath.Join(dst, "hello.txt") + if _, err := os.Stat(mainPath); err != nil { + t.Fatalf("err: %s", err) + } + + // Get again should work + if err := g.Get(dst, repo.url); err != nil { + t.Fatalf("err: %s", err) + } + + // Verify the main file exists + mainPath = filepath.Join(dst, "hello.txt") + if _, err := os.Stat(mainPath); err != nil { + t.Fatalf("err: %s", err) + } +} + func TestGitGetter_remoteWithoutMaster(t *testing.T) { if !testHasGit { t.Log("git not found, skipping") @@ -214,6 +266,44 @@ func TestGitGetter_shallowCloneWithTag(t *testing.T) { } } +func TestGitGetter_shallowCloneWithCommitID(t *testing.T) { + if !testHasGit { + t.Log("git not found, skipping") + t.Skip() + } + + g := new(gitGetter) + dst := tempDir(t) + + repo := testGitRepo(t, "upstream") + repo.commitFile("v1.0.txt", "0") + repo.git("tag", "v1.0") + repo.commitFile("v1.1.txt", "1") + + commitID, err := repo.latestCommit() + if err != nil { + t.Fatal(err) + } + + // Specify a clone depth of 1 with a naked commit ID + // This is intentionally invalid: shallow clone always requires a named ref. + q := repo.url.Query() + q.Add("ref", commitID[:8]) + q.Add("depth", "1") + repo.url.RawQuery = q.Encode() + + t.Logf("Getting %s", repo.url) + err = g.Get(dst, repo.url) + if err == nil { + t.Fatalf("success; want error") + } + // We use a heuristic to generate an extra hint in the error message if + // it looks like the user was trying to combine ref=COMMIT with depth. + if got, want := err.Error(), "(note that setting 'depth' requires 'ref' to be a branch or tag name)"; !strings.Contains(got, want) { + t.Errorf("missing error message hint\ngot: %s\nwant substring: %s", got, want) + } +} + func TestGitGetter_branchUpdate(t *testing.T) { if !testHasGit { t.Skip("git not found, skipping") @@ -664,6 +754,19 @@ func (r *gitRepo) commitFile(file, content string) { r.git("commit", "-m", "Adding "+file) } +// latestCommit returns the full commit id of the latest commit on the current +// branch. +func (r *gitRepo) latestCommit() (string, error) { + cmd := exec.Command("git", "rev-parse", "HEAD") + cmd.Dir = r.dir + rawOut, err := cmd.Output() + if err != nil { + return "", err + } + rawOut = bytes.TrimSpace(rawOut) + return string(rawOut), nil +} + // This is a read-only deploy key for an empty test repository. // Note: This is split over multiple lines to avoid being disabled by key // scanners automatically.