diff --git a/docs/env-variables.md b/docs/env-variables.md index e6fa7ca5..6b5bd28c 100644 --- a/docs/env-variables.md +++ b/docs/env-variables.md @@ -27,7 +27,8 @@ | `--git-url` | `ENVBUILDER_GIT_URL` | | The URL of a Git repository containing a Devcontainer or Docker image to clone. This is optional. | | `--git-clone-depth` | `ENVBUILDER_GIT_CLONE_DEPTH` | | The depth to use when cloning the Git repository. | | `--git-clone-single-branch` | `ENVBUILDER_GIT_CLONE_SINGLE_BRANCH` | | Clone only a single branch of the Git repository. | -| `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.zaure.com. | +| `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated, it will not be turned on for the domain dev.azure.com. | +| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth. | | `--git-username` | `ENVBUILDER_GIT_USERNAME` | | The username to use for Git authentication. This is optional. | | `--git-password` | `ENVBUILDER_GIT_PASSWORD` | | The password to use for Git authentication. This is optional. | | `--git-ssh-private-key-path` | `ENVBUILDER_GIT_SSH_PRIVATE_KEY_PATH` | | Path to an SSH private key to be used for Git authentication. If this is set, then GIT_SSH_PRIVATE_KEY_BASE64 cannot be set. 
| diff --git a/git/git.go b/git/git.go index 320b40c4..74eaea56 100644 --- a/git/git.go +++ b/git/git.go @@ -7,7 +7,10 @@ import ( "fmt" "io" "net" + "net/url" "os" + "path" + "regexp" "strings" "github.com/coder/envbuilder/internal/ebutil" @@ -15,6 +18,7 @@ import ( "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/protocol/packp/capability" @@ -32,15 +36,16 @@ type CloneRepoOptions struct { Path string Storage billy.Filesystem - RepoURL string - RepoAuth transport.AuthMethod - Progress sideband.Progress - Insecure bool - SingleBranch bool - ThinPack bool - Depth int - CABundle []byte - ProxyOptions transport.ProxyOptions + RepoURL string + RepoAuth transport.AuthMethod + Progress sideband.Progress + Insecure bool + SingleBranch bool + ThinPack bool + Depth int + CABundle []byte + ProxyOptions transport.ProxyOptions + SubmoduleDepth int // 0 = disabled, >0 = max recursion depth } // CloneRepo will clone the repository at the given URL into the given path. 
@@ -111,28 +116,48 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt if err != nil { return false, fmt.Errorf("open %q: %w", opts.RepoURL, err) } - if repo != nil { - return false, nil - } - _, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ - URL: parsed.Cleaned, - Auth: opts.RepoAuth, - Progress: opts.Progress, - ReferenceName: plumbing.ReferenceName(parsed.Reference), - InsecureSkipTLS: opts.Insecure, - Depth: opts.Depth, - SingleBranch: opts.SingleBranch, - CABundle: opts.CABundle, - ProxyOptions: opts.ProxyOptions, - }) - if errors.Is(err, git.ErrRepositoryAlreadyExists) { - return false, nil + alreadyCloned := repo != nil + if !alreadyCloned { + repo, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ + URL: parsed.Cleaned, + Auth: opts.RepoAuth, + Progress: opts.Progress, + ReferenceName: plumbing.ReferenceName(parsed.Reference), + InsecureSkipTLS: opts.Insecure, + Depth: opts.Depth, + SingleBranch: opts.SingleBranch, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if errors.Is(err, git.ErrRepositoryAlreadyExists) { + // The repository was created between our Open and CloneContext + // calls. Reopen it so submodule initialization can still run. + repo, err = git.Open(fsStorage, gitDir) + if err != nil { + return false, fmt.Errorf("reopen existing %q: %w", opts.RepoURL, err) + } + alreadyCloned = true + } + if err != nil { + return false, fmt.Errorf("clone %q: %w", opts.RepoURL, err) + } } - if err != nil { - return false, fmt.Errorf("clone %q: %w", opts.RepoURL, err) + + // Initialize submodules on every call, not only after a fresh clone, so + // that a transient failure during the first run can be retried on the + // next workspace start. 
+ if opts.SubmoduleDepth > 0 { + w, err := repo.Worktree() + if err != nil { + return !alreadyCloned, fmt.Errorf("get worktree: %w", err) + } + if err := initSubmodules(ctx, logf, repo, w, opts.RepoURL, opts.RepoAuth, opts, 1); err != nil { + return !alreadyCloned, fmt.Errorf("init submodules: %w", err) + } } - return true, nil + + return !alreadyCloned, nil } // ShallowCloneRepo will clone the repository at the given URL into the given path @@ -354,14 +379,15 @@ func CloneOptionsFromOptions(logf func(string, ...any), options options.Options) } cloneOpts := CloneRepoOptions{ - RepoURL: options.GitURL, - Path: options.WorkspaceFolder, - Storage: options.Filesystem, - Insecure: options.Insecure, - SingleBranch: options.GitCloneSingleBranch, - ThinPack: options.GitCloneThinPack, - Depth: int(options.GitCloneDepth), - CABundle: caBundle, + RepoURL: options.GitURL, + Path: options.WorkspaceFolder, + Storage: options.Filesystem, + Insecure: options.Insecure, + SingleBranch: options.GitCloneSingleBranch, + ThinPack: options.GitCloneThinPack, + Depth: int(options.GitCloneDepth), + CABundle: caBundle, + SubmoduleDepth: options.GitCloneSubmoduleDepth, } cloneOpts.RepoAuth = SetupRepoAuth(logf, &options) @@ -419,3 +445,384 @@ func ProgressWriter(write func(line string, args ...any)) io.WriteCloser { done: done, } } + +// scpLikeURLRegex matches SCP-like URLs: user@host:path (where host is not empty and path doesn't start with /) +// This handles: git@github.com:org/repo, deploy@host:repo, user@10.0.0.5:project +var scpLikeURLRegex = regexp.MustCompile(`^([^@]+)@([^:]+):(.+)$`) + +// extractHost extracts the host from a URL, handling both standard URLs and SCP-like URLs. +// Returns empty string if host cannot be determined. 
+func extractHost(u string) string { + // Try standard URL parsing first + if parsed, err := url.Parse(u); err == nil && parsed.Host != "" { + // Remove port if present + host := parsed.Hostname() + return strings.ToLower(host) + } + + // Handle SCP-like URLs: user@host:path + if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil { + return strings.ToLower(matches[2]) + } + + return "" +} + +// SameHost checks if two URLs point to the same host. +// Used to determine if credentials should be forwarded to submodules. +// The comparison is hostname-only. Port is ignored to match git's +// credential-helper convention, which keys credentials on the host alone. +func SameHost(url1, url2 string) bool { + host1 := extractHost(url1) + host2 := extractHost(url2) + return host1 != "" && host2 != "" && host1 == host2 +} + +// RedactURL redacts credentials from a URL for safe logging. +// Handles: +// - Standard URLs with userinfo: https://user:pass@host, https://token@host +// - URL-encoded credentials: https://user:p%40ss@host +// - SCP-like URLs: git@host:path, deploy@host:path, user@10.0.0.5:path +// - Various schemes: http, https, ssh, git, ftp, sftp +// - IPv6 hosts: https://user@[2001:db8::1]/path +func RedactURL(u string) string { + // Try to parse as a standard URL first (handles schemes like https://, ssh://, etc.) + parsed, err := url.Parse(u) + if err == nil && parsed.Scheme != "" && parsed.Host != "" { + // Successfully parsed as a URL with a scheme and host + // Redact userinfo if present (handles user, user:pass, token, URL-encoded creds) + if parsed.User != nil { + // Build URL manually to avoid url.User encoding *** as %2A%2A%2A + result := parsed.Scheme + "://***@" + parsed.Host + parsed.Path + if parsed.RawQuery != "" { + result += "?" 
+ parsed.RawQuery + } + if parsed.Fragment != "" { + result += "#" + parsed.Fragment + } + return result + } + return parsed.String() + } + + // Handle SCP-like URLs: user@host:path (no scheme) + // Only check this if url.Parse didn't find a valid scheme+host + // (to avoid matching URLs like https://user@[ipv6]:path) + // This catches: git@github.com:org/repo, deploy@host:repo, oauth2:token@gitlab.com:org/repo + if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil { + // matches[1] = user part (could be git, deploy, oauth2:token, etc.) + // matches[2] = host + // matches[3] = path + return "***@" + matches[2] + ":" + matches[3] + } + + // If we can't parse it and it's not SCP-like, return as-is + // (probably not a URL with credentials) + return u +} + +// ResolveSubmoduleURL resolves a potentially relative submodule URL against a parent repository URL. +func ResolveSubmoduleURL(parentURL, submoduleURL string) (string, error) { + // If the submodule URL is absolute (contains ://) or doesn't start with ./ or ../, return it as-is + if strings.Contains(submoduleURL, "://") || (!strings.HasPrefix(submoduleURL, "../") && !strings.HasPrefix(submoduleURL, "./")) { + return submoduleURL, nil + } + + // Parse the parent URL using go-git's endpoint parser, which handles + // SCP-like URLs (git@host:path) in addition to standard URLs. + parentEP, err := transport.NewEndpoint(parentURL) + if err != nil { + return "", fmt.Errorf("parse parent URL: %w", err) + } + + // Credentials embedded in the parent URL must not leak into resolved + // submodule URLs. They should flow only through CloneRepoOptions.RepoAuth, + // which is gated by SameHost. For ssh:// endpoints the user portion is + // the SSH login name rather than a credential, so it is preserved. + parentEP.Password = "" + if !strings.EqualFold(parentEP.Protocol, "ssh") { + parentEP.User = "" + } + + // For relative URLs, we need to resolve them against the parent's path. 
+ // The parent path represents a repository (like a file in filesystem terms), + // so ../something means "sibling repository". + parentPath := strings.TrimSuffix(parentEP.Path, "/") + + // Split the submodule URL into components + // and manually walk up the directory tree for each ../ + currentPath := parentPath + relativeParts := strings.Split(submoduleURL, "/") + + for _, part := range relativeParts { + if part == ".." { + // Go up one directory + currentPath = path.Dir(currentPath) + } else if part == "." { + // Stay in current directory + continue + } else if part != "" { + // Add this component to the path + currentPath = currentPath + "/" + part + } + } + + // Reconstruct the URL with the resolved path. + parentEP.Path = path.Clean(currentPath) + return parentEP.String(), nil +} + +// initSubmodules recursively initializes and updates the submodules of repo. +// currentDepth tracks the current recursion level, starting at 1. parentAuth +// is the auth that was actually used to fetch the current parent. It must be +// the auth for this level, not the root auth, so that a credential withheld +// at any point in the chain stays withheld for every level below it. 
+func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Repository, parentWorktree *git.Worktree, parentURL string, parentAuth transport.AuthMethod, opts CloneRepoOptions, currentDepth int) error { + if currentDepth > opts.SubmoduleDepth { + logf("⚠ Skipping nested submodules: max depth %d reached", opts.SubmoduleDepth) + return nil + } + logf("🔗 Initializing git submodules (depth %d/%d)...", currentDepth, opts.SubmoduleDepth) + + subs, err := parentWorktree.Submodules() + if err != nil { + return fmt.Errorf("get submodules: %w", err) + } + + if len(subs) == 0 { + logf("No submodules found") + return nil + } + + logf("Found %d submodule(s)", len(subs)) + + // Get the parent repository URL for resolving relative submodule URLs + effectiveParentURL := parentURL + if cfg, cfgErr := repo.Config(); cfgErr == nil { + if origin, ok := cfg.Remotes["origin"]; ok && len(origin.URLs) > 0 { + effectiveParentURL = origin.URLs[0] + } + } + logf("Parent repository URL: %s", RedactURL(effectiveParentURL)) + + for _, sub := range subs { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + subConfig := sub.Config() + logf("📦 Initializing submodule: %s", subConfig.Name) + logf(" Submodule path: %s", subConfig.Path) + logf(" Submodule URL (from .gitmodules): %s", RedactURL(subConfig.URL)) + + // Get the expected commit hash + subStatus, err := sub.Status() + if err != nil { + return fmt.Errorf("get submodule status for %q: %w", subConfig.Name, err) + } + logf(" Expected commit: %s", subStatus.Expected) + + // Resolve the submodule URL + resolvedURL, err := ResolveSubmoduleURL(effectiveParentURL, subConfig.URL) + if err != nil { + return fmt.Errorf("resolve submodule URL for %q: %w", subConfig.Name, err) + } + logf(" Resolved URL: %s", RedactURL(resolvedURL)) + + submoduleAuth := submoduleAuthFor(logf, effectiveParentURL, resolvedURL, parentAuth) + + // Clone the submodule manually + if err := cloneSubmodule(ctx, logf, parentWorktree, subConfig, 
subStatus.Expected, resolvedURL, submoduleAuth, opts); err != nil { + return fmt.Errorf("clone submodule %q: %w", subConfig.Name, err) + } + + logf("✓ Submodule initialized: %s", subConfig.Name) + + // Recurse into any nested submodules. We open the on-disk repository + // directly rather than calling sub.Repository(), which requires the + // submodule to be registered in .git/config via sub.Init(). The custom + // clone path does not perform that registration. + if currentDepth >= opts.SubmoduleDepth { + continue + } + subRepo, subWorktree, err := openSubmoduleRepo(parentWorktree, subConfig.Path) + if err != nil { + logf(" ⚠ Could not open submodule repository %s for nested traversal: %v", subConfig.Name, err) + continue + } + nestedSubs, err := subWorktree.Submodules() + if err != nil { + logf(" ⚠ Could not list nested submodules in %s: %v", subConfig.Name, err) + continue + } + if len(nestedSubs) == 0 { + continue + } + logf(" Found %d nested submodule(s) in %s", len(nestedSubs), subConfig.Name) + if err := initSubmodules(ctx, logf, subRepo, subWorktree, resolvedURL, submoduleAuth, opts, currentDepth+1); err != nil { + return fmt.Errorf("init nested submodules in %q: %w", subConfig.Name, err) + } + } + + logf("✓ All submodules initialized successfully") + return nil +} + +// submoduleAuthFor returns the auth to use when fetching a submodule. It +// returns parentAuth if the submodule shares the parent's host, and nil +// otherwise. A warning is logged when parent auth is set but withheld +// because the hosts differ. 
+func submoduleAuthFor(logf func(string, ...any), parentURL, submoduleURL string, parentAuth transport.AuthMethod) transport.AuthMethod { + if parentAuth == nil { + return nil + } + if SameHost(parentURL, submoduleURL) { + return parentAuth + } + logf(" ⚠ Not forwarding auth to submodule (different host: %s)", extractHost(submoduleURL)) + return nil +} + +// openSubmoduleRepo opens the on-disk repository written by cloneSubmodule +// at parentWorktree/submodulePath/.git and returns it along with its worktree. +func openSubmoduleRepo(parentWorktree *git.Worktree, submodulePath string) (*git.Repository, *git.Worktree, error) { + subFS, err := parentWorktree.Filesystem.Chroot(submodulePath) + if err != nil { + return nil, nil, fmt.Errorf("chroot to submodule path: %w", err) + } + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return nil, nil, fmt.Errorf("chroot to .git: %w", err) + } + subRepo, err := git.Open( + filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), + subFS, + ) + if err != nil { + return nil, nil, fmt.Errorf("open submodule repository: %w", err) + } + subWorktree, err := subRepo.Worktree() + if err != nil { + return nil, nil, fmt.Errorf("get submodule worktree: %w", err) + } + return subRepo, subWorktree, nil +} + +// cloneSubmodule clones a single submodule into the parent worktree. +// The caller is responsible for deciding whether to forward auth, so +// submoduleAuth may be nil even when the parent was authenticated. 
+func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktree *git.Worktree, subConfig *config.Submodule, expectedHash plumbing.Hash, resolvedURL string, submoduleAuth transport.AuthMethod, opts CloneRepoOptions) error { + // Get the submodule directory within the parent worktree + subFS, err := parentWorktree.Filesystem.Chroot(subConfig.Path) + if err != nil { + return fmt.Errorf("chroot to submodule path: %w", err) + } + + // Check if already cloned + if _, statErr := subFS.Stat(".git"); statErr == nil { + logf(" Submodule already cloned, checking out expected commit...") + // Open the existing repository + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return fmt.Errorf("chroot to existing .git: %w", err) + } + subRepo, err := git.Open( + filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), + subFS, + ) + if err != nil { + return fmt.Errorf("open existing submodule: %w", err) + } + return checkoutSubmoduleCommit(ctx, logf, subRepo, expectedHash, submoduleAuth, opts) + } + + // Clone the submodule + logf(" Cloning submodule from: %s", RedactURL(resolvedURL)) + + // Create .git directory for the submodule + if err := subFS.MkdirAll(".git", 0o755); err != nil { + return fmt.Errorf("create .git directory: %w", err) + } + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return fmt.Errorf("chroot to .git: %w", err) + } + gitStorage := filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) + + // Clone the submodule repository. SingleBranch is false so all branches + // are fetched and the expected commit is reachable. We honor the parent's + // clone depth. If the expected commit is not reachable from the shallow + // tip, the fetch-by-hash path in checkoutSubmoduleCommit will deepen as + // needed. 
+ subRepo, err := git.CloneContext(ctx, gitStorage, subFS, &git.CloneOptions{ + URL: resolvedURL, + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + Depth: opts.Depth, + SingleBranch: false, + NoCheckout: true, + }) + if err != nil { + return fmt.Errorf("clone submodule repository: %w", err) + } + + return checkoutSubmoduleCommit(ctx, logf, subRepo, expectedHash, submoduleAuth, opts) +} + +// checkoutSubmoduleCommit ensures expectedHash is present in subRepo, +// fetching it from the remote if it is not already there, and then checks +// it out into the submodule's worktree. +func checkoutSubmoduleCommit(ctx context.Context, logf func(string, ...any), subRepo *git.Repository, expectedHash plumbing.Hash, submoduleAuth transport.AuthMethod, opts CloneRepoOptions) error { + // Verify the commit exists + logf(" Verifying commit exists: %s", expectedHash) + if _, err := subRepo.CommitObject(expectedHash); err != nil { + // Commit not found, try fetching with the specific hash + logf(" Commit not found, attempting to fetch it directly...") + fetchErr := subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + RefSpecs: []config.RefSpec{ + config.RefSpec("+" + expectedHash.String() + ":" + expectedHash.String()), + }, + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if fetchErr != nil && !errors.Is(fetchErr, git.NoErrAlreadyUpToDate) { + // If that fails, try fetching all refs + logf(" Direct fetch failed, fetching all refs...") + fetchAllErr := subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if fetchAllErr != nil && !errors.Is(fetchAllErr, git.NoErrAlreadyUpToDate) { + return 
fmt.Errorf("fetch commit %s: %w", expectedHash, fetchAllErr) + } + } + // Verify again + if _, err := subRepo.CommitObject(expectedHash); err != nil { + return fmt.Errorf("commit %s still not found after fetch: %w", expectedHash, err) + } + } + + // Checkout the specific commit expected by the parent repository + logf(" Checking out commit: %s", expectedHash) + subWorktree, err := subRepo.Worktree() + if err != nil { + return fmt.Errorf("get submodule worktree: %w", err) + } + if err := subWorktree.Checkout(&git.CheckoutOptions{Hash: expectedHash}); err != nil { + return fmt.Errorf("checkout expected commit %s: %w", expectedHash, err) + } + return nil +} diff --git a/git/git_test.go b/git/git_test.go index c6422897..5c97ed57 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -533,6 +533,382 @@ func mustRead(t *testing.T, fs billy.Filesystem, path string) string { return string(content) } +func TestRedactURL(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + input string + expect string + }{ + // Standard URLs without credentials + { + name: "https no creds", + input: "https://github.com/org/repo.git", + expect: "https://github.com/org/repo.git", + }, + { + name: "git protocol no creds", + input: "git://github.com/org/repo.git", + expect: "git://github.com/org/repo.git", + }, + + // HTTPS with various credential formats + { + name: "https with user and password", + input: "https://user:password@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with token only (no password)", + input: "https://ghp_xxxxxxxxxxxx@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with user only (no password)", + input: "https://user@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with x-access-token", + input: "https://x-access-token:ghp_secret123@github.com/org/repo.git", + expect: 
"https://***@github.com/org/repo.git", + }, + + // URL-encoded credentials + { + name: "https with URL-encoded password", + input: "https://user:p%40ss%3Aw0rd@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with URL-encoded username", + input: "https://user%40domain:pass@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + + // HTTP + { + name: "http with creds", + input: "http://user:pass@example.com/repo.git", + expect: "http://***@example.com/repo.git", + }, + + // SSH URLs (with scheme) + { + name: "ssh with user", + input: "ssh://git@github.com/org/repo.git", + expect: "ssh://***@github.com/org/repo.git", + }, + { + name: "ssh with different user", + input: "ssh://deploy@github.com/org/repo.git", + expect: "ssh://***@github.com/org/repo.git", + }, + + // SCP-like URLs (no scheme) + { + name: "scp-like git user", + input: "git@github.com:org/repo.git", + expect: "***@github.com:org/repo.git", + }, + { + name: "scp-like deploy user", + input: "deploy@host:repo.git", + expect: "***@host:repo.git", + }, + { + name: "scp-like with IP address", + input: "user@10.0.0.5:project.git", + expect: "***@10.0.0.5:project.git", + }, + { + name: "scp-like with token as user", + input: "oauth2:ghp_secret@gitlab.com:org/repo.git", + expect: "***@gitlab.com:org/repo.git", + }, + + // IPv6 hosts + { + name: "https with IPv6 and creds", + input: "https://user:pass@[2001:db8::1]/path/repo.git", + expect: "https://***@[2001:db8::1]/path/repo.git", + }, + { + name: "https with IPv6 no creds", + input: "https://[2001:db8::1]/path/repo.git", + expect: "https://[2001:db8::1]/path/repo.git", + }, + + // Other schemes + { + name: "ftp with creds", + input: "ftp://user:pass@host/path", + expect: "ftp://***@host/path", + }, + { + name: "sftp with user only", + input: "sftp://user@host/path", + expect: "sftp://***@host/path", + }, + + // Edge cases + { + name: "plain path (not a URL)", + input: 
"/local/path/to/repo", + expect: "/local/path/to/repo", + }, + { + name: "relative path", + input: "../sibling/repo.git", + expect: "../sibling/repo.git", + }, + { + name: "file URL", + input: "file:///local/repo.git", + expect: "file:///local/repo.git", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := git.RedactURL(tc.input) + require.Equal(t, tc.expect, got) + }) + } +} + +func TestSameHost(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + url1 string + url2 string + expect bool + }{ + // Same host cases + { + name: "https same host", + url1: "https://github.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "https and scp same host", + url1: "https://github.com/org/repo.git", + url2: "git@github.com:other/submodule.git", + expect: true, + }, + { + name: "scp same host", + url1: "git@github.com:org/repo.git", + url2: "git@github.com:other/submodule.git", + expect: true, + }, + { + name: "case insensitive", + url1: "https://GitHub.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "with port same host", + url1: "https://github.com:443/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "ssh scheme same host", + url1: "ssh://git@github.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + + // Different host cases + { + name: "different hosts", + url1: "https://github.com/org/repo.git", + url2: "https://gitlab.com/other/submodule.git", + expect: false, + }, + { + name: "scp different hosts", + url1: "git@github.com:org/repo.git", + url2: "git@evil.com:exfiltrate/creds.git", + expect: false, + }, + { + name: "subdomain is different", + url1: "https://github.com/org/repo.git", + url2: "https://api.github.com/other/submodule.git", + expect: false, + }, + + // Edge cases + { + name: "empty url1", + url1: "", + 
url2: "https://github.com/other/submodule.git", + expect: false, + }, + { + name: "relative url", + url1: "https://github.com/org/repo.git", + url2: "../other/submodule.git", + expect: false, + }, + { + name: "file path", + url1: "https://github.com/org/repo.git", + url2: "/local/path/to/repo", + expect: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := git.SameHost(tc.url1, tc.url2) + require.Equal(t, tc.expect, got) + }) + } +} + +func TestResolveSubmoduleURL(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + parentURL string + subURL string + expect string + expectErr string + }{ + { + name: "absolute", + parentURL: "https://example.com/org/main.git", + subURL: "https://github.com/other/repo.git", + expect: "https://github.com/other/repo.git", + }, + { + name: "relativeSibling", + parentURL: "https://example.com/org/main.git", + subURL: "../deps/lib.git", + expect: "https://example.com/org/deps/lib.git", + }, + { + name: "relativeChild", + parentURL: "https://example.com/org/main.git", + subURL: "./extras/tool.git", + expect: "https://example.com/org/main.git/extras/tool.git", + }, + { + name: "scpRelativeSibling", + parentURL: "git@github.com:org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://git@github.com/org/deps/lib.git", + }, + { + name: "scpRelativeChild", + parentURL: "git@github.com:org/main.git", + subURL: "./extras/tool.git", + expect: "ssh://git@github.com/org/main.git/extras/tool.git", + }, + { + name: "scpMultiLevelUp", + parentURL: "git@github.com:a/b/c/repo.git", + subURL: "../../other/lib.git", + expect: "ssh://git@github.com/a/b/other/lib.git", + }, + { + name: "scpWithPort", + parentURL: "git@github.com:2222:org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://git@github.com:2222/org/deps/lib.git", + }, + { + name: "httpsMultiLevelUp", + parentURL: "https://example.com/a/b/c/repo.git", + subURL: "../../other/lib.git", + expect: 
"https://example.com/a/b/other/lib.git", + }, + { + name: "scpParentWithAbsoluteSubmodule", + parentURL: "git@github.com:org/main.git", + subURL: "https://github.com/other/submodule.git", + expect: "https://github.com/other/submodule.git", + }, + { + name: "httpsParentWithTokenStripped", + parentURL: "https://token123@github.com/org/main.git", + subURL: "../deps/lib.git", + expect: "https://github.com/org/deps/lib.git", + }, + { + name: "httpsParentWithUserPassStripped", + parentURL: "https://user:pass@example.com/org/main.git", + subURL: "./extras/tool.git", + expect: "https://example.com/org/main.git/extras/tool.git", + }, + { + name: "sshSchemeUserPreserved", + parentURL: "ssh://deploy@host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://deploy@host.tld/org/deps/lib.git", + }, + { + name: "sshSchemePasswordStripped", + parentURL: "ssh://deploy:secret@host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://deploy@host.tld/org/deps/lib.git", + }, + { + name: "gitSchemeRelative", + parentURL: "git://host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "git://host.tld/org/deps/lib.git", + }, + { + name: "unparseableParent", + parentURL: "https://[bad-bracket", + subURL: "./child", + expectErr: "parse parent URL", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + got, err := git.ResolveSubmoduleURL(c.parentURL, c.subURL) + if c.expectErr != "" { + require.ErrorContains(t, err, c.expectErr) + return + } + require.NoError(t, err) + require.Equal(t, c.expect, got) + }) + } +} + +func TestCloneOptionsFromOptions_Submodules(t *testing.T) { + t.Parallel() + + fs := memfs.New() + opts := options.Options{ + Filesystem: fs, + WorkspaceFolder: "/workspace", + GitURL: "https://example.com/example/repo.git", + GitCloneSubmoduleDepth: 10, + GitCloneThinPack: true, + } + + cloneOpts, err := git.CloneOptionsFromOptions(t.Logf, opts) + require.NoError(t, err) + require.Equal(t, 10, 
cloneOpts.SubmoduleDepth) +} + // generates a random ed25519 private key func randKeygen(t *testing.T) gossh.Signer { t.Helper() diff --git a/git/submodule_auth_internal_test.go b/git/submodule_auth_internal_test.go new file mode 100644 index 00000000..f9c0b959 --- /dev/null +++ b/git/submodule_auth_internal_test.go @@ -0,0 +1,88 @@ +package git + +import ( + "bytes" + "testing" + + "github.com/go-git/go-git/v5/plumbing/transport" + "github.com/stretchr/testify/require" +) + +func TestSubmoduleAuthFor(t *testing.T) { + t.Parallel() + + type fakeAuth struct{ transport.AuthMethod } + parentAuth := fakeAuth{} + + cases := []struct { + name string + parentURL string + submoduleURL string + parentAuth transport.AuthMethod + wantAuth transport.AuthMethod + wantWarn bool + }{ + { + name: "noParentAuth", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + }, + { + name: "sameHostForwards", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + parentAuth: parentAuth, + wantAuth: parentAuth, + }, + { + name: "differentHostWithholdsAndWarns", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://evil.com/exfil.git", + parentAuth: parentAuth, + wantWarn: true, + }, + { + name: "differentHostNoParentAuthNoWarn", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://evil.com/exfil.git", + }, + { + name: "scpAndHttpsSameHost", + parentURL: "git@github.com:org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + parentAuth: parentAuth, + wantAuth: parentAuth, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + var buf bytes.Buffer + logf := func(format string, _ ...any) { _, _ = buf.WriteString(format) } + got := submoduleAuthFor(logf, c.parentURL, c.submoduleURL, c.parentAuth) + require.Equal(t, c.wantAuth, got) + if c.wantWarn { + require.Contains(t, buf.String(), "Not forwarding auth") + 
} else { + require.NotContains(t, buf.String(), "Not forwarding auth") + } + }) + } +} + +// Once auth is withheld at one level of submodule recursion, it must stay +// withheld for every level below, even when the deeper hosts match each other. +func TestSubmoduleAuthChainStaysWithheld(t *testing.T) { + t.Parallel() + + type fakeAuth struct{ transport.AuthMethod } + rootAuth := fakeAuth{} + logf := func(string, ...any) {} + + level1 := submoduleAuthFor(logf, "https://github.com/org/parent.git", "https://evil.com/repo.git", rootAuth) + require.Nil(t, level1) + + level2 := submoduleAuthFor(logf, "https://evil.com/repo.git", "https://evil.com/nested.git", level1) + require.Nil(t, level2) +} diff --git a/integration/integration_test.go b/integration/integration_test.go index 102bfba8..82f0c4c8 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -33,6 +33,7 @@ import ( "github.com/coder/envbuilder/testutil/gittest" "github.com/coder/envbuilder/testutil/mwtest" "github.com/coder/envbuilder/testutil/registrytest" + "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-billy/v5/osfs" gossh "golang.org/x/crypto/ssh" @@ -418,6 +419,42 @@ func TestSucceedsGitAuth(t *testing.T) { require.Contains(t, gitConfig, srv.URL) } +func TestGitSubmodules(t *testing.T) { + t.Parallel() + + submoduleFS := memfs.New() + submoduleRepo := gittest.NewRepo(t, submoduleFS, + gittest.Commit(t, "subfile.txt", "submodule content", "submodule commit"), + ) + submoduleHead, err := submoduleRepo.Head() + require.NoError(t, err) + submoduleSrv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(submoduleFS))) + t.Cleanup(submoduleSrv.Close) + + parentFS := memfs.New() + _ = gittest.NewRepo(t, parentFS, + gittest.Commit(t, "Dockerfile", "FROM "+testImageAlpine, "my test commit"), + gittest.CommitSubmodule(t, "submod", submoduleSrv.URL, submoduleHead.Hash()), + ) + parentSrv := httptest.NewServer(mwtest.BasicAuthMW("", 
"")(gittest.NewServer(parentFS))) + t.Cleanup(parentSrv.Close) + + ctr, err := runEnvbuilder(t, runOpts{env: []string{ + envbuilderEnv("GIT_URL", parentSrv.URL), + envbuilderEnv("DOCKERFILE_PATH", "Dockerfile"), + envbuilderEnv("GIT_CLONE_SUBMODULES", "true"), + }}) + require.NoError(t, err) + + gitmodules := execContainer(t, ctr, "cat /workspaces/empty/.gitmodules") + require.Contains(t, gitmodules, "[submodule") + + // Read a committed file from the submodule worktree to confirm that the + // submodule was actually checked out, not just registered in .gitmodules. + subfileContent := execContainer(t, ctr, "cat /workspaces/empty/submod/subfile.txt") + require.Contains(t, subfileContent, "submodule content") +} + func TestGitSSHAuth(t *testing.T) { t.Parallel() diff --git a/options/options.go b/options/options.go index 8cdf723a..508d1d00 100644 --- a/options/options.go +++ b/options/options.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "fmt" "os" + "strconv" "strings" "github.com/coder/envbuilder/log" @@ -12,6 +13,45 @@ import ( "github.com/go-git/go-billy/v5" ) +// SubmoduleDepth is a custom type for handling submodule depth that accepts +// "true" (defaults to 10), "false" (0), or a positive integer. 
+type SubmoduleDepth int + +const DefaultSubmoduleDepth = 10 + +func (s *SubmoduleDepth) Set(val string) error { + lower := strings.ToLower(strings.TrimSpace(val)) + switch lower { + case "true", "yes": + *s = DefaultSubmoduleDepth + return nil + case "false", "no", "": + *s = 0 + return nil + } + n, err := strconv.Atoi(lower) + if err != nil { + return fmt.Errorf("invalid submodule depth %q: must be true, false, or a positive integer", val) + } + if n < 0 { + return fmt.Errorf("submodule depth must be non-negative, got %d", n) + } + *s = SubmoduleDepth(n) + return nil +} + +func (s *SubmoduleDepth) String() string { + return strconv.Itoa(int(*s)) +} + +func (s *SubmoduleDepth) Type() string { + return "submodule-depth" +} + +func SubmoduleDepthOf(s *int) *SubmoduleDepth { + return (*SubmoduleDepth)(s) +} + // Options contains the configuration for the envbuilder. type Options struct { // SetupScript is the script to run before the init script. It runs as the @@ -108,6 +148,11 @@ type Options struct { GitCloneSingleBranch bool // GitCloneThinPack clone with thin pack compabilities. This is optional. GitCloneThinPack bool + // GitCloneSubmoduleDepth controls submodule initialization after cloning. + // 0 = disabled (default), positive integer = max recursion depth. + // The flag accepts "true" (defaults to DefaultSubmoduleDepth), "false" + // (0), or a positive integer for the max recursion depth. + GitCloneSubmoduleDepth int // GitUsername is the username to use for Git authentication. This is // optional. 
GitUsername string @@ -384,7 +429,14 @@ func (o *Options) CLI() serpent.OptionSet { Default: "true", Description: "Git clone with thin pack compatibility enabled, " + "ensuring that even when thin pack compatibility is activated," + - "it will not be turned on for the domain dev.zaure.com.", + "it will not be turned on for the domain dev.azure.com.", + }, + { + Flag: "git-clone-submodules", + Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), + Value: SubmoduleDepthOf(&o.GitCloneSubmoduleDepth), + Description: "Clone Git submodules after cloning the repository. " + + "Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth.", }, { Flag: "git-username", diff --git a/options/options_test.go b/options/options_test.go index ed5dcd3c..a835b2cd 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -72,6 +72,45 @@ func TestEnvOptionParsing(t *testing.T) { require.False(t, o.GitCloneSingleBranch) require.True(t, o.GitCloneThinPack) }) + + t.Run("remote repo build mode", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("REMOTE_REPO_BUILD_MODE"), "true") + o := runCLI() + require.True(t, o.RemoteRepoBuildMode) + }) + + t.Run("binary path", func(t *testing.T) { + const val = "/usr/local/bin/envbuilder" + t.Setenv(options.WithEnvPrefix("BINARY_PATH"), val) + o := runCLI() + require.Equal(t, o.BinaryPath, val) + }) + }) + + t.Run("submodule depth", func(t *testing.T) { + t.Run("true", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "true") + o := runCLI() + require.Equal(t, int(options.DefaultSubmoduleDepth), o.GitCloneSubmoduleDepth) + }) + + t.Run("integer", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "3") + o := runCLI() + require.Equal(t, 3, o.GitCloneSubmoduleDepth) + }) + + t.Run("integer with whitespace", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), " 5 ") + o := runCLI() + require.Equal(t, 5, o.GitCloneSubmoduleDepth) 
+ }) + + t.Run("false", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "false") + o := runCLI() + require.Equal(t, 0, o.GitCloneSubmoduleDepth) + }) }) } diff --git a/options/testdata/options.golden b/options/testdata/options.golden index 92a85232..6c086d56 100644 --- a/options/testdata/options.golden +++ b/options/testdata/options.golden @@ -99,10 +99,15 @@ OPTIONS: --git-clone-single-branch bool, $ENVBUILDER_GIT_CLONE_SINGLE_BRANCH Clone only a single branch of the Git repository. + --git-clone-submodules submodule-depth, $ENVBUILDER_GIT_CLONE_SUBMODULES + Clone Git submodules after cloning the repository. Accepts 'true' (max + depth 10), 'false' (disabled), or a positive integer for max recursion + depth. + --git-clone-thinpack bool, $ENVBUILDER_GIT_CLONE_THINPACK (default: true) Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for - the domain dev.zaure.com. + the domain dev.azure.com. --git-http-proxy-url string, $ENVBUILDER_GIT_HTTP_PROXY_URL The URL for the HTTP proxy. This is optional. 
diff --git a/testutil/gittest/gittest.go b/testutil/gittest/gittest.go index 1a0e2424..402a0b5a 100644 --- a/testutil/gittest/gittest.go +++ b/testutil/gittest/gittest.go @@ -20,8 +20,11 @@ import ( "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/index" "github.com/go-git/go-git/v5/plumbing/format/pktline" "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/plumbing/protocol/packp" @@ -270,6 +273,56 @@ func NewRepo(t *testing.T, fs billy.Filesystem, commits ...CommitFunc) *git.Repo return repo } +// CommitSubmodule creates a commit that adds a submodule with proper .gitmodules and gitlink entry. +func CommitSubmodule(t *testing.T, path, url string, hash plumbing.Hash) CommitFunc { + return func(fs billy.Filesystem, repo *git.Repository) { + t.Helper() + tree, err := repo.Worktree() + require.NoError(t, err) + + // Create .gitmodules file + gitmodulesContent := fmt.Sprintf("[submodule %q]\n\tpath = %s\n\turl = %s\n", path, path, url) + WriteFile(t, fs, ".gitmodules", gitmodulesContent) + _, err = tree.Add(".gitmodules") + require.NoError(t, err) + + // Add submodule config to .git/config + cfg, err := repo.Config() + require.NoError(t, err) + cfg.Submodules[path] = &config.Submodule{ + Name: path, + Path: path, + URL: url, + } + err = repo.SetConfig(cfg) + require.NoError(t, err) + + // Create the gitlink entry (mode 160000 commit reference) + // We need to add it directly to the index + idx, err := repo.Storer.Index() + require.NoError(t, err) + + // Add a gitlink entry - this is a special index entry with mode 160000 + idx.Entries = append(idx.Entries, &index.Entry{ + Mode: filemode.Submodule, + Hash: hash, + Name: path, + }) + err = repo.Storer.SetIndex(idx) + 
require.NoError(t, err) + + // Commit the changes + _, err = tree.Commit("add submodule", &git.CommitOptions{ + Author: &object.Signature{ + Name: "Example", + Email: "test@example.com", + When: time.Now(), + }, + }) + require.NoError(t, err) + } +} + // WriteFile writes a file to the filesystem. func WriteFile(t *testing.T, fs billy.Filesystem, path, content string) { t.Helper()