From ad46667f507e3385dac78c32f26d96726d7db77a Mon Sep 17 00:00:00 2001 From: Christophe de Vienne Date: Tue, 2 Apr 2024 19:41:11 +0200 Subject: [PATCH] hg: Add a cache for mercurial repositories. The idea is to save the whole untouched clone (with no checkout) in the cache. If already present, the pull is done directly in the cache, and is faster (except on very small repos) because only new changesets are transferred. If the ref is a changeset id (not a tag, branch, topic or bookmark), and the changeset is already known in the cached clone, no pull is done, which avoids any network exchange. Then we copy the cached entry and do the checkout. Signed-off-by: Christophe de Vienne --- pkg/vendir/directory/directory.go | 2 +- pkg/vendir/fetch/hg/hg.go | 61 ++++++++++++++++++++++++++++--- pkg/vendir/fetch/hg/sync.go | 34 ++++++++++++++--- 3 files changed, 85 insertions(+), 12 deletions(-) diff --git a/pkg/vendir/directory/directory.go b/pkg/vendir/directory/directory.go index be2f89cf..1a06f88d 100644 --- a/pkg/vendir/directory/directory.go +++ b/pkg/vendir/directory/directory.go @@ -119,7 +119,7 @@ func (d *Directory) Sync(syncOpts SyncOpts) (ctlconf.LockDirectory, error) { lockDirContents.Git = &lock case contents.Hg != nil: - hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher) + hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher, syncOpts.Cache) d.ui.PrintLinef("Fetching: %s + %s (hg from %s)", d.opts.Path, contents.Path, hgSync.Desc()) diff --git a/pkg/vendir/fetch/hg/hg.go b/pkg/vendir/fetch/hg/hg.go index 9969535a..04c5c379 100644 --- a/pkg/vendir/fetch/hg/hg.go +++ b/pkg/vendir/fetch/hg/hg.go @@ -23,32 +23,74 @@ type Hg struct { refFetcher ctlfetch.RefFetcher authDir string env []string + cacheID string } func NewHg(opts ctlconf.DirectoryContentsHg, infoLog io.Writer, refFetcher ctlfetch.RefFetcher, tempArea ctlfetch.TempArea, ) (*Hg, error) { - t := Hg{opts, infoLog, 
refFetcher, "", nil, ""} if err := t.setup(tempArea); err != nil { return nil, err } return &t, nil } +// CacheID returns a cache id for the repository +// It doesn't include the ref because we want to reuse a cache when only the ref +// is changed +// Basically we combine all data used to write the hgrc file +func (t *Hg) CacheID() string { + return t.cacheID +} + //nolint:revive type HgInfo struct { SHA string ChangeSetTitle string } -func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error) { - if len(t.opts.URL) == 0 { - return HgInfo{}, fmt.Errorf("Expected non-empty URL") +// CloneHasTargetRef returns true if the given clone contains the target +// ref, and this ref is a revision id (not a tag or a branch) +func (t *Hg) CloneHasTargetRef(dstPath string) bool { + out, _, err := t.run([]string{"id", "--id", "-r", t.opts.Ref}, dstPath) + if err != nil { + return false } + out = strings.TrimSpace(out) + if strings.HasPrefix(t.opts.Ref, out) { + return true + } + return false +} - err := t.fetch(dstPath, tempArea) - if err != nil { +func (t *Hg) Clone(dstPath string) error { + if err := t.initClone(dstPath); err != nil { + return err + } + return t.SyncClone(dstPath) +} + +func (t *Hg) SyncClone(dstPath string) error { + if _, _, err := t.run([]string{"pull"}, dstPath); err != nil { + return err + } + return nil +} + +func (t *Hg) LocalClone(localClone, dstPath string) error { + if err := t.initClone(dstPath); err != nil { + return err + } + if _, _, err := t.run([]string{"pull", localClone}, dstPath); err != nil { + return err + } + return nil +} + +func (t *Hg) Checkout(dstPath string) (HgInfo, error) { + if _, _, err := t.run([]string{"checkout", t.opts.Ref}, dstPath); err != nil { return HgInfo{}, err } @@ -80,6 +122,10 @@ func (t *Hg) Close() { } func (t *Hg) setup(tempArea ctlfetch.TempArea) error { + if len(t.opts.URL) == 0 { + return fmt.Errorf("Expected non-empty URL") + } + authOpts, err := t.getAuthOpts() if err != nil { return 
err @@ -132,6 +178,7 @@ hgauth.password = %s } sshCmd = append(sshCmd, "-i", path, "-o", "IdentitiesOnly=yes") + t.cacheID += "private-key=" + *authOpts.PrivateKey + "|" } if authOpts.KnownHosts != nil { @@ -143,6 +190,7 @@ hgauth.password = %s } sshCmd = append(sshCmd, "-o", "StrictHostKeyChecking=yes", "-o", "UserKnownHostsFile="+path) + t.cacheID += "known-hosts=" + *authOpts.KnownHosts + "|" } else { sshCmd = append(sshCmd, "-o", "StrictHostKeyChecking=no") } @@ -157,6 +205,7 @@ hgauth.password = %s return fmt.Errorf("Writing %s: %s", hgRcPath, err) } t.env = append(t.env, "HGRCPATH="+hgRcPath) + t.cacheID += hgRc } return nil diff --git a/pkg/vendir/fetch/hg/sync.go b/pkg/vendir/fetch/hg/sync.go index c1fefe85..b401c735 100644 --- a/pkg/vendir/fetch/hg/sync.go +++ b/pkg/vendir/fetch/hg/sync.go @@ -11,18 +11,20 @@ import ( ctlconf "carvel.dev/vendir/pkg/vendir/config" ctlfetch "carvel.dev/vendir/pkg/vendir/fetch" + ctlcache "carvel.dev/vendir/pkg/vendir/fetch/cache" ) type Sync struct { opts ctlconf.DirectoryContentsHg log io.Writer refFetcher ctlfetch.RefFetcher + cache ctlcache.Cache } func NewSync(opts ctlconf.DirectoryContentsHg, - log io.Writer, refFetcher ctlfetch.RefFetcher) Sync { + log io.Writer, refFetcher ctlfetch.RefFetcher, cache ctlcache.Cache) Sync { - return Sync{opts, log, refFetcher} + return Sync{opts, log, refFetcher, cache} } func (d Sync) Desc() string { @@ -46,13 +48,35 @@ func (d Sync) Sync(dstPath string, tempArea ctlfetch.TempArea) (ctlconf.LockDire hg, err := NewHg(d.opts, d.log, d.refFetcher, tempArea) if err != nil { - return hgLockConf, err + return hgLockConf, fmt.Errorf("Setting up hg: %w", err) } defer hg.Close() - info, err := hg.Retrieve(incomingTmpPath, tempArea) + if cachePath, ok := d.cache.Has("hg", hg.CacheID()); ok { + // Sync directly in the cache if needed + if !hg.CloneHasTargetRef(cachePath) { + if err := hg.SyncClone(cachePath); err != nil { + return hgLockConf, fmt.Errorf("Syncing hg cached clone: %w", err) + } + } 
+ // fetch from cachedDir + if err := d.cache.CopyFrom("hg", hg.CacheID(), incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Extracting cached hg clone: %w", err) + } + } else { + // fetch in the target directory, and save it to cache + if err := hg.Clone(incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Cloning hg repository: %w", err) + } + if err := d.cache.Save("hg", hg.CacheID(), incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err) + } + } + + // now checkout the wanted revision + info, err := hg.Checkout(incomingTmpPath) if err != nil { - return hgLockConf, fmt.Errorf("Fetching hg repository: %s", err) + return hgLockConf, fmt.Errorf("Checking out hg repository: %s", err) } hgLockConf.SHA = info.SHA