Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add directory tracking to sync #425

Merged
merged 15 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 57 additions & 7 deletions internal/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"os/exec"
Expand All @@ -15,6 +16,7 @@ import (
"time"

_ "github.com/databricks/cli/cmd/sync"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/sync"
"github.com/databricks/cli/libs/testfile"
"github.com/databricks/databricks-sdk-go"
Expand Down Expand Up @@ -63,6 +65,7 @@ type syncTest struct {
t *testing.T
c *cobraTestRunner
w *databricks.WorkspaceClient
f filer.Filer
localRoot string
remoteRoot string
}
Expand All @@ -73,6 +76,8 @@ func setupSyncTest(t *testing.T, args ...string) *syncTest {
w := databricks.Must(databricks.NewWorkspaceClient())
localRoot := t.TempDir()
remoteRoot := temporaryWorkspaceDir(t, w)
f, err := filer.NewWorkspaceFilesClient(w, remoteRoot)
require.NoError(t, err)

// Prepend common arguments.
args = append([]string{
Expand All @@ -90,6 +95,7 @@ func setupSyncTest(t *testing.T, args ...string) *syncTest {
t: t,
c: c,
w: w,
f: f,
localRoot: localRoot,
remoteRoot: remoteRoot,
}
Expand Down Expand Up @@ -160,6 +166,21 @@ func (a *syncTest) remoteFileContent(ctx context.Context, relativePath string, e
}, 30*time.Second, 5*time.Second)
}

func (a *syncTest) remoteNotExist(ctx context.Context, relativePath string) {
_, err := a.f.Stat(ctx, relativePath)
require.ErrorIs(a.t, err, fs.ErrNotExist)
}

func (a *syncTest) remoteExists(ctx context.Context, relativePath string) {
_, err := a.f.Stat(ctx, relativePath)
require.NoError(a.t, err)
}

func (a *syncTest) touchFile(ctx context.Context, path string) {
err := a.f.Write(ctx, path, strings.NewReader("contents"), filer.CreateParentDirectories)
require.NoError(a.t, err)
}

func (a *syncTest) objectType(ctx context.Context, relativePath string, expected string) {
path := path.Join(a.remoteRoot, relativePath)

Expand Down Expand Up @@ -297,11 +318,43 @@ func TestAccSyncNestedFolderSync(t *testing.T) {
// delete
f.Remove(t)
assertSync.waitForCompletionMarker()
// directories are not cleaned up right now. This is not ideal
assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{})
assertSync.remoteNotExist(ctx, "dir1")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))
}

func TestAccSyncNestedFolderDoesntFailOnNonEmptyDirectory(t *testing.T) {
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")

// .gitignore is created by the sync process to enforce .databricks is not synced
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))

// New file
localFilePath := filepath.Join(assertSync.localRoot, "dir1/dir2/dir3/foo.txt")
err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
assert.NoError(t, err)
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"})

// Add file to dir1 to simulate a user writing to the workspace directly.
assertSync.touchFile(ctx, "dir1/foo.txt")

// Remove original file.
f.Remove(t)
assertSync.waitForCompletionMarker()

// Sync should have removed these directories.
assertSync.remoteNotExist(ctx, "dir1/dir2/dir3")
assertSync.remoteNotExist(ctx, "dir1/dir2")

// Sync should have ignored not being able to delete dir1.
assertSync.remoteExists(ctx, "dir1/foo.txt")
assertSync.remoteExists(ctx, "dir1")
}

func TestAccSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) {
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")
Expand All @@ -326,12 +379,10 @@ func TestAccSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) {
// delete
f.Remove(t)
assertSync.waitForCompletionMarker()
// directories are not cleaned up right now. This is not ideal
assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{})
assertSync.remoteNotExist(ctx, "dir1/a b+c/c+d e")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))
}

// sync does not clean up empty directories from the workspace file system.
// This is a check for the edge case when a user does the following:
//
// 1. Add file foo/bar.txt
Expand Down Expand Up @@ -359,8 +410,7 @@ func TestAccSyncIncrementalFileOverwritesFolder(t *testing.T) {
f.Remove(t)
os.Remove(filepath.Join(assertSync.localRoot, "foo"))
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "foo", []string{})
assertSync.objectType(ctx, "foo", "DIRECTORY")
assertSync.remoteNotExist(ctx, "foo")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))

f2 := testfile.CreateFile(t, filepath.Join(assertSync.localRoot, "foo"))
Expand Down
92 changes: 91 additions & 1 deletion libs/sync/diff.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,100 @@
package sync

import (
"path"
)

type diff struct {
put []string
delete []string
rmdir []string
mkdir []string
put []string
}

func (d diff) IsEmpty() bool {
return len(d.put) == 0 && len(d.delete) == 0
}

// groupedMkdir returns a slice of slices of paths to create.
// Because the underlying mkdir calls create intermediate directories,
// we can group them together to reduce the total number of calls.
// This returns a slice of a slice for parity with [groupedRmdir].
func (d diff) groupedMkdir() [][]string {
// Compute the set of prefixes of all paths to create.
prefixes := make(map[string]bool)
for _, name := range d.mkdir {
dir := path.Dir(name)
for dir != "." && dir != "/" {
pietern marked this conversation as resolved.
Show resolved Hide resolved
prefixes[dir] = true
dir = path.Dir(dir)
}
}

var out []string

// Collect all paths that are not a prefix of another path.
for _, name := range d.mkdir {
if !prefixes[name] {
out = append(out, name)
}
}

return [][]string{out}
}

// groupedRmdir returns a slice of slices of paths to delete.
// The outer slice is ordered such that each inner slice can be
// deleted in parallel, as long as it is processed in order.
// The first entry will contain leaf directories, the second entry
// will contain intermediate directories, and so on.
func (d diff) groupedRmdir() [][]string {
// Compute the number of times each directory is a prefix of another directory.
prefixes := make(map[string]int)
for _, dir := range d.rmdir {
prefixes[dir] = 0
}
for _, dir := range d.rmdir {
dir = path.Dir(dir)
for dir != "." && dir != "/" {
// Increment the prefix count for this directory, only if it
// it one of the directories we are deleting.
if _, ok := prefixes[dir]; ok {
prefixes[dir]++
}
dir = path.Dir(dir)
}
}

var out [][]string

for len(prefixes) > 0 {
var toDelete []string

// Find directories which are not a prefix of another directory.
// These are the directories we can delete.
for dir, count := range prefixes {
if count == 0 {
toDelete = append(toDelete, dir)
delete(prefixes, dir)
}
}

// Remove these directories from the prefixes map.
for _, dir := range toDelete {
dir = path.Dir(dir)
for dir != "." && dir != "/" {
// Decrement the prefix count for this directory, only if it
// it one of the directories we are deleting.
if _, ok := prefixes[dir]; ok {
prefixes[dir]--
}
dir = path.Dir(dir)
}
}

// Add these directories to the output.
out = append(out, toDelete)
}

return out
}
73 changes: 73 additions & 0 deletions libs/sync/diff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package sync

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestDiffGroupedMkdir(t *testing.T) {
d := diff{
mkdir: []string{
"foo",
"foo/bar",
"foo/bar/baz1",
"foo/bar/baz2",
"foo1",
"a/b",
"a/b/c/d/e/f",
},
}

// Expect only leaf directories to be included.
out := d.groupedMkdir()
assert.Len(t, out, 1)
assert.ElementsMatch(t, []string{
"foo/bar/baz1",
"foo/bar/baz2",
"foo1",
"a/b/c/d/e/f",
}, out[0])
pietern marked this conversation as resolved.
Show resolved Hide resolved
}

func TestDiffGroupedRmdir(t *testing.T) {
d := diff{
rmdir: []string{
"a/b/c/d/e/f",
"a/b/c/d/e",
"a/b/c/d",
"a/b/c",
"a/b/e/f/g/h",
"a/b/e/f/g",
"a/b/e/f",
"a/b/e",
"a/b",
},
}

out := d.groupedRmdir()
assert.Len(t, out, 5)
assert.ElementsMatch(t, []string{"a/b/c/d/e/f", "a/b/e/f/g/h"}, out[0])
assert.ElementsMatch(t, []string{"a/b/c/d/e", "a/b/e/f/g"}, out[1])
assert.ElementsMatch(t, []string{"a/b/c/d", "a/b/e/f"}, out[2])
assert.ElementsMatch(t, []string{"a/b/c", "a/b/e"}, out[3])
assert.ElementsMatch(t, []string{"a/b"}, out[4])
}

func TestDiffGroupedRmdirWithLeafsOnly(t *testing.T) {
d := diff{
rmdir: []string{
"foo/bar/baz1",
"foo/bar1",
"foo/bar/baz2",
"foo/bar2",
"foo1",
"foo2",
},
}

// Expect all directories to be included.
out := d.groupedRmdir()
assert.Len(t, out, 1)
assert.ElementsMatch(t, d.rmdir, out[0])
}
54 changes: 54 additions & 0 deletions libs/sync/dirset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package sync

import (
"path"
"path/filepath"
"sort"
)

// DirSet is a set of directories.
type DirSet map[string]struct{}

// MakeDirSet turns a list of file paths into the complete set of directories
// that is needed to store them (including parent directories).
func MakeDirSet(files []string) DirSet {
out := map[string]struct{}{}

// Iterate over all files.
for _, f := range files {
// Get the directory of the file in /-separated form.
dir := filepath.ToSlash(filepath.Dir(f))

// Add this directory and its parents until it is either "." or already in the set.
for dir != "." {
if _, ok := out[dir]; ok {
break
}
out[dir] = struct{}{}
dir = path.Dir(dir)
}
}

return out
}

// Slice returns a sorted copy of the dirset elements as a slice.
func (dirset DirSet) Slice() []string {
out := make([]string, 0, len(dirset))
for dir := range dirset {
out = append(out, dir)
}
sort.Strings(out)
return out
}

// Remove returns the set difference of two DirSets.
func (dirset DirSet) Remove(other DirSet) DirSet {
out := map[string]struct{}{}
for dir := range dirset {
if _, ok := other[dir]; !ok {
out[dir] = struct{}{}
}
}
return out
}
Loading