Skip to content

Commit

Permalink
Add paths field to bundle sync configuration (#1694)
Browse files Browse the repository at this point in the history
## Changes

This field allows a user to configure paths to synchronize to the
workspace.

Allowed values are relative paths to files and directories anchored at
the directory where the field is set. If one or more values traverse up
the directory tree (to an ancestor of the bundle root directory), the
CLI will dynamically determine the root path to use to ensure that the
file tree structure remains intact.

For example, given a `databricks.yml` in `my_bundle` that includes:

```yaml
sync:
  paths:
    - ../common
    - .
```

Then upon synchronization, the workspace will look like:
```
.
├── common
│   └── lib.py
└── my_bundle
    ├── databricks.yml
    └── notebook.py
```

If not set behavior remains identical.

## Tests

* Newly added unit tests for the mutators and under `bundle/tests`.
* Manually confirmed a bundle without this configuration works the same.
* Manually confirmed a bundle with this configuration works.
  • Loading branch information
pietern authored Aug 21, 2024
1 parent 6f34529 commit 6e8cd83
Show file tree
Hide file tree
Showing 27 changed files with 760 additions and 59 deletions.
8 changes: 8 additions & 0 deletions bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ type Bundle struct {
// Exclusively use this field for filesystem operations.
BundleRoot vfs.Path

// SyncRoot is a virtual filesystem path to the root directory of the files that are synchronized to the workspace.
// It can be an ancestor to [BundleRoot], but not a descendant; that is, [SyncRoot] must contain [BundleRoot].
SyncRoot vfs.Path

// SyncRootPath is the local path to the root directory of files that are synchronized to the workspace.
// It is equal to `SyncRoot.Native()` and included as dedicated field for convenient access.
SyncRootPath string

Config config.Root

// Metadata about the bundle deployment. This is the interface Databricks services
Expand Down
4 changes: 4 additions & 0 deletions bundle/bundle_read_only.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ func (r ReadOnlyBundle) BundleRoot() vfs.Path {
return r.b.BundleRoot
}

func (r ReadOnlyBundle) SyncRoot() vfs.Path {
return r.b.SyncRoot
}

func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient {
return r.b.WorkspaceClient()
}
Expand Down
4 changes: 2 additions & 2 deletions bundle/config/mutator/configure_wsfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (m *configureWSFS) Name() string {
}

func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
root := b.BundleRoot.Native()
root := b.SyncRoot.Native()

// The bundle root must be located in /Workspace/
if !strings.HasPrefix(root, "/Workspace/") {
Expand All @@ -45,6 +45,6 @@ func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return diag.FromErr(err)
}

b.BundleRoot = p
b.SyncRoot = p
return nil
}
4 changes: 4 additions & 0 deletions bundle/config/mutator/rewrite_sync_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func (m *rewriteSyncPaths) makeRelativeTo(root string) dyn.MapFunc {
func (m *rewriteSyncPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
return dyn.Map(v, "sync", func(_ dyn.Path, v dyn.Value) (nv dyn.Value, err error) {
v, err = dyn.Map(v, "paths", dyn.Foreach(m.makeRelativeTo(b.RootPath)))
if err != nil {
return dyn.InvalidValue, err
}
v, err = dyn.Map(v, "include", dyn.Foreach(m.makeRelativeTo(b.RootPath)))
if err != nil {
return dyn.InvalidValue, err
Expand Down
16 changes: 16 additions & 0 deletions bundle/config/mutator/rewrite_sync_paths_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
RootPath: ".",
Config: config.Root{
Sync: config.Sync{
Paths: []string{
".",
"../common",
},
Include: []string{
"foo",
"bar",
Expand All @@ -29,6 +33,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
},
}

bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml")
bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml")
bundletest.SetLocation(b, "sync.include[0]", "./file.yml")
bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml")
bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml")
Expand All @@ -37,6 +43,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths())
assert.NoError(t, diags.Error())

assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0])
assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1])
assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0])
assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1])
assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0])
Expand All @@ -48,6 +56,10 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
RootPath: "/tmp/dir",
Config: config.Root{
Sync: config.Sync{
Paths: []string{
".",
"../common",
},
Include: []string{
"foo",
"bar",
Expand All @@ -60,6 +72,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
},
}

bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml")
bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml")
bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml")
bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml")
bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml")
Expand All @@ -68,6 +82,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths())
assert.NoError(t, diags.Error())

assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0])
assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1])
assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0])
assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1])
assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0])
Expand Down
48 changes: 48 additions & 0 deletions bundle/config/mutator/sync_default_path.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package mutator

import (
"context"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type syncDefaultPath struct{}

// SyncDefaultPath configures the default sync path to be equal to the bundle root.
func SyncDefaultPath() bundle.Mutator {
return &syncDefaultPath{}
}

func (m *syncDefaultPath) Name() string {
return "SyncDefaultPath"
}

func (m *syncDefaultPath) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
isset := false
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
pv, _ := dyn.Get(v, "sync.paths")

// If the sync paths field is already set, do nothing.
// We know it is set if its value is either a nil or a sequence (empty or not).
switch pv.Kind() {
case dyn.KindNil, dyn.KindSequence:
isset = true
}

return v, nil
})
if err != nil {
return diag.FromErr(err)
}

// If the sync paths field is already set, do nothing.
if isset {
return nil
}

// Set the sync paths to the default value.
b.Config.Sync.Paths = []string{"."}
return nil
}
82 changes: 82 additions & 0 deletions bundle/config/mutator/sync_default_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package mutator_test

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestSyncDefaultPath_DefaultIfUnset(t *testing.T) {
b := &bundle.Bundle{
RootPath: "/tmp/some/dir",
Config: config.Root{},
}

ctx := context.Background()
diags := bundle.Apply(ctx, b, mutator.SyncDefaultPath())
require.NoError(t, diags.Error())
assert.Equal(t, []string{"."}, b.Config.Sync.Paths)
}

func TestSyncDefaultPath_SkipIfSet(t *testing.T) {
tcases := []struct {
name string
paths dyn.Value
expect []string
}{
{
name: "nil",
paths: dyn.V(nil),
expect: nil,
},
{
name: "empty sequence",
paths: dyn.V([]dyn.Value{}),
expect: []string{},
},
{
name: "non-empty sequence",
paths: dyn.V([]dyn.Value{dyn.V("something")}),
expect: []string{"something"},
},
}

for _, tcase := range tcases {
t.Run(tcase.name, func(t *testing.T) {
b := &bundle.Bundle{
RootPath: "/tmp/some/dir",
Config: config.Root{},
}

diags := bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
v, err := dyn.Set(v, "sync", dyn.V(dyn.NewMapping()))
if err != nil {
return dyn.InvalidValue, err
}
v, err = dyn.Set(v, "sync.paths", tcase.paths)
if err != nil {
return dyn.InvalidValue, err
}
return v, nil
})
return diag.FromErr(err)
})
require.NoError(t, diags.Error())

ctx := context.Background()
diags = bundle.Apply(ctx, b, mutator.SyncDefaultPath())
require.NoError(t, diags.Error())

// If the sync paths field is already set, do nothing.
assert.Equal(t, tcase.expect, b.Config.Sync.Paths)
})
}
}
120 changes: 120 additions & 0 deletions bundle/config/mutator/sync_infer_root.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package mutator

import (
"context"
"fmt"
"path/filepath"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/vfs"
)

type syncInferRoot struct{}

// SyncInferRoot is a mutator that infers the root path of all files to synchronize by looking at the
// paths in the sync configuration. The sync root may be different from the bundle root
// when the user intends to synchronize files outside the bundle root.
//
// The sync root can be equivalent to or an ancestor of the bundle root, but not a descendant.
// That is, the sync root must contain the bundle root.
//
// This mutator requires all sync-related paths and patterns to be relative to the bundle root path.
// This is done by the [RewriteSyncPaths] mutator, which must run before this mutator.
func SyncInferRoot() bundle.Mutator {
return &syncInferRoot{}
}

func (m *syncInferRoot) Name() string {
return "SyncInferRoot"
}

// computeRoot finds the innermost path that contains the specified path.
// It traverses up the root path until it finds the innermost path.
// If the path does not exist, it returns an empty string.
//
// See "sync_infer_root_internal_test.go" for examples.
func (m *syncInferRoot) computeRoot(path string, root string) string {
for !filepath.IsLocal(path) {
// Break if we have reached the root of the filesystem.
dir := filepath.Dir(root)
if dir == root {
return ""
}

// Update the sync path as we navigate up the directory tree.
path = filepath.Join(filepath.Base(root), path)

// Move up the directory tree.
root = dir
}

return filepath.Clean(root)
}

func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
var diags diag.Diagnostics

// Use the bundle root path as the starting point for inferring the sync root path.
bundleRootPath := filepath.Clean(b.RootPath)

// Infer the sync root path by looking at each one of the sync paths.
// Every sync path must be a descendant of the final sync root path.
syncRootPath := bundleRootPath
for _, path := range b.Config.Sync.Paths {
computedPath := m.computeRoot(path, bundleRootPath)
if computedPath == "" {
continue
}

// Update sync root path if the computed root path is an ancestor of the current sync root path.
if len(computedPath) < len(syncRootPath) {
syncRootPath = computedPath
}
}

// The new sync root path can only be an ancestor of the previous root path.
// Compute the relative path from the sync root to the bundle root.
rel, err := filepath.Rel(syncRootPath, bundleRootPath)
if err != nil {
return diag.FromErr(err)
}

// If during computation of the sync root path we hit the root of the filesystem,
// then one or more of the sync paths are outside the filesystem.
// Check if this happened by verifying that none of the paths escape the root
// when joined with the sync root path.
for i, path := range b.Config.Sync.Paths {
if filepath.IsLocal(filepath.Join(rel, path)) {
continue
}

diags = append(diags, diag.Diagnostic{
Severity: diag.Error,
Summary: fmt.Sprintf("invalid sync path %q", path),
Locations: b.Config.GetLocations(fmt.Sprintf("sync.paths[%d]", i)),
Paths: []dyn.Path{dyn.NewPath(dyn.Key("sync"), dyn.Key("paths"), dyn.Index(i))},
})
}

if diags.HasError() {
return diags
}

// Update all paths in the sync configuration to be relative to the sync root.
for i, p := range b.Config.Sync.Paths {
b.Config.Sync.Paths[i] = filepath.Join(rel, p)
}
for i, p := range b.Config.Sync.Include {
b.Config.Sync.Include[i] = filepath.Join(rel, p)
}
for i, p := range b.Config.Sync.Exclude {
b.Config.Sync.Exclude[i] = filepath.Join(rel, p)
}

// Configure the sync root path.
b.SyncRoot = vfs.MustNew(syncRootPath)
b.SyncRootPath = syncRootPath
return nil
}
Loading

0 comments on commit 6e8cd83

Please sign in to comment.