Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added run_as section for bundle configuration #692

Merged
merged 5 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions bundle/config/mutator/run_as.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package mutator

import (
"context"
"slices"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/jobs"
)

// setRunAs is the mutator type returned by SetRunAs. It has no fields.
type setRunAs struct {
}

// SetRunAs mutator is used to go over defined resources such as Jobs and DLT Pipelines
// And set correct execution identity ("run_as" for a job or "is_owner" permission for DLT)
// if top-level "run-as" section is defined in the configuration.
func SetRunAs() bundle.Mutator {
andrewnester marked this conversation as resolved.
Show resolved Hide resolved
return &setRunAs{}
}

// Name reports the identifier of this mutator.
func (*setRunAs) Name() string {
	const name = "SetRunAs"
	return name
}

func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error {
runAs := b.Config.RunAs
if runAs == nil {
return nil
}

for i := range b.Config.Resources.Jobs {
job := b.Config.Resources.Jobs[i]
if job.RunAs != nil {
continue
}
job.RunAs = &jobs.JobRunAs{
ServicePrincipalName: runAs.ServicePrincipalName,
UserName: runAs.UserName,
}
}

me := b.Config.Workspace.CurrentUser.UserName
// If user deploying the bundle and the one defined in run_as are the same
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
if runAs.UserName == me || runAs.ServicePrincipalName == me {
return nil
}

for i := range b.Config.Resources.Pipelines {
pipeline := b.Config.Resources.Pipelines[i]
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
(runAs.UserName != "" && p.UserName == runAs.UserName)
})
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
Level: "IS_OWNER",
ServicePrincipalName: runAs.ServicePrincipalName,
UserName: runAs.UserName,
})
andrewnester marked this conversation as resolved.
Show resolved Hide resolved
}

return nil
}
8 changes: 8 additions & 0 deletions bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/databricks/cli/bundle/config/variable"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/ghodss/yaml"
"github.com/imdario/mergo"
)
Expand Down Expand Up @@ -80,6 +81,9 @@ type Root struct {

// Sync section specifies options for files synchronization
Sync Sync `json:"sync"`

// RunAs section allows defining an execution identity for job and pipeline runs
RunAs *jobs.JobRunAs `json:"run_as,omitempty"`
}

func Load(path string) (*Root, error) {
Expand Down Expand Up @@ -237,6 +241,10 @@ func (r *Root) MergeTargetOverrides(target *Target) error {
}
}

if target.RunAs != nil {
r.RunAs = target.RunAs
}

if target.Mode != "" {
r.Bundle.Mode = target.Mode
}
Expand Down
4 changes: 4 additions & 0 deletions bundle/config/target.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package config

import "github.com/databricks/databricks-sdk-go/service/jobs"

type Mode string

// Target defines overrides for a single target.
Expand Down Expand Up @@ -31,6 +33,8 @@ type Target struct {
Variables map[string]string `json:"variables,omitempty"`

Git Git `json:"git,omitempty"`

RunAs *jobs.JobRunAs `json:"run_as,omitempty"`
}

const (
Expand Down
1 change: 1 addition & 0 deletions bundle/phases/initialize.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func Initialize() bundle.Mutator {
"initialize",
[]bundle.Mutator{
mutator.PopulateCurrentUser(),
mutator.SetRunAs(),
mutator.DefineDefaultWorkspaceRoot(),
mutator.ExpandWorkspaceRoot(),
mutator.DefineDefaultWorkspacePaths(),
Expand Down
42 changes: 42 additions & 0 deletions bundle/tests/run_as/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
bundle:
name: "run_as"

run_as:
service_principal_name: "my_service_principal"

targets:
development:
mode: development
run_as:
user_name: "my_user_name"

resources:
pipelines:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually remember there's one more caveat here: Terraform doesn't allow you to list yourself as the OWNER if you're the person deploying the pipeline. Does that case work in your implementation?

Copy link
Contributor Author

@andrewnester andrewnester Aug 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lennartkats-db I added some logic to not add current user as an owner permission in this case

nyc_taxi_pipeline:
permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name
name: "nyc taxi loader"
libraries:
- notebook:
path: ./dlt/nyc_taxi_loader
jobs:
job_one:
name: Job One
tasks:
- task:
notebook_path: "./test.py"
job_two:
name: Job Two
tasks:
- task:
notebook_path: "./test.py"
job_three:
name: Job Three
run_as:
service_principal_name: "my_service_principal_for_job"
tasks:
- task:
notebook_path: "./test.py"
82 changes: 82 additions & 0 deletions bundle/tests/run_as_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package config_tests

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/stretchr/testify/assert"
)

// TestRunAsDefault loads the "./run_as" bundle without selecting a target and
// applies the SetRunAs mutator with "jane@doe.com" as the current user.
//
// It expects jobs without their own run_as to receive the bundle-level service
// principal, job_three to keep its own service principal, and the pipeline to
// end up with the unrelated CAN_VIEW entry followed by an IS_OWNER entry for
// the bundle-level service principal.
func TestRunAsDefault(t *testing.T) {
	b := load(t, "./run_as")
	b.Config.Workspace.CurrentUser = &config.User{
		User: &iam.User{
			UserName: "jane@doe.com",
		},
	}
	ctx := context.Background()
	err := bundle.Apply(ctx, b, mutator.SetRunAs())
	assert.NoError(t, err)

	assert.Len(t, b.Config.Resources.Jobs, 3)
	jobs := b.Config.Resources.Jobs

	assert.NotNil(t, jobs["job_one"].RunAs)
	assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName)
	assert.Equal(t, "", jobs["job_one"].RunAs.UserName)

	assert.NotNil(t, jobs["job_two"].RunAs)
	assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName)
	assert.Equal(t, "", jobs["job_two"].RunAs.UserName)

	assert.NotNil(t, jobs["job_three"].RunAs)
	assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
	assert.Equal(t, "", jobs["job_three"].RunAs.UserName)

	pipelines := b.Config.Resources.Pipelines
	perms := pipelines["nyc_taxi_pipeline"].Permissions
	// Stop on a length mismatch: indexing below would panic and hide the
	// assertion failure that explains what went wrong.
	if !assert.Len(t, perms, 2) {
		return
	}

	// assert.Equal takes (expected, actual); keeping that order makes failure
	// messages label the two values correctly.
	assert.Equal(t, "CAN_VIEW", perms[0].Level)
	assert.Equal(t, "my_user_name", perms[0].UserName)

	assert.Equal(t, "IS_OWNER", perms[1].Level)
	assert.Equal(t, "my_service_principal", perms[1].ServicePrincipalName)
}

// TestRunAsDevelopment loads the "./run_as" bundle with the "development"
// target and applies the SetRunAs mutator with "jane@doe.com" as the current
// user.
//
// It expects jobs without their own run_as to receive the user name
// "my_user_name", job_three to keep its own service principal, and the
// pipeline to end up with the unrelated CAN_VIEW entry followed by an IS_OWNER
// entry for "my_user_name".
func TestRunAsDevelopment(t *testing.T) {
	b := loadTarget(t, "./run_as", "development")
	b.Config.Workspace.CurrentUser = &config.User{
		User: &iam.User{
			UserName: "jane@doe.com",
		},
	}
	ctx := context.Background()
	err := bundle.Apply(ctx, b, mutator.SetRunAs())
	assert.NoError(t, err)

	assert.Len(t, b.Config.Resources.Jobs, 3)
	jobs := b.Config.Resources.Jobs

	assert.NotNil(t, jobs["job_one"].RunAs)
	assert.Equal(t, "", jobs["job_one"].RunAs.ServicePrincipalName)
	assert.Equal(t, "my_user_name", jobs["job_one"].RunAs.UserName)

	assert.NotNil(t, jobs["job_two"].RunAs)
	assert.Equal(t, "", jobs["job_two"].RunAs.ServicePrincipalName)
	assert.Equal(t, "my_user_name", jobs["job_two"].RunAs.UserName)

	assert.NotNil(t, jobs["job_three"].RunAs)
	assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
	assert.Equal(t, "", jobs["job_three"].RunAs.UserName)

	pipelines := b.Config.Resources.Pipelines
	perms := pipelines["nyc_taxi_pipeline"].Permissions
	// Stop on a length mismatch: indexing below would panic and hide the
	// assertion failure that explains what went wrong.
	if !assert.Len(t, perms, 2) {
		return
	}

	// assert.Equal takes (expected, actual); keeping that order makes failure
	// messages label the two values correctly.
	assert.Equal(t, "CAN_VIEW", perms[0].Level)
	assert.Equal(t, "my_service_principal", perms[0].ServicePrincipalName)

	assert.Equal(t, "IS_OWNER", perms[1].Level)
	assert.Equal(t, "my_user_name", perms[1].UserName)
}