Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an experimental dbt-sql template #1059

Merged
merged 29 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4fee665
Add a dbt template
lennartkats-db Dec 12, 2023
b3a5ef8
Use a template for VS Code settings
lennartkats-db Dec 13, 2023
c81d139
Tweak message
lennartkats-db Dec 19, 2023
c900cff
Update
lennartkats-db Dec 19, 2023
16f26a3
Add tests
lennartkats-db Dec 24, 2023
cd52c83
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Dec 28, 2023
d85d4c4
Fix test
lennartkats-db Dec 28, 2023
419bd27
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Jan 8, 2024
9030f56
Add template
lennartkats-db Jan 13, 2024
45ea8db
Improve catalog handling
lennartkats-db Jan 13, 2024
0268c88
Minor tweaks
lennartkats-db Jan 13, 2024
94ebd9a
Update template to use materialized views & streaming tables
lennartkats-db Jan 20, 2024
14bc1fa
Add conditional
lennartkats-db Jan 20, 2024
1501298
Improve template
lennartkats-db Jan 20, 2024
6fc5ed4
Offer an option to use personal schemas
lennartkats-db Jan 22, 2024
220a1ea
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Jan 22, 2024
99f920e
Fix ANSI mode
lennartkats-db Jan 24, 2024
af0dd6d
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Jan 24, 2024
1099eed
Don't ask for a "production" schema, just assume "default"
lennartkats-db Jan 25, 2024
33c5e91
Explain mode: development
lennartkats-db Jan 25, 2024
7275310
Change project layout based on OSS team feedback
lennartkats-db Jan 26, 2024
de7bd78
Improve DX with default_catalog helper
lennartkats-db Jan 27, 2024
8e7c6a1
Remove from list of templates for now
lennartkats-db Jan 28, 2024
18c6b70
Update README.md
lennartkats-db Jan 28, 2024
a660efa
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Jan 28, 2024
2f52ff1
Mark as experimental
lennartkats-db Jan 29, 2024
e041148
Restore sql-dbt template in hidden form
lennartkats-db Feb 19, 2024
00bf2fe
Merge remote-tracking branch 'databricks/main' into dbt-template
lennartkats-db Feb 19, 2024
e5fb708
Copy-editing
lennartkats-db Feb 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cmd/bundle/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type nativeTemplate struct {
gitUrl string
description string
aliases []string
hidden bool
}

const customTemplate = "custom..."
Expand All @@ -34,6 +35,11 @@ var nativeTemplates = []nativeTemplate{
name: "default-python",
description: "The default Python template for Notebooks / Delta Live Tables / Workflows",
},
{
name: "dbt-sql",
description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
hidden: true,
},
{
name: "mlops-stacks",
gitUrl: "https://github.com/databricks/mlops-stacks",
Expand All @@ -50,7 +56,7 @@ var nativeTemplates = []nativeTemplate{
func nativeTemplateHelpDescriptions() string {
var lines []string
for _, template := range nativeTemplates {
if template.name != customTemplate {
if template.name != customTemplate && !template.hidden {
lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description))
}
}
Expand All @@ -61,6 +67,9 @@ func nativeTemplateHelpDescriptions() string {
func nativeTemplateOptions() []cmdio.Tuple {
names := make([]cmdio.Tuple, 0, len(nativeTemplates))
for _, template := range nativeTemplates {
if template.hidden {
continue
}
tuple := cmdio.Tuple{
Name: template.name,
Id: template.description,
Expand Down
21 changes: 21 additions & 0 deletions libs/template/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/auth"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
)

Expand All @@ -29,6 +30,7 @@ type pair struct {

var cachedUser *iam.User
var cachedIsServicePrincipal *bool
var cachedCatalog *string

func loadHelpers(ctx context.Context) template.FuncMap {
w := root.WorkspaceClient(ctx)
Expand Down Expand Up @@ -108,6 +110,25 @@ func loadHelpers(ctx context.Context) template.FuncMap {
}
return auth.GetShortUserName(cachedUser.UserName), nil
},
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
// Get the default workspace catalog. If there is no default, or if
// Unity Catalog is not enabled, return an empty string.
// A successful lookup (including the "no metastore" case) is stored in
// cachedCatalog and reused on later calls; other errors are not cached.
"default_catalog": func() (string, error) {
	// Fast path: a previous call already resolved the catalog.
	if cachedCatalog != nil {
		return *cachedCatalog, nil
	}

	metastore, err := w.Metastores.Current(ctx)
	if err != nil {
		var aerr *apierr.APIError
		if !errors.As(err, &aerr) || aerr.ErrorCode != "METASTORE_DOES_NOT_EXIST" {
			return "", err
		}
		// Workspace doesn't have a metastore assigned, ignore error
		noCatalog := ""
		cachedCatalog = &noCatalog
		return "", nil
	}

	catalogName := metastore.DefaultCatalogName
	cachedCatalog = &catalogName
	return catalogName, nil
},
"is_service_principal": func() (bool, error) {
if cachedIsServicePrincipal != nil {
return *cachedIsServicePrincipal, nil
Expand Down
33 changes: 28 additions & 5 deletions libs/template/renderer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ func assertFilePermissions(t *testing.T, path string, perm fs.FileMode) {
assert.Equal(t, perm, info.Mode().Perm())
}

func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) {
func assertBuiltinTemplateValid(t *testing.T, template string, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) {
ctx := context.Background()

templatePath, err := prepareBuiltinTemplates("default-python", tempDir)
templatePath, err := prepareBuiltinTemplates(template, tempDir)
require.NoError(t, err)
libraryPath := filepath.Join(templatePath, "library")

Expand All @@ -50,6 +50,9 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st

// Prepare helpers
cachedUser = &iam.User{UserName: "user@domain.com"}
if isServicePrincipal {
cachedUser.UserName = "1d410060-a513-496f-a197-23cc82e5f46d"
}
cachedIsServicePrincipal = &isServicePrincipal
ctx = root.SetWorkspaceClient(ctx, w)
helpers := loadHelpers(ctx)
Expand Down Expand Up @@ -102,11 +105,13 @@ func TestPrepareBuiltInTemplatesWithRelativePaths(t *testing.T) {
assert.Equal(t, "./default-python", dir)
}

func TestBuiltinTemplateValid(t *testing.T) {
func TestBuiltinPythonTemplateValid(t *testing.T) {
// Test option combinations
options := []string{"yes", "no"}
isServicePrincipal := false
build := false
catalog := "hive_metastore"
cachedCatalog = &catalog
for _, includeNotebook := range options {
for _, includeDlt := range options {
for _, includePython := range options {
Expand All @@ -118,7 +123,7 @@ func TestBuiltinTemplateValid(t *testing.T) {
"include_python": includePython,
}
tempDir := t.TempDir()
assertBuiltinTemplateValid(t, config, "dev", isServicePrincipal, build, tempDir)
assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
}
}
}
Expand All @@ -140,10 +145,28 @@ func TestBuiltinTemplateValid(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(tempDir)

assertBuiltinTemplateValid(t, config, "prod", isServicePrincipal, build, tempDir)
assertBuiltinTemplateValid(t, "default-python", config, "prod", isServicePrincipal, build, tempDir)
defer os.RemoveAll(tempDir)
}

// TestBuiltinDbtTemplateValid exercises the built-in "dbt-sql" template for
// every combination of personal-schema choice, deployment target, and user
// kind (service principal or not), handing each combination to
// assertBuiltinTemplateValid with a fresh temporary directory.
func TestBuiltinDbtTemplateValid(t *testing.T) {
	// The build step is never requested for this template.
	const build = false

	schemaChoices := []string{"yes", "no"}
	targets := []string{"dev", "prod"}
	principalKinds := []bool{true, false}

	for _, personalSchemas := range schemaChoices {
		for _, target := range targets {
			for _, isServicePrincipal := range principalKinds {
				config := map[string]any{
					"project_name":     "my_project",
					"http_path":        "/sql/1.0/warehouses/123",
					"default_catalog":  "hive_metastore",
					"personal_schemas": personalSchemas,
					"shared_schema":    "lennart",
				}
				assertBuiltinTemplateValid(t, "dbt-sql", config, target, isServicePrincipal, build, t.TempDir())
			}
		}
	}
}

func TestRendererWithAssociatedTemplateInLibrary(t *testing.T) {
tmpDir := t.TempDir()

Expand Down
9 changes: 9 additions & 0 deletions libs/template/templates/dbt-sql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# dbt template

This folder provides a template for using dbt-core with Databricks Asset Bundles.
It leverages dbt-core for local development and relies on Databricks Asset Bundles
for deployment (either manually or with CI/CD). In production,
dbt is executed using Databricks Workflows.

* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects.
* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html
53 changes: 53 additions & 0 deletions libs/template/templates/dbt-sql/databricks_template_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!",
"properties": {
"project_name": {
"type": "string",
"pattern": "^[A-Za-z_][A-Za-z0-9_]+$",
"pattern_match_failure_message": "Name must consist of letters, numbers, and underscores.",
"default": "dbt_project",
"description": "\nPlease provide a unique name for this project.\nproject_name",
"order": 1
},
"http_path": {
"type": "string",
"pattern": "^/sql/.\\../warehouses/[a-z0-9]+$",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this pattern work without + or *?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand this question. There is a + in there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe you're referring to the \.\\.. part? That matches a version, like 1.0.

"pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>",
"description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]",
"order": 2
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
},
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
"default_catalog": {
"type": "string",
"default": "{{default_catalog}}",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.",
"description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
"order": 3
},
"personal_schemas": {
"type": "string",
"description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
"enum": [
"yes, use a schema based on the current user name during development",
"no, use a shared schema during development"
],
"order": 4
},
"shared_schema": {
"skip_prompt_if": {
"properties": {
"personal_schemas": {
"const": "yes, use a schema based on the current user name during development"
}
}
},
"type": "string",
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"order": 5
}
},
"success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nIf you already have dbt installed, just type 'cd {{.project_name}}; dbt init' to get started.\nRefer to the README.md file for full \"getting started\" guide and production setup instructions.\n"
}
7 changes: 7 additions & 0 deletions libs/template/templates/dbt-sql/library/versions.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{{define "latest_lts_dbr_version" -}}
13.3.x-scala2.12
{{- end}}

{{define "latest_lts_db_connect_version_spec" -}}
>=13.3,<13.4
{{- end}}
9 changes: 9 additions & 0 deletions libs/template/templates/dbt-sql/template/__preamble.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Preamble

This file only contains template directives; it is skipped for the actual output.

{{skip "__preamble"}}

{{if eq .project_name "dbt"}}
{{fail "Project name 'dbt' is not supported"}}
{{end}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"redhat.vscode-yaml",
"innoverio.vscode-dbt-power-user",
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"python.analysis.stubPath": ".vscode",
"databricks.python.envFile": "${workspaceFolder}/.env",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.extraPaths": ["src"],
"files.exclude": {
"**/*.egg-info": true,
"**/__pycache__": true,
".pytest_cache": true,
},
"python.envFile": "${workspaceFolder}/.databricks/.databricks.env",
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
"sqltools.connections": [
{
"connectionMethod": "VS Code Extension (beta)",
"catalog": "hive_metastore",
"previewLimit": 50,
"driver": "Databricks",
"name": "databricks",
"path": "{{.http_path}}"
}
],
"sqltools.autoConnectTo": "",
"[jinja-sql]": {
"editor.defaultFormatter": "innoverio.vscode-dbt-power-user"
}
}
Loading
Loading