-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Library to validate and normalize cloud specific tags (#819)
## Changes

Prompted by the proposed fix for a tagging-related problem in #810, I investigated how tag validation works. This turned out to be quite a bit more complex than anticipated. Tags at the job level (or cluster level) are passed through to the underlying compute infrastructure and as such are tested against cloud-specific validation rules. GCP appears to be the most restrictive. It would be disappointing to always restrict to `\w+`, so this package implements validation and normalization rules for each cloud. It can pick the right cloud based on a Go SDK configuration.

## Tests

Exhaustive unit tests. The regular expressions were pulled by #814.
- Loading branch information
1 parent
f7170dd
commit 52d3a5e
Showing
13 changed files
with
532 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package tags | ||
|
||
import ( | ||
"regexp" | ||
"unicode" | ||
|
||
"golang.org/x/text/unicode/rangetable" | ||
) | ||
|
||
// The union of all characters allowed in AWS tags. | ||
// This must be used only after filtering out non-Latin1 characters, | ||
// because the [unicode] classes include non-Latin1 characters. | ||
var awsChars = rangetable.Merge( | ||
unicode.Digit, | ||
unicode.Space, | ||
unicode.Letter, | ||
rangetable.New('+', '-', '=', '.', ':', '/', '@'), | ||
) | ||
|
||
var awsTag = &tag{ | ||
keyLength: 127, | ||
keyPattern: regexp.MustCompile(`^[\d \w\+\-=\.:\/@]*$`), | ||
keyNormalize: chain( | ||
normalizeMarks(), | ||
replaceNotIn(latin1, '_'), | ||
replaceNotIn(awsChars, '_'), | ||
), | ||
|
||
valueLength: 255, | ||
valuePattern: regexp.MustCompile(`^[\d \w\+\-=\.:/@]*$`), | ||
valueNormalize: chain( | ||
normalizeMarks(), | ||
replaceNotIn(latin1, '_'), | ||
replaceNotIn(awsChars, '_'), | ||
), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package tags | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestAwsNormalizeKey(t *testing.T) { | ||
assert.Equal(t, "1 a b c", awsTag.NormalizeKey("1 a b c")) | ||
assert.Equal(t, "+-=.:/@__", awsTag.NormalizeKey("+-=.:/@?)")) | ||
assert.Equal(t, "test", awsTag.NormalizeKey("test")) | ||
|
||
// Remove marks; unicode becomes underscore. | ||
assert.Equal(t, "cafe _", awsTag.NormalizeKey("café 🍎")) | ||
|
||
// Replace forbidden characters with underscore. | ||
assert.Equal(t, "cafe __", awsTag.NormalizeKey("café 🍎?")) | ||
} | ||
|
||
func TestAwsNormalizeValue(t *testing.T) { | ||
assert.Equal(t, "1 a b c", awsTag.NormalizeValue("1 a b c")) | ||
assert.Equal(t, "+-=.:/@__", awsTag.NormalizeValue("+-=.:/@?)")) | ||
assert.Equal(t, "test", awsTag.NormalizeValue("test")) | ||
|
||
// Remove marks; unicode becomes underscore. | ||
assert.Equal(t, "cafe _", awsTag.NormalizeValue("café 🍎")) | ||
|
||
// Replace forbidden characters with underscore. | ||
assert.Equal(t, "cafe __", awsTag.NormalizeValue("café 🍎?")) | ||
} | ||
|
||
func TestAwsValidateKey(t *testing.T) { | ||
assert.ErrorContains(t, awsTag.ValidateKey(""), "not be empty") | ||
assert.ErrorContains(t, awsTag.ValidateKey(strings.Repeat("a", 512)), "length") | ||
assert.ErrorContains(t, awsTag.ValidateKey("café 🍎"), "latin") | ||
assert.ErrorContains(t, awsTag.ValidateKey("????"), "pattern") | ||
assert.NoError(t, awsTag.ValidateKey(strings.Repeat("a", 127))) | ||
assert.NoError(t, awsTag.ValidateKey(awsTag.NormalizeKey("café 🍎"))) | ||
} | ||
|
||
func TestAwsValidateValue(t *testing.T) { | ||
assert.ErrorContains(t, awsTag.ValidateValue(strings.Repeat("a", 512)), "length") | ||
assert.ErrorContains(t, awsTag.ValidateValue("café 🍎"), "latin1") | ||
assert.ErrorContains(t, awsTag.ValidateValue("????"), "pattern") | ||
assert.NoError(t, awsTag.ValidateValue(strings.Repeat("a", 127))) | ||
assert.NoError(t, awsTag.ValidateValue(awsTag.NormalizeValue("café 🍎"))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package tags | ||
|
||
import ( | ||
"regexp" | ||
|
||
"golang.org/x/text/unicode/rangetable" | ||
) | ||
|
||
// All characters that may not be used in Azure tag keys. | ||
var azureForbiddenChars = rangetable.New('<', '>', '*', '&', '%', ';', '\\', '/', '+', '?') | ||
|
||
var azureTag = &tag{ | ||
keyLength: 512, | ||
keyPattern: regexp.MustCompile(`^[^<>\*&%;\\\/\+\?]*$`), | ||
keyNormalize: chain( | ||
replaceNotIn(latin1, '_'), | ||
replaceIn(azureForbiddenChars, '_'), | ||
), | ||
|
||
valueLength: 256, | ||
valuePattern: regexp.MustCompile(`^.*$`), | ||
valueNormalize: chain( | ||
replaceNotIn(latin1, '_'), | ||
), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package tags | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestAzureNormalizeKey(t *testing.T) { | ||
assert.Equal(t, "test", azureTag.NormalizeKey("test")) | ||
assert.Equal(t, "café __", azureTag.NormalizeKey("café 🍎?")) | ||
} | ||
|
||
func TestAzureNormalizeValue(t *testing.T) { | ||
assert.Equal(t, "test", azureTag.NormalizeValue("test")) | ||
assert.Equal(t, "café _?", azureTag.NormalizeValue("café 🍎?")) | ||
} | ||
|
||
func TestAzureValidateKey(t *testing.T) { | ||
assert.ErrorContains(t, azureTag.ValidateKey(""), "not be empty") | ||
assert.ErrorContains(t, azureTag.ValidateKey(strings.Repeat("a", 513)), "length") | ||
assert.ErrorContains(t, azureTag.ValidateKey("café 🍎"), "latin") | ||
assert.ErrorContains(t, azureTag.ValidateKey("????"), "pattern") | ||
assert.NoError(t, azureTag.ValidateKey(strings.Repeat("a", 127))) | ||
assert.NoError(t, azureTag.ValidateKey(azureTag.NormalizeKey("café 🍎"))) | ||
} | ||
|
||
func TestAzureValidateValue(t *testing.T) { | ||
assert.ErrorContains(t, azureTag.ValidateValue(strings.Repeat("a", 513)), "length") | ||
assert.ErrorContains(t, azureTag.ValidateValue("café 🍎"), "latin") | ||
assert.NoError(t, azureTag.ValidateValue(strings.Repeat("a", 127))) | ||
assert.NoError(t, azureTag.ValidateValue(azureTag.NormalizeValue("café 🍎"))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package tags | ||
|
||
import "github.com/databricks/databricks-sdk-go/config" | ||
|
||
// Cloud validates and normalizes tag keys and values according to the rules
// of a single cloud provider.
type Cloud interface {
	// ValidateKey returns an error if the key cannot be used as a tag key
	// with the cloud provider.
	ValidateKey(key string) error

	// ValidateValue returns an error if the value cannot be used as a tag
	// value with the cloud provider.
	ValidateValue(value string) error

	// NormalizeKey returns the key normalized for the cloud provider.
	NormalizeKey(key string) string

	// NormalizeValue returns the value normalized for the cloud provider.
	NormalizeValue(value string) string
}
|
||
func ForCloud(cfg *config.Config) Cloud { | ||
var t *tag | ||
switch { | ||
case cfg.IsAws(): | ||
t = awsTag | ||
case cfg.IsAzure(): | ||
t = azureTag | ||
case cfg.IsGcp(): | ||
t = gcpTag | ||
default: | ||
panic("unknown cloud provider") | ||
} | ||
return t | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package tags | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/databricks/databricks-sdk-go/config" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestForCloudAws(t *testing.T) { | ||
c := &config.Config{ | ||
Host: "https://dbc-XXXXXXXX-YYYY.cloud.databricks.com/", | ||
} | ||
|
||
assert.Equal(t, awsTag, ForCloud(c)) | ||
} | ||
|
||
func TestForCloudAzure(t *testing.T) { | ||
c := &config.Config{ | ||
Host: "https://adb-xxx.y.azuredatabricks.net/", | ||
} | ||
|
||
assert.Equal(t, azureTag, ForCloud(c)) | ||
} | ||
|
||
func TestForCloudGcp(t *testing.T) { | ||
c := &config.Config{ | ||
Host: "https://123.4.gcp.databricks.com/", | ||
} | ||
|
||
assert.Equal(t, gcpTag, ForCloud(c)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package tags | ||
|
||
import ( | ||
"regexp" | ||
"unicode" | ||
) | ||
|
||
// Tag keys and values on GCP are limited to 63 characters and must match the | ||
// regular expression `^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$`. | ||
// For normalization, we define one table for the outer characters and | ||
// one table for the inner characters. The outer table is used to trim | ||
// leading and trailing characters, and the inner table is used to | ||
// replace invalid characters with underscores. | ||
|
||
// gcpOuter contains the characters allowed at the first and last position of
// a GCP tag key or value: ASCII digits and letters.
var gcpOuter = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: '0', Hi: '9', Stride: 1},
		{Lo: 'A', Hi: 'Z', Stride: 1},
		{Lo: 'a', Hi: 'z', Stride: 1},
	},
	// All three ranges lie within Latin-1.
	LatinOffset: 3,
}
|
||
// gcpInner contains the characters allowed anywhere inside a GCP tag key or
// value: ASCII digits and letters plus dash, period and underscore.
var gcpInner = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: '-', Hi: '-', Stride: 1}, // hyphen-minus (dash)
		{Lo: '.', Hi: '.', Stride: 1}, // full stop (period)
		{Lo: '0', Hi: '9', Stride: 1},
		{Lo: 'A', Hi: 'Z', Stride: 1},
		{Lo: '_', Hi: '_', Stride: 1}, // low line (underscore)
		{Lo: 'a', Hi: 'z', Stride: 1},
	},
	// All six ranges lie within Latin-1.
	LatinOffset: 6,
}
|
||
var gcpTag = &tag{ | ||
keyLength: 63, | ||
keyPattern: regexp.MustCompile(`^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$`), | ||
keyNormalize: chain( | ||
normalizeMarks(), | ||
replaceNotIn(latin1, '_'), | ||
replaceNotIn(gcpInner, '_'), | ||
trimIfNotIn(gcpOuter), | ||
), | ||
|
||
valueLength: 63, | ||
valuePattern: regexp.MustCompile(`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`), | ||
valueNormalize: chain( | ||
normalizeMarks(), | ||
replaceNotIn(latin1, '_'), | ||
replaceNotIn(gcpInner, '_'), | ||
trimIfNotIn(gcpOuter), | ||
), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package tags | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
"unicode" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestGcpOuter(t *testing.T) { | ||
assert.True(t, unicode.In('A', gcpOuter)) | ||
assert.True(t, unicode.In('Z', gcpOuter)) | ||
assert.True(t, unicode.In('a', gcpOuter)) | ||
assert.True(t, unicode.In('z', gcpOuter)) | ||
assert.True(t, unicode.In('0', gcpOuter)) | ||
assert.True(t, unicode.In('9', gcpOuter)) | ||
assert.False(t, unicode.In('-', gcpOuter)) | ||
assert.False(t, unicode.In('.', gcpOuter)) | ||
assert.False(t, unicode.In('_', gcpOuter)) | ||
assert.False(t, unicode.In('!', gcpOuter)) | ||
} | ||
|
||
func TestGcpInner(t *testing.T) { | ||
assert.True(t, unicode.In('A', gcpInner)) | ||
assert.True(t, unicode.In('Z', gcpInner)) | ||
assert.True(t, unicode.In('a', gcpInner)) | ||
assert.True(t, unicode.In('z', gcpInner)) | ||
assert.True(t, unicode.In('0', gcpInner)) | ||
assert.True(t, unicode.In('9', gcpInner)) | ||
assert.True(t, unicode.In('-', gcpInner)) | ||
assert.True(t, unicode.In('.', gcpInner)) | ||
assert.True(t, unicode.In('_', gcpInner)) | ||
assert.False(t, unicode.In('!', gcpInner)) | ||
} | ||
|
||
func TestGcpNormalizeKey(t *testing.T) { | ||
assert.Equal(t, "test", gcpTag.NormalizeKey("test")) | ||
assert.Equal(t, "cafe", gcpTag.NormalizeKey("café 🍎?")) | ||
assert.Equal(t, "cafe_foo", gcpTag.NormalizeKey("__café_foo__")) | ||
|
||
} | ||
|
||
func TestGcpNormalizeValue(t *testing.T) { | ||
assert.Equal(t, "test", gcpTag.NormalizeValue("test")) | ||
assert.Equal(t, "cafe", gcpTag.NormalizeValue("café 🍎?")) | ||
assert.Equal(t, "cafe_foo", gcpTag.NormalizeValue("__café_foo__")) | ||
} | ||
|
||
func TestGcpValidateKey(t *testing.T) { | ||
assert.ErrorContains(t, gcpTag.ValidateKey(""), "not be empty") | ||
assert.ErrorContains(t, gcpTag.ValidateKey(strings.Repeat("a", 64)), "length") | ||
assert.ErrorContains(t, gcpTag.ValidateKey("café 🍎"), "latin") | ||
assert.ErrorContains(t, gcpTag.ValidateKey("????"), "pattern") | ||
assert.NoError(t, gcpTag.ValidateKey(strings.Repeat("a", 32))) | ||
assert.NoError(t, gcpTag.ValidateKey(gcpTag.NormalizeKey("café 🍎"))) | ||
} | ||
|
||
func TestGcpValidateValue(t *testing.T) { | ||
assert.ErrorContains(t, gcpTag.ValidateValue(strings.Repeat("a", 64)), "length") | ||
assert.ErrorContains(t, gcpTag.ValidateValue("café 🍎"), "latin") | ||
assert.ErrorContains(t, gcpTag.ValidateValue("????"), "pattern") | ||
assert.NoError(t, gcpTag.ValidateValue(strings.Repeat("a", 32))) | ||
assert.NoError(t, gcpTag.ValidateValue(gcpTag.NormalizeValue("café 🍎"))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package tags | ||
|
||
import "unicode" | ||
|
||
// latin1 is the range table covering the whole Latin1 character set,
// U+0000 through U+00FF.
var latin1 = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x0000, Hi: unicode.MaxLatin1, Stride: 1},
	},
	// The single range lies entirely within Latin-1.
	LatinOffset: 1,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package tags | ||
|
||
import ( | ||
"testing" | ||
"unicode" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestLatinTable(t *testing.T) { | ||
assert.True(t, unicode.In('\u0000', latin1)) | ||
assert.True(t, unicode.In('A', latin1)) | ||
assert.True(t, unicode.In('Z', latin1)) | ||
assert.True(t, unicode.In('\u00ff', latin1)) | ||
assert.False(t, unicode.In('\u0100', latin1)) | ||
} |
Oops, something went wrong.