Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lighting/parser: align NULL and ESCAPED BY with LOAD DATA #40909

Merged
merged 11 commits into from
Feb 13, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 61 additions & 22 deletions br/pkg/lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,16 +477,43 @@ type PostRestore struct {
Compact bool `toml:"compact" json:"compact"`
}

// StringOrStringSlice can unmarshal a TOML string as string slice with one element.
type StringOrStringSlice []string

func (s *StringOrStringSlice) UnmarshalTOML(in interface{}) error {
switch v := in.(type) {
case string:
*s = []string{v}
case []interface{}:
*s = make([]string, 0, len(v))
for _, vv := range v {
vs, ok := vv.(string)
if !ok {
return errors.Errorf("invalid string slice '%v'", in)
}
*s = append(*s, vs)
}
default:
return errors.Errorf("invalid string slice '%v'", in)
}
return nil
}

type CSVConfig struct {
// Separator, Delimiter and Terminator should all be in utf8mb4 encoding.
Separator string `toml:"separator" json:"separator"`
Delimiter string `toml:"delimiter" json:"delimiter"`
Terminator string `toml:"terminator" json:"terminator"`
Null string `toml:"null" json:"null"`
Header bool `toml:"header" json:"header"`
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
NotNull bool `toml:"not-null" json:"not-null"`
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
Separator string `toml:"separator" json:"separator"`
Delimiter string `toml:"delimiter" json:"delimiter"`
Terminator string `toml:"terminator" json:"terminator"`
// for non-empty Delimiter (for example quotes), Null elements inside quotes are not considered as null except for
// `\N` (when escape-by is `\`). That is to say, `\N` is special for null because it always means null.
Null StringOrStringSlice `toml:"null" json:"null"`
Header bool `toml:"header" json:"header"`
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
NotNull bool `toml:"not-null" json:"not-null"`
// deprecated, use `escaped-by` instead.
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
// EscapedBy has higher priority than BackslashEscape, currently it must be a single character if set.
EscapedBy string `toml:"escaped-by" json:"escaped-by"`
// hide these options for lightning configuration file, they can only be used by LOAD DATA
// https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
StartingBy string `toml:"-" json:"-"`
Expand Down Expand Up @@ -743,13 +770,13 @@ func NewConfig() *Config {
Mydumper: MydumperRuntime{
ReadBlockSize: ReadBlockSize,
CSV: CSVConfig{
Separator: ",",
Delimiter: `"`,
Header: true,
NotNull: false,
Null: `\N`,
BackslashEscape: true,
TrimLastSep: false,
Separator: ",",
Delimiter: `"`,
Header: true,
NotNull: false,
Null: []string{`\N`},
EscapedBy: `\`,
TrimLastSep: false,
},
StrictFormat: false,
MaxRegionSize: MaxRegionSize,
Expand Down Expand Up @@ -880,15 +907,27 @@ func (cfg *Config) Adjust(ctx context.Context) error {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
}

if csv.BackslashEscape {
if csv.Separator == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true")
if len(csv.EscapedBy) > 1 {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.escaped-by` must be empty or a single character")
}
if csv.BackslashEscape && csv.EscapedBy == "" {
csv.EscapedBy = `\`
}

// keep compatibility with old behaviour
if !csv.NotNull && len(csv.Null) == 0 {
csv.Null = []string{""}
}

if len(csv.EscapedBy) > 0 {
if csv.Separator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV separator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Delimiter == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true")
if csv.Delimiter == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV delimiter and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Terminator == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV terminator when `mydumper.csv.backslash-escape` is true")
if csv.Terminator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV terminator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
}

Expand Down
19 changes: 19 additions & 0 deletions br/pkg/lightning/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,25 @@ func TestInvalidTOML(t *testing.T) {
require.EqualError(t, err, "toml: line 2: expected '.' or '=', but got '[' instead")
}

func TestStringOrStringSlice(t *testing.T) {
cfg := &config.Config{}
err := cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = '\N'
`))
require.NoError(t, err)
err = cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = [ '\N', 'NULL' ]
`))
require.NoError(t, err)
err = cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = [ '\N', 123 ]
`))
require.ErrorContains(t, err, "invalid string slice")
}

func TestTOMLUnusedKeys(t *testing.T) {
cfg := &config.Config{}
err := cfg.LoadFromTOML([]byte(`
Expand Down
1 change: 1 addition & 0 deletions br/pkg/lightning/mydump/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ go_library(
"@com_github_xitongsys_parquet_go//parquet",
"@com_github_xitongsys_parquet_go//reader",
"@com_github_xitongsys_parquet_go//source",
"@org_golang_x_exp//slices",
"@org_golang_x_text//encoding",
"@org_golang_x_text//encoding/simplifiedchinese",
"@org_uber_go_zap//:zap",
Expand Down
Loading