Skip to content

Commit

Permalink
support specialized csv separator and delimiter (pingcap#116)
Browse files Browse the repository at this point in the history
* support specialized csv separator and delimiter

* address comments

* address comment
  • Loading branch information
lichunzhu authored Jul 8, 2020
1 parent b275625 commit 60b859d
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 46 deletions.
6 changes: 6 additions & 0 deletions dumpling/cmd/dumpling/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ var (
caPath string
certPath string
keyPath string
csvSeparator string
csvDelimiter string

dumpEmptyDatabase bool
escapeBackslash bool
Expand Down Expand Up @@ -115,6 +117,8 @@ func main() {
pflag.StringVar(&caPath, "ca", "", "The path name to the certificate authority file for TLS connection")
pflag.StringVar(&certPath, "cert", "", "The path name to the client certificate file for TLS connection")
pflag.StringVar(&keyPath, "key", "", "The path name to the client private key file for TLS connection")
pflag.StringVar(&csvSeparator, "csv-separator", ",", "The separator for csv files, default ','")
pflag.StringVar(&csvDelimiter, "csv-delimiter", "\"", "The delimiter for values in csv files, default '\"'")

printVersion := pflag.BoolP("version", "V", false, "Print Dumpling version")

Expand Down Expand Up @@ -185,6 +189,8 @@ func main() {
conf.Security.CertPath = certPath
conf.Security.KeyPath = keyPath
conf.SessionParams["tidb_mem_quota_query"] = tidbMemQuotaQuery
conf.CsvSeparator = csvSeparator
conf.CsvDelimiter = csvDelimiter

err = export.Dump(context.Background(), conf)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions dumpling/v4/export/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ type Config struct {
NoData bool
CsvNullValue string
Sql string
CsvSeparator string
CsvDelimiter string

TableFilter filter.Filter
Rows uint64
Expand Down
2 changes: 1 addition & 1 deletion dumpling/v4/export/ir.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type RowReceiverStringer interface {

type Stringer interface {
WriteToBuffer(*bytes.Buffer, bool)
WriteToBufferInCsv(*bytes.Buffer, bool, string)
WriteToBufferInCsv(*bytes.Buffer, bool, *csvOption)
}

type RowReceiver interface {
Expand Down
46 changes: 23 additions & 23 deletions dumpling/v4/export/sql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
var colTypeRowReceiverMap = map[string]func() RowReceiverStringer{}

var nullValue = "NULL"
var quotationMark byte = '\''
var doubleQuotationMark byte = '"'
var quotationMark = []byte{'\''}
var doubleQuotationMark = []byte{'"'}

func init() {
for _, s := range dataTypeString {
Expand Down Expand Up @@ -45,16 +45,16 @@ var dataTypeBin = []string{
"BIT",
}

func getEscapeQuotation(escapeBackSlash bool, escapeQuotation byte) byte {
func getEscapeQuotation(escapeBackSlash bool, escapeQuotation []byte) []byte {
if escapeBackSlash {
return 0
return nil
}
return escapeQuotation
}

func escape(s []byte, bf *bytes.Buffer, escapeQuotation byte) {
if escapeQuotation != 0 {
bf.Write(bytes.ReplaceAll(s, []byte{escapeQuotation}, []byte{escapeQuotation, escapeQuotation}))
func escape(s []byte, bf *bytes.Buffer, escapeQuotation []byte) {
if len(escapeQuotation) > 0 {
bf.Write(bytes.ReplaceAll(s, escapeQuotation, append(escapeQuotation, escapeQuotation...)))
return
}

Expand Down Expand Up @@ -153,11 +153,11 @@ func (r RowReceiverArr) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) {
bf.WriteByte(')')
}

func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) {
func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) {
for i, receiver := range r {
receiver.WriteToBufferInCsv(bf, escapeBackslash, csvNullValue)
receiver.WriteToBufferInCsv(bf, escapeBackslash, opt)
if i != len(r)-1 {
bf.WriteByte(',')
bf.Write(opt.separator)
}
}
}
Expand All @@ -174,11 +174,11 @@ func (s SQLTypeNumber) WriteToBuffer(bf *bytes.Buffer, _ bool) {
}
}

func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) {
func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) {
if s.RawBytes != nil {
bf.Write(s.RawBytes)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}

Expand All @@ -198,21 +198,21 @@ func (s *SQLTypeString) ReportSize() uint64 {

func (s *SQLTypeString) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) {
if s.RawBytes != nil {
bf.WriteByte(quotationMark)
bf.Write(quotationMark)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, quotationMark))
bf.WriteByte(quotationMark)
bf.Write(quotationMark)
} else {
bf.WriteString(nullValue)
}
}

func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) {
func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) {
if s.RawBytes != nil {
bf.WriteByte(doubleQuotationMark)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, doubleQuotationMark))
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, opt.delimiter))
bf.Write(opt.delimiter)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}

Expand All @@ -231,12 +231,12 @@ func (s *SQLTypeBytes) WriteToBuffer(bf *bytes.Buffer, _ bool) {
fmt.Fprintf(bf, "x'%x'", s.RawBytes)
}

func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) {
func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) {
if s.RawBytes != nil {
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
bf.Write(s.RawBytes)
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}
14 changes: 1 addition & 13 deletions dumpling/v4/export/test_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func (m *mockTableIR) EscapeBackSlash() bool {
return m.escapeBackSlash
}

func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) TableDataIR {
func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) *mockTableIR {
return &mockTableIR{
dbName: databaseName,
tblName: tableName,
Expand All @@ -158,15 +158,3 @@ func newMockTableIR(databaseName, tableName string, data [][]driver.Value, speci
colTypes: colTypes,
}
}

// newMockTableIRWithError returns a *mockTableIR (as a TableDataIR) populated
// from the given database name, table name, row data, special comments and
// column types, with selectedField fixed to "*" and rowErr set to err.
func newMockTableIRWithError(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string, err error) TableDataIR {
return &mockTableIR{
dbName: databaseName,
tblName: tableName,
data: data,
specCmt: specialComments,
selectedField: "*",
colTypes: colTypes,
rowErr: err,
}
}
14 changes: 13 additions & 1 deletion dumpling/v4/export/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ type outputFileNamer struct {
tableName string
}

// csvOption groups the CSV formatting settings that are handed to
// WriteInsertInCsv and to the WriteToBufferInCsv methods.
type csvOption struct {
// nullValue is the text written for a column whose raw bytes are nil.
nullValue string
// separator is written between adjacent fields of a row or header line.
separator []byte
// delimiter is written immediately before and after string/bytes values
// and header column names.
delimiter []byte
}

func newOutputFileNamer(ir TableDataIR) *outputFileNamer {
return &outputFileNamer{
chunkIndex: ir.ChunkIndex(),
Expand All @@ -142,10 +148,16 @@ func (f *CsvWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
chunksIter := buildChunksIter(ir, f.cfg.FileSize, f.cfg.StatementSize)
defer chunksIter.Rows().Close()

opt := &csvOption{
nullValue: f.cfg.CsvNullValue,
separator: []byte(f.cfg.CsvSeparator),
delimiter: []byte(f.cfg.CsvDelimiter),
}

for {
filePath := path.Join(f.cfg.OutputDirPath, fileName)
fileWriter, tearDown := buildInterceptFileWriter(filePath)
err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, f.cfg.CsvNullValue)
err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, opt)
tearDown()
if err != nil {
return err
Expand Down
12 changes: 6 additions & 6 deletions dumpling/v4/export/writer_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func WriteInsert(pCtx context.Context, tblIR TableDataIR, w io.Writer) error {
return wp.Error()
}

func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, csvNullValue string) error {
func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, opt *csvOption) error {
fileRowIter := tblIR.Rows()
if !fileRowIter.HasNext() {
return nil
Expand Down Expand Up @@ -223,11 +223,11 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe

if !noHeader && len(tblIR.ColumnNames()) != 0 {
for i, col := range tblIR.ColumnNames() {
bf.WriteByte(doubleQuotationMark)
escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, doubleQuotationMark))
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, opt.delimiter))
bf.Write(opt.delimiter)
if i != len(tblIR.ColumnTypes())-1 {
bf.WriteByte(',')
bf.Write(opt.separator)
}
}
bf.WriteByte('\n')
Expand All @@ -241,7 +241,7 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe
return err
}

row.WriteToBufferInCsv(bf, escapeBackSlash, csvNullValue)
row.WriteToBufferInCsv(bf, escapeBackSlash, opt)
counter += 1

if bf.Len() >= lengthLimit {
Expand Down
43 changes: 41 additions & 2 deletions dumpling/v4/export/writer_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func (s *testUtilSuite) TestWriteInsertReturnsError(c *C) {
}
// row errors at last line
rowErr := errors.New("mock row error")
tableIR := newMockTableIRWithError("test", "employee", data, specCmts, colTypes, rowErr)
tableIR := newMockTableIR("test", "employee", data, specCmts, colTypes)
tableIR.rowErr = rowErr
bf := &bytes.Buffer{}

err := WriteInsert(context.Background(), tableIR, bf)
Expand All @@ -112,13 +113,51 @@ func (s *testUtilSuite) TestWriteInsertInCsv(c *C) {
tableIR := newMockTableIR("test", "employee", data, nil, colTypes)
bf := &bytes.Buffer{}

err := WriteInsertInCsv(context.Background(), tableIR, bf, true, "\\N")
// test nullValue
opt := &csvOption{separator: []byte(","), delimiter: doubleQuotationMark, nullValue: "\\N"}
err := WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected := "1,\"male\",\"bob@mail.com\",\"020-1234\",\\N\n" +
"2,\"female\",\"sarah@mail.com\",\"020-1253\",\"healthy\"\n" +
"3,\"male\",\"john@mail.com\",\"020-1256\",\"healthy\"\n" +
"4,\"female\",\"sarah@mail.com\",\"020-1235\",\"healthy\"\n"
c.Assert(bf.String(), Equals, expected)

// test delimiter
bf.Reset()
opt.delimiter = quotationMark
err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected = "1,'male','bob@mail.com','020-1234',\\N\n" +
"2,'female','sarah@mail.com','020-1253','healthy'\n" +
"3,'male','john@mail.com','020-1256','healthy'\n" +
"4,'female','sarah@mail.com','020-1235','healthy'\n"
c.Assert(bf.String(), Equals, expected)

// test separator
bf.Reset()
opt.separator = []byte(";")
err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected = "1;'male';'bob@mail.com';'020-1234';\\N\n" +
"2;'female';'sarah@mail.com';'020-1253';'healthy'\n" +
"3;'male';'john@mail.com';'020-1256';'healthy'\n" +
"4;'female';'sarah@mail.com';'020-1235';'healthy'\n"
c.Assert(bf.String(), Equals, expected)

// test delimiter that included in values
bf.Reset()
opt.separator = []byte("&;,?")
opt.delimiter = []byte("ma")
tableIR.colNames = []string{"id", "gender", "email", "phone_number", "status"}
err = WriteInsertInCsv(context.Background(), tableIR, bf, false, opt)
c.Assert(err, IsNil)
expected = "maidma&;,?magenderma&;,?maemamailma&;,?maphone_numberma&;,?mastatusma\n" +
"1&;,?mamamalema&;,?mabob@mamail.comma&;,?ma020-1234ma&;,?\\N\n" +
"2&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1253ma&;,?mahealthyma\n" +
"3&;,?mamamalema&;,?majohn@mamail.comma&;,?ma020-1256ma&;,?mahealthyma\n" +
"4&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1235ma&;,?mahealthyma\n"
c.Assert(bf.String(), Equals, expected)
}

func (s *testUtilSuite) TestSQLDataTypes(c *C) {
Expand Down

0 comments on commit 60b859d

Please sign in to comment.