diff --git a/dumpling/cmd/dumpling/main.go b/dumpling/cmd/dumpling/main.go index 747928a8..5fd83920 100644 --- a/dumpling/cmd/dumpling/main.go +++ b/dumpling/cmd/dumpling/main.go @@ -63,6 +63,8 @@ var ( caPath string certPath string keyPath string + csvSeparator string + csvDelimiter string dumpEmptyDatabase bool escapeBackslash bool @@ -115,6 +117,8 @@ func main() { pflag.StringVar(&caPath, "ca", "", "The path name to the certificate authority file for TLS connection") pflag.StringVar(&certPath, "cert", "", "The path name to the client certificate file for TLS connection") pflag.StringVar(&keyPath, "key", "", "The path name to the client private key file for TLS connection") + pflag.StringVar(&csvSeparator, "csv-separator", ",", "The separator for csv files, default ','") + pflag.StringVar(&csvDelimiter, "csv-delimiter", "\"", "The delimiter for values in csv files, default '\"'") printVersion := pflag.BoolP("version", "V", false, "Print Dumpling version") @@ -185,6 +189,8 @@ func main() { conf.Security.CertPath = certPath conf.Security.KeyPath = keyPath conf.SessionParams["tidb_mem_quota_query"] = tidbMemQuotaQuery + conf.CsvSeparator = csvSeparator + conf.CsvDelimiter = csvDelimiter err = export.Dump(context.Background(), conf) if err != nil { diff --git a/dumpling/v4/export/config.go b/dumpling/v4/export/config.go index 84817b0f..85ffd936 100644 --- a/dumpling/v4/export/config.go +++ b/dumpling/v4/export/config.go @@ -46,6 +46,8 @@ type Config struct { NoData bool CsvNullValue string Sql string + CsvSeparator string + CsvDelimiter string TableFilter filter.Filter Rows uint64 diff --git a/dumpling/v4/export/ir.go b/dumpling/v4/export/ir.go index 4b4e0736..71a71a3f 100644 --- a/dumpling/v4/export/ir.go +++ b/dumpling/v4/export/ir.go @@ -39,7 +39,7 @@ type RowReceiverStringer interface { type Stringer interface { WriteToBuffer(*bytes.Buffer, bool) - WriteToBufferInCsv(*bytes.Buffer, bool, string) + WriteToBufferInCsv(*bytes.Buffer, bool, *csvOption) } type RowReceiver interface { diff --git a/dumpling/v4/export/sql_type.go b/dumpling/v4/export/sql_type.go index aabe6683..c0f781da 100644 --- a/dumpling/v4/export/sql_type.go +++ b/dumpling/v4/export/sql_type.go @@ -9,8 +9,8 @@ import ( var colTypeRowReceiverMap = map[string]func() RowReceiverStringer{} var nullValue = "NULL" -var quotationMark byte = '\'' -var doubleQuotationMark byte = '"' +var quotationMark = []byte{'\''} +var doubleQuotationMark = []byte{'"'} func init() { for _, s := range dataTypeString { @@ -45,16 +45,16 @@ var dataTypeBin = []string{ "BIT", } -func getEscapeQuotation(escapeBackSlash bool, escapeQuotation byte) byte { +func getEscapeQuotation(escapeBackSlash bool, escapeQuotation []byte) []byte { if escapeBackSlash { - return 0 + return nil } return escapeQuotation } -func escape(s []byte, bf *bytes.Buffer, escapeQuotation byte) { - if escapeQuotation != 0 { - bf.Write(bytes.ReplaceAll(s, []byte{escapeQuotation}, []byte{escapeQuotation, escapeQuotation})) +func escape(s []byte, bf *bytes.Buffer, escapeQuotation []byte) { + if len(escapeQuotation) > 0 { + bf.Write(bytes.ReplaceAll(s, escapeQuotation, append(escapeQuotation, escapeQuotation...))) return } @@ -153,11 +153,11 @@ func (r RowReceiverArr) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) { bf.WriteByte(')') } -func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) { +func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) { for i, receiver := range r { - receiver.WriteToBufferInCsv(bf, escapeBackslash, csvNullValue) + receiver.WriteToBufferInCsv(bf, escapeBackslash, opt) if i != len(r)-1 { - bf.WriteByte(',') + bf.Write(opt.separator) } } } @@ -174,11 +174,11 @@ func (s SQLTypeNumber) WriteToBuffer(bf *bytes.Buffer, _ bool) { } } -func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) { +func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) { if s.RawBytes != nil { bf.Write(s.RawBytes) } else { - bf.WriteString(csvNullValue) + bf.WriteString(opt.nullValue) } } @@ -198,21 +198,21 @@ func (s *SQLTypeString) ReportSize() uint64 { func (s *SQLTypeString) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) { if s.RawBytes != nil { - bf.WriteByte(quotationMark) + bf.Write(quotationMark) escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, quotationMark)) - bf.WriteByte(quotationMark) + bf.Write(quotationMark) } else { bf.WriteString(nullValue) } } -func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) { +func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) { if s.RawBytes != nil { - bf.WriteByte(doubleQuotationMark) - escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, doubleQuotationMark)) - bf.WriteByte(doubleQuotationMark) + bf.Write(opt.delimiter) + escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, opt.delimiter)) + bf.Write(opt.delimiter) } else { - bf.WriteString(csvNullValue) + bf.WriteString(opt.nullValue) } } @@ -231,12 +231,12 @@ func (s *SQLTypeBytes) WriteToBuffer(bf *bytes.Buffer, _ bool) { fmt.Fprintf(bf, "x'%x'", s.RawBytes) } -func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) { +func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) { if s.RawBytes != nil { - bf.WriteByte(doubleQuotationMark) + bf.Write(opt.delimiter) bf.Write(s.RawBytes) - bf.WriteByte(doubleQuotationMark) + bf.Write(opt.delimiter) } else { - bf.WriteString(csvNullValue) + bf.WriteString(opt.nullValue) } } diff --git a/dumpling/v4/export/test_util.go b/dumpling/v4/export/test_util.go index 799bdf30..1c625541 100644 --- a/dumpling/v4/export/test_util.go +++ b/dumpling/v4/export/test_util.go @@ -148,7 +148,7 @@ func (m *mockTableIR) EscapeBackSlash() bool { return m.escapeBackSlash } -func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) TableDataIR { +func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) *mockTableIR { return &mockTableIR{ dbName: databaseName, tblName: tableName, @@ -158,15 +158,3 @@ func newMockTableIR(databaseName, tableName string, data [][]driver.Value, speci colTypes: colTypes, } } - -func newMockTableIRWithError(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string, err error) TableDataIR { - return &mockTableIR{ - dbName: databaseName, - tblName: tableName, - data: data, - specCmt: specialComments, - selectedField: "*", - colTypes: colTypes, - rowErr: err, - } -} diff --git a/dumpling/v4/export/writer.go b/dumpling/v4/export/writer.go index 234bbbd4..d28b2324 100644 --- a/dumpling/v4/export/writer.go +++ b/dumpling/v4/export/writer.go @@ -118,6 +118,12 @@ type outputFileNamer struct { tableName string } +type csvOption struct { + nullValue string + separator []byte + delimiter []byte +} + func newOutputFileNamer(ir TableDataIR) *outputFileNamer { return &outputFileNamer{ chunkIndex: ir.ChunkIndex(), @@ -142,10 +148,16 @@ func (f *CsvWriter) WriteTableData(ctx context.Context, ir TableDataIR) error { chunksIter := buildChunksIter(ir, f.cfg.FileSize, f.cfg.StatementSize) defer chunksIter.Rows().Close() + opt := &csvOption{ + nullValue: f.cfg.CsvNullValue, + separator: []byte(f.cfg.CsvSeparator), + delimiter: []byte(f.cfg.CsvDelimiter), + } + for { filePath := path.Join(f.cfg.OutputDirPath, fileName) fileWriter, tearDown := buildInterceptFileWriter(filePath) - err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, f.cfg.CsvNullValue) + err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, opt) tearDown() if err != nil { return err diff --git a/dumpling/v4/export/writer_util.go b/dumpling/v4/export/writer_util.go index d2a20eea..fc7da6dd 100644 --- a/dumpling/v4/export/writer_util.go +++ b/dumpling/v4/export/writer_util.go @@ -189,7 +189,7 @@ func WriteInsert(pCtx context.Context, tblIR TableDataIR, w io.Writer) error { return wp.Error() } -func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, csvNullValue string) error { +func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, opt *csvOption) error { fileRowIter := tblIR.Rows() if !fileRowIter.HasNext() { return nil @@ -223,11 +223,11 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe if !noHeader && len(tblIR.ColumnNames()) != 0 { for i, col := range tblIR.ColumnNames() { - bf.WriteByte(doubleQuotationMark) - escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, doubleQuotationMark)) - bf.WriteByte(doubleQuotationMark) + bf.Write(opt.delimiter) + escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, opt.delimiter)) + bf.Write(opt.delimiter) if i != len(tblIR.ColumnTypes())-1 { - bf.WriteByte(',') + bf.Write(opt.separator) } } bf.WriteByte('\n') @@ -241,7 +241,7 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe return err } - row.WriteToBufferInCsv(bf, escapeBackSlash, csvNullValue) + row.WriteToBufferInCsv(bf, escapeBackSlash, opt) counter += 1 if bf.Len() >= lengthLimit { diff --git a/dumpling/v4/export/writer_util_test.go b/dumpling/v4/export/writer_util_test.go index 50c708f4..0b357d13 100644 --- a/dumpling/v4/export/writer_util_test.go +++ b/dumpling/v4/export/writer_util_test.go @@ -87,7 +87,8 @@ func (s *testUtilSuite) TestWriteInsertReturnsError(c *C) { } // row errors at last line rowErr := errors.New("mock row error") - tableIR := newMockTableIRWithError("test", "employee", data, specCmts, colTypes, rowErr) + tableIR := newMockTableIR("test", "employee", data, specCmts, colTypes) + tableIR.rowErr = rowErr bf := &bytes.Buffer{} err := WriteInsert(context.Background(), tableIR, bf) @@ -112,13 +113,51 @@ func (s *testUtilSuite) TestWriteInsertInCsv(c *C) { tableIR := newMockTableIR("test", "employee", data, nil, colTypes) bf := &bytes.Buffer{} - err := WriteInsertInCsv(context.Background(), tableIR, bf, true, "\\N") + // test nullValue + opt := &csvOption{separator: []byte(","), delimiter: doubleQuotationMark, nullValue: "\\N"} + err := WriteInsertInCsv(context.Background(), tableIR, bf, true, opt) c.Assert(err, IsNil) expected := "1,\"male\",\"bob@mail.com\",\"020-1234\",\\N\n" + "2,\"female\",\"sarah@mail.com\",\"020-1253\",\"healthy\"\n" + "3,\"male\",\"john@mail.com\",\"020-1256\",\"healthy\"\n" + "4,\"female\",\"sarah@mail.com\",\"020-1235\",\"healthy\"\n" c.Assert(bf.String(), Equals, expected) + + // test delimiter + bf.Reset() + opt.delimiter = quotationMark + err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt) + c.Assert(err, IsNil) + expected = "1,'male','bob@mail.com','020-1234',\\N\n" + + "2,'female','sarah@mail.com','020-1253','healthy'\n" + + "3,'male','john@mail.com','020-1256','healthy'\n" + + "4,'female','sarah@mail.com','020-1235','healthy'\n" + c.Assert(bf.String(), Equals, expected) + + // test separator + bf.Reset() + opt.separator = []byte(";") + err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt) + c.Assert(err, IsNil) + expected = "1;'male';'bob@mail.com';'020-1234';\\N\n" + + "2;'female';'sarah@mail.com';'020-1253';'healthy'\n" + + "3;'male';'john@mail.com';'020-1256';'healthy'\n" + + "4;'female';'sarah@mail.com';'020-1235';'healthy'\n" + c.Assert(bf.String(), Equals, expected) + + // test delimiter that included in values + bf.Reset() + opt.separator = []byte("&;,?") + opt.delimiter = []byte("ma") + tableIR.colNames = []string{"id", "gender", "email", "phone_number", "status"} + err = WriteInsertInCsv(context.Background(), tableIR, bf, false, opt) + c.Assert(err, IsNil) + expected = "maidma&;,?magenderma&;,?maemamailma&;,?maphone_numberma&;,?mastatusma\n" + + "1&;,?mamamalema&;,?mabob@mamail.comma&;,?ma020-1234ma&;,?\\N\n" + + "2&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1253ma&;,?mahealthyma\n" + + "3&;,?mamamalema&;,?majohn@mamail.comma&;,?ma020-1256ma&;,?mahealthyma\n" + + "4&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1235ma&;,?mahealthyma\n" + c.Assert(bf.String(), Equals, expected) } func (s *testUtilSuite) TestSQLDataTypes(c *C) {