Skip to content

Commit

Permalink
executor: fix escape for select into outfile (pingcap#19661)
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-srebot authored Sep 10, 2020
1 parent 88f98fc commit ab51272
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 17 deletions.
78 changes: 61 additions & 17 deletions executor/select_into.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@ type SelectIntoExec struct {
baseExecutor
intoOpt *ast.SelectIntoOption

lineBuf []byte
realBuf []byte
writer *bufio.Writer
dstFile *os.File
chk *chunk.Chunk
started bool
lineBuf []byte
realBuf []byte
fieldBuf []byte
escapeBuf []byte
enclosed bool
writer *bufio.Writer
dstFile *os.File
chk *chunk.Chunk
started bool
}

// Open implements the Executor Open interface.
Expand All @@ -57,6 +60,8 @@ func (s *SelectIntoExec) Open(ctx context.Context) error {
s.writer = bufio.NewWriter(s.dstFile)
s.chk = newFirstChunk(s.children[0])
s.lineBuf = make([]byte, 0, 1024)
s.fieldBuf = make([]byte, 0, 64)
s.escapeBuf = make([]byte, 0, 64)
return s.baseExecutor.Open(ctx)
}

Expand All @@ -82,6 +87,35 @@ func (s *SelectIntoExec) considerEncloseOpt(et types.EvalType) bool {
et == types.ETJson
}

// escapeField returns f with every byte that needs escaping prefixed by the
// configured FIELDS ESCAPED BY byte.
//
// When the escape byte is 0 (escaping disabled) f is returned as-is.
// Otherwise the result is built in s.escapeBuf, which is reused on every
// call, so the returned slice is only valid until the next call.
//
// A byte is escaped when it is:
//   - a zero byte (which is additionally rewritten to the character '0'),
//   - the escape byte or the enclose byte,
//   - the first byte of the field terminator, but only while s.enclosed is false,
//   - the first byte of the line terminator.
func (s *SelectIntoExec) escapeField(f []byte) []byte {
	fieldsOpt := s.intoOpt.FieldsInfo
	escByte := fieldsOpt.Escaped
	if escByte == 0 {
		return f
	}

	out := s.escapeBuf[:0]
	for i := 0; i < len(f); i++ {
		ch := f[i]
		// The conditions are evaluated left to right and short-circuit, so the
		// line terminator option is only consulted when nothing earlier matched.
		needEscape := ch == 0 ||
			ch == escByte ||
			ch == fieldsOpt.Enclosed ||
			(!s.enclosed && len(fieldsOpt.Terminated) > 0 && ch == fieldsOpt.Terminated[0]) ||
			(len(s.intoOpt.LinesInfo.Terminated) > 0 && ch == s.intoOpt.LinesInfo.Terminated[0])
		if ch == 0 {
			// A zero byte is written as the escape byte followed by '0'.
			ch = '0'
		}
		if needEscape {
			out = append(out, escByte)
		}
		out = append(out, ch)
	}
	s.escapeBuf = out
	return out
}

func (s *SelectIntoExec) dumpToOutfile() error {
lineTerm := "\n"
if s.intoOpt.LinesInfo.Terminated != "" {
Expand All @@ -102,6 +136,8 @@ func (s *SelectIntoExec) dumpToOutfile() error {
nullTerm := []byte("\\N")
if s.intoOpt.FieldsInfo.Escaped != byte(0) {
nullTerm[0] = s.intoOpt.FieldsInfo.Escaped
} else {
nullTerm = []byte("NULL")
}

cols := s.children[0].Schema().Columns
Expand All @@ -120,34 +156,42 @@ func (s *SelectIntoExec) dumpToOutfile() error {
if (encloseFlag && !encloseOpt) ||
(encloseFlag && encloseOpt && s.considerEncloseOpt(et)) {
s.lineBuf = append(s.lineBuf, encloseByte)
s.enclosed = true
} else {
s.enclosed = false
}
s.fieldBuf = s.fieldBuf[:0]
switch col.GetType().Tp {
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong:
s.lineBuf = strconv.AppendInt(s.lineBuf, row.GetInt64(j), 10)
s.fieldBuf = strconv.AppendInt(s.fieldBuf, row.GetInt64(j), 10)
case mysql.TypeLonglong:
if mysql.HasUnsignedFlag(col.GetType().Flag) {
s.lineBuf = strconv.AppendUint(s.lineBuf, row.GetUint64(j), 10)
s.fieldBuf = strconv.AppendUint(s.fieldBuf, row.GetUint64(j), 10)
} else {
s.lineBuf = strconv.AppendInt(s.lineBuf, row.GetInt64(j), 10)
s.fieldBuf = strconv.AppendInt(s.fieldBuf, row.GetInt64(j), 10)
}
case mysql.TypeFloat, mysql.TypeDouble:
s.realBuf, s.lineBuf = DumpRealOutfile(s.realBuf, s.lineBuf, row.GetFloat64(j), col.RetType)
s.realBuf, s.fieldBuf = DumpRealOutfile(s.realBuf, s.fieldBuf, row.GetFloat64(j), col.RetType)
case mysql.TypeNewDecimal:
s.lineBuf = append(s.lineBuf, row.GetMyDecimal(j).String()...)
case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeBit,
s.fieldBuf = append(s.fieldBuf, row.GetMyDecimal(j).String()...)
case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar,
mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
s.fieldBuf = append(s.fieldBuf, row.GetBytes(j)...)
case mysql.TypeBit:
// bit value won't be escaped anyway (verified on MySQL, test case added)
s.lineBuf = append(s.lineBuf, row.GetBytes(j)...)
case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
s.lineBuf = append(s.lineBuf, row.GetTime(j).String()...)
s.fieldBuf = append(s.fieldBuf, row.GetTime(j).String()...)
case mysql.TypeDuration:
s.lineBuf = append(s.lineBuf, row.GetDuration(j, col.GetType().Decimal).String()...)
s.fieldBuf = append(s.fieldBuf, row.GetDuration(j, col.GetType().Decimal).String()...)
case mysql.TypeEnum:
s.lineBuf = append(s.lineBuf, row.GetEnum(j).String()...)
s.fieldBuf = append(s.fieldBuf, row.GetEnum(j).String()...)
case mysql.TypeSet:
s.lineBuf = append(s.lineBuf, row.GetSet(j).String()...)
s.fieldBuf = append(s.fieldBuf, row.GetSet(j).String()...)
case mysql.TypeJSON:
s.lineBuf = append(s.lineBuf, row.GetJSON(j).String()...)
s.fieldBuf = append(s.fieldBuf, row.GetJSON(j).String()...)
}
s.lineBuf = append(s.lineBuf, s.escapeField(s.fieldBuf)...)
if (encloseFlag && !encloseOpt) ||
(encloseFlag && encloseOpt && s.considerEncloseOpt(et)) {
s.lineBuf = append(s.lineBuf, encloseByte)
Expand Down
70 changes: 70 additions & 0 deletions executor/select_into_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,76 @@ func (s *testSuite1) TestSelectIntoOutfileConstant(c *C) {
`, outfile, c)
}

// TestDeliminators exercises the FIELDS/LINES delimiter options of
// SELECT ... INTO OUTFILE: validation of ENCLOSED BY / ESCAPED BY arguments,
// how NULL is written, and which bytes are escaped in the output file.
// Each cmpAndRm call is passed the file content expected from the preceding
// statement.
func (s *testSuite1) TestDeliminators(c *C) {
outfile := randomSelectFilePath("TestDeliminators")
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")

tk.MustExec("CREATE TABLE `tx` (`a` varbinary(20) DEFAULT NULL,`b` int DEFAULT NULL)")
err := tk.ExecToErr(fmt.Sprintf("select * from `tx` into outfile %q fields enclosed by '\"\"'", outfile))
// ENCLOSED BY must be a single character, so a two-character argument is rejected
c.Check(err, NotNil)
c.Assert(strings.Contains(err.Error(), "Field separator argument is not what is expected"), IsTrue, Commentf("err: %v", err))
err = tk.ExecToErr(fmt.Sprintf("select * from `tx` into outfile %q fields escaped by 'gg'", outfile))
// the same restriction applies to ESCAPED BY
c.Check(err, NotNil)
c.Assert(strings.Contains(err.Error(), "Field separator argument is not what is expected"), IsTrue, Commentf("err: %v", err))

// since both statements above failed, no outfile should remain on disk
_, err = os.Stat(outfile)
c.Check(os.IsNotExist(err), IsTrue, Commentf("err: %v", err))

tk.MustExec("insert into tx values (NULL, NULL);\n")
tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q fields escaped by ''", outfile))
// if ESCAPED BY is empty, NULL is written as the literal word NULL instead of an escaped N
cmpAndRm("NULL\tNULL\n", outfile, c)

tk.MustExec("delete from tx")
tk.MustExec("insert into tx values ('d\",\"e\",', 3), ('\\\\', 2)")
tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n'", outfile))
// the enclose character and the escape character are escaped inside the field;
// the field terminator ',' is left alone because the field is enclosed
cmpAndRm("\"d\\\",\\\"e\\\",\",\"3\"\n\"\\\\\",\"2\"\n", outfile, c)

tk.MustExec("delete from tx")
tk.MustExec("insert into tx values ('a\tb', 1)")
tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' escaped by '\t' LINES TERMINATED BY '\\n'", outfile))
// with a custom escape character (a tab here), an occurrence of that character
// in the data is escaped by itself, i.e. doubled
cmpAndRm("\"a\t\tb\",\"1\"\n", outfile, c)

tk.MustExec("delete from tx")
tk.MustExec(`insert into tx values ('d","e",', 1)`)
tk.MustExec(`insert into tx values (unhex("00"), 2)`)
tk.MustExec(`insert into tx values ("\r\n\b\Z\t", 3)`)
tk.MustExec(`insert into tx values (null, 4)`)
tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n'", outfile))
// the zero byte and the line terminator are escaped, other control bytes are
// written as-is, and a NULL value is written as \N without being enclosed
cmpAndRm("\"d\\\",\\\"e\\\",\",\"1\"\n"+"\"\\0\",\"2\"\n"+"\"\r\\\n\b\032\t\",\"3\"\n"+"\\N,\"4\"\n", outfile, c)

tk.MustExec("create table tb (s char(10), b bit(48), bb blob(6))")
tk.MustExec("insert into tb values ('\\0\\b\\n\\r\\t\\Z', _binary '\\0\\b\\n\\r\\t\\Z', unhex('00080A0D091A'))")
tk.MustExec(fmt.Sprintf("select * from tb into outfile %q", outfile))
// the char and blob columns are escaped, but the bit column is written raw
// (bit type won't be escaped, verified on MySQL)
cmpAndRm("\\0\b\\\n\r\\\t\032\t"+"\000\b\n\r\t\032\t"+"\\0\b\\\n\r\\\t\032\n", outfile, c)

tk.MustExec("create table zero (a varchar(10), b varchar(10), c varchar(10))")
tk.MustExec("insert into zero values (unhex('00'), _binary '\\0', '\\0')")
tk.MustExec(fmt.Sprintf("select * from zero into outfile %q", outfile))
// a zero byte is always escaped, however it was inserted
cmpAndRm("\\0\t\\0\t\\0\n", outfile, c)
tk.MustExec(fmt.Sprintf("select * from zero into outfile %q fields enclosed by '\"'", outfile))
// a zero byte is still escaped when the field is enclosed
cmpAndRm("\"\\0\"\t\"\\0\"\t\"\\0\"\n", outfile, c)

tk.MustExec("create table tt (a char(10), b char(10), c char(10))")
tk.MustExec("insert into tt values ('abcd', 'abcd', 'abcd')")
tk.MustExec(fmt.Sprintf("select * from tt into outfile %q fields terminated by 'a-' lines terminated by 'b--'", outfile))
// when fields are not enclosed, the first character of both the field and the
// line terminator is escaped
cmpAndRm("\\a\\bcda-\\a\\bcda-\\a\\bcdb--", outfile, c)
tk.MustExec(fmt.Sprintf("select * from tt into outfile %q fields terminated by 'a-' enclosed by '\"' lines terminated by 'b--'", outfile))
// when fields are enclosed, only the line terminator's first character is escaped
cmpAndRm("\"a\\bcd\"a-\"a\\bcd\"a-\"a\\bcd\"b--", outfile, c)
}

func (s *testSuite1) TestDumpReal(c *C) {
cases := []struct {
val float64
Expand Down

0 comments on commit ab51272

Please sign in to comment.