Skip to content

Commit

Permalink
Fix interpretation of integers in a BIT column (pingcap#161)
Browse files Browse the repository at this point in the history
* tests: fix existing test failure

* mydump: fixed conversion of integers into bits

We need to create a special branch for integers, since casting 123 and
'123' into the BIT type behaves differently.

Also fixed handling of 0x/0b bit strings since Ragel doesn't recognize
'+' in a regex -_-.

* mydump: store description of `token` in an array instead of switch cases

* tests: test behavior of integers for ENUM and SET types as well
  • Loading branch information
kennytm authored Apr 16, 2019
1 parent c103cd1 commit fcbafa0
Show file tree
Hide file tree
Showing 13 changed files with 775 additions and 674 deletions.
2 changes: 1 addition & 1 deletion lightning/mydump/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ func (s *testMydumpLoaderSuite) TestLoader(c *C) {
}{
{name: "i", dataFiles: 1},
{name: "report_case_high_risk", dataFiles: 1},
{name: "tbl_autoid", dataFiles: 1},
{name: "tbl_multi_index", dataFiles: 1},
{name: "tbl_autoid", dataFiles: 1},
}

for i, table := range expected {
Expand Down
60 changes: 56 additions & 4 deletions lightning/mydump/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ package mydump

import (
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -154,12 +156,43 @@ const (
tokFalse
tokHexString
tokBinString
tokInteger
tokSingleQuoted
tokDoubleQuoted
tokBackQuoted
tokUnquoted
)

// tokenDescriptions holds a human-readable name for each token constant,
// indexed by the token's integer value. The array length is derived from the
// largest index used below; any index inside that range without an explicit
// entry holds the empty string, which token.String reports as unknown.
var tokenDescriptions = [...]string{
tokNil: "<Nil>",
tokRowBegin: "RowBegin",
tokRowEnd: "RowEnd",
tokValues: "Values",
tokNull: "Null",
tokTrue: "True",
tokFalse: "False",
tokHexString: "HexString",
tokBinString: "BinString",
tokInteger: "Integer",
tokSingleQuoted: "SingleQuoted",
tokDoubleQuoted: "DoubleQuoted",
tokBackQuoted: "BackQuoted",
tokUnquoted: "Unquoted",
}

// String implements the fmt.Stringer interface.
//
// It returns the name registered for the token in tokenDescriptions. A token
// whose value is outside the table, or whose table entry is empty, is
// rendered as "<Unknown(N)>" where N is its integer value. Mainly used for
// debugging a token.
func (tok token) String() string {
	idx := int(tok)
	// Single guard: out-of-range index or a hole in the table both mean the
	// token has no description.
	if idx < 0 || idx >= len(tokenDescriptions) || tokenDescriptions[idx] == "" {
		return fmt.Sprintf("<Unknown(%d)>", idx)
	}
	return tokenDescriptions[idx]
}

func (parser *blockParser) readBlock() error {
startTime := time.Now()

Expand Down Expand Up @@ -287,10 +320,10 @@ func (parser *ChunkParser) ReadRow() error {
// stateValues (no-op)
// ( tokRowBegin
// stateRow (reset row)
// 1 tokUnquoted
// 1 tokInteger
// stateRow (append value)
// ,
// 2 tokUnquoted
// 2 tokInteger
// stateRow (append value)
// ) tokRowEnd
// return
Expand All @@ -305,7 +338,7 @@ func (parser *ChunkParser) ReadRow() error {
// ,
// ( tokRowBegin
// stateRow (reset row)
// 3 tokUnquoted
// 3 tokInteger
// stateRow (append value)
// ) tokRowEnd
// return
Expand All @@ -327,7 +360,7 @@ func (parser *ChunkParser) ReadRow() error {
// stateValues
// ( tokRowBegin
// stateRow (reset row)
// 4 tokUnquoted
// 4 tokInteger
// stateRow (append value)
// ) tokRowEnd
// return
Expand Down Expand Up @@ -377,6 +410,25 @@ func (parser *ChunkParser) ReadRow() error {
value.SetInt64(1)
case tokFalse:
value.SetInt64(0)
case tokInteger:
c := string(content)
if strings.HasPrefix(c, "-") {
i, err := strconv.ParseInt(c, 10, 64)
if err == nil {
value.SetInt64(i)
break
}
} else {
u, err := strconv.ParseUint(c, 10, 64)
if err == nil {
value.SetUint64(u)
break
}
}
// if the integer is too long, fall back to treating it as a
// string (all types that treat integers specially, like BIT,
// can't handle integers of more than 64 bits anyway)
fallthrough
case tokUnquoted, tokSingleQuoted, tokDoubleQuoted:
value.SetString(parser.unescapeString(string(content)))
case tokHexString:
Expand Down
10 changes: 8 additions & 2 deletions lightning/mydump/parser.rl
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ double_quoted = '"' (^'"' | bs any | '""')** '"';
back_quoted = '`' (^'`' | '``')* '`';
unquoted = ^([,;()'"`] | space)+;

hex_string = /0x[0-9a-fA-F]+/ | "x'"i [0-9a-f]* "'";
bin_string = /0b[01]+/ | "b'"i [01]* "'";
integer = '-'? [0-9]+;
hex_string = '0x' [0-9a-fA-F]+ | "x'"i [0-9a-fA-F]* "'";
bin_string = '0b' [01]+ | "b'"i [01]* "'";

main := |*
comment;
Expand Down Expand Up @@ -98,6 +99,11 @@ main := |*
fbreak;
};

integer => {
consumedToken = tokInteger
fbreak;
};

hex_string => {
consumedToken = tokHexString
fbreak;
Expand Down
Loading

0 comments on commit fcbafa0

Please sign in to comment.