Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

starlark: fix bug in int(string, base=int) #344

Merged
merged 1 commit into from
Jan 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion doc/spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -3186,11 +3186,30 @@ If x is a `bool`, the result is 0 for `False` or 1 for `True`.
If x is a string, it is interpreted as a sequence of digits in the
specified base, decimal by default.
If `base` is zero, x is interpreted like an integer literal, the base
being inferred from an optional base marker such as `0b`, `0o`, or
being inferred from an optional base prefix such as `0b`, `0o`, or
`0x` preceding the first digit.
When the `base` is provided explictly, a matching base prefix is
also permitted, and has no effect.
Irrespective of base, the string may start with an optional `+` or `-`
sign indicating the sign of the result.

```python
int("11") # 11
int("11", 0) # 11
int("11", 10) # 11
int("11", 2) # 3
int("11", 8) # 9
int("11", 16) # 17

int("0x11", 0) # 17
int("0x11", 16) # 17
int("0b1", 16) # 177 (0xb1)
int("0b1", 2) # 1
int("0b1", 0) # 1

int("0x11") # error: invalid literal with base 10
```

### len

`len(x)` returns the number of elements in its argument.
Expand Down
154 changes: 76 additions & 78 deletions starlark/library.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,114 +473,112 @@ func int_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error)
return nil, err
}

// "If x is not a number or base is given, x must be a string."
if s, ok := AsString(x); ok {
b := 10
if base != nil {
var err error
b, err = AsInt32(base)
if err != nil || b != 0 && (b < 2 || b > 36) {
if err != nil {
return nil, fmt.Errorf("int: for base, got %s, want int", base.Type())
}
if b != 0 && (b < 2 || b > 36) {
return nil, fmt.Errorf("int: base must be an integer >= 2 && <= 36")
}
}
res := parseInt(s, b)
if res == nil {
return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, s)
}
return res, nil
}

orig := s // save original for error message
if base != nil {
return nil, fmt.Errorf("int: can't convert non-string with explicit base")
}

// remove sign
var neg bool
if s != "" {
if s[0] == '+' {
s = s[1:]
} else if s[0] == '-' {
neg = true
s = s[1:]
}
if b, ok := x.(Bool); ok {
if b {
return one, nil
} else {
return zero, nil
}
}

// remove base prefix
baseprefix := 0
if len(s) > 1 && s[0] == '0' {
if len(s) > 2 {
switch s[1] {
case 'o', 'O':
s = s[2:]
baseprefix = 8
case 'x', 'X':
s = s[2:]
baseprefix = 16
case 'b', 'B':
s = s[2:]
baseprefix = 2
}
}
i, err := NumberToInt(x)
if err != nil {
return nil, fmt.Errorf("int: %s", err)
}
return i, nil
}

// parseInt defines the behavior of int(string, base=int). It returns nil on error.
func parseInt(s string, base int) Value {
// remove sign
var neg bool
if s != "" {
if s[0] == '+' {
s = s[1:]
} else if s[0] == '-' {
neg = true
s = s[1:]
}
}

// remove optional base prefix
baseprefix := 0
if len(s) > 1 && s[0] == '0' {
if len(s) > 2 {
switch s[1] {
case 'o', 'O':
baseprefix = 8
case 'x', 'X':
baseprefix = 16
case 'b', 'B':
baseprefix = 2
}
}
if baseprefix != 0 {
// Remove the base prefix if it matches
// the explicit base, or if base=0.
if base == 0 || baseprefix == base {
base = baseprefix
s = s[2:]
}
} else {
// For automatic base detection,
// a string starting with zero
// must be all zeros.
// Thus we reject int("0755", 0).
if baseprefix == 0 && b == 0 {
if base == 0 {
for i := 1; i < len(s); i++ {
if s[i] != '0' {
goto invalid
return nil
}
}
return zero, nil
}

if b != 0 && baseprefix != 0 && baseprefix != b {
// Explicit base doesn't match prefix,
// e.g. int("0o755", 16).
goto invalid
return zero
}
}

// select base
if b == 0 {
if baseprefix != 0 {
b = baseprefix
} else {
b = 10
}
}

// we explicitly handled sign above.
// if a sign remains, it is invalid.
if s != "" && (s[0] == '-' || s[0] == '+') {
goto invalid
}

// s has no sign or base prefix.
//
// int(x) permits arbitrary precision, unlike the scanner.
if i, ok := new(big.Int).SetString(s, b); ok {
res := MakeBigInt(i)
if neg {
res = zero.Sub(res)
}
return res, nil
}

invalid:
return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, orig)
}
if base == 0 {
base = 10
}

if base != nil {
return nil, fmt.Errorf("int: can't convert non-string with explicit base")
// we explicitly handled sign above.
// if a sign remains, it is invalid.
if s != "" && (s[0] == '-' || s[0] == '+') {
return nil
}

if b, ok := x.(Bool); ok {
if b {
return one, nil
} else {
return zero, nil
// s has no sign or base prefix.
if i, ok := new(big.Int).SetString(s, base); ok {
res := MakeBigInt(i)
if neg {
res = zero.Sub(res)
}
return res
}

i, err := NumberToInt(x)
if err != nil {
return nil, fmt.Errorf("int: %s", err)
}
return i, nil
return nil
}

// https://github.com/google/starlark-go/blob/master/doc/spec.md#len
Expand Down
11 changes: 11 additions & 0 deletions starlark/testdata/int.star
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ assert.eq(0x1000000000000001 * 0x1000000000000001, 0x100000000000000200000000000
assert.eq(int("1010", 2), 10)
assert.eq(int("111111101", 2), 509)
assert.eq(int("0b0101", 0), 5)
assert.eq(int("0b0101", 2), 5) # prefix is redundant with explicit base
assert.eq(int("0b00000", 0), 0)
assert.eq(1111111111111111 * 1111111111111111, 1234567901234567654320987654321)
assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8")
Expand All @@ -162,6 +163,16 @@ assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16")
assert.fails(lambda: int("-0o123", 16), "invalid literal.*base 16")
assert.fails(lambda: int("0x110", 2), "invalid literal.*base 2")

# Base prefix is honored only if base=0, or if the prefix matches the explicit base.
# See https://github.com/google/starlark-go/issues/337
assert.fails(lambda: int("0b0"), "invalid literal.*base 10")
assert.eq(int("0b0", 0), 0)
assert.eq(int("0b0", 2), 0)
assert.eq(int("0b0", 16), 0xb0)
assert.eq(int("0x0b0", 16), 0xb0)
assert.eq(int("0x0b0", 0), 0xb0)
assert.eq(int("0x0b0101", 16), 0x0b0101)

# int from string, auto detect base
assert.eq(int("123", 0), 123)
assert.eq(int("+123", 0), +123)
Expand Down
4 changes: 2 additions & 2 deletions syntax/syntax.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,10 @@ func (x *Ident) Span() (start, end Position) {
// A Literal represents a literal string or number.
type Literal struct {
commentsRef
Token Token // = STRING | INT
Token Token // = STRING | INT | FLOAT
TokenPos Position
Raw string // uninterpreted text
Value interface{} // = string | int64 | *big.Int
Value interface{} // = string | int64 | *big.Int | float64
}

func (x *Literal) Span() (start, end Position) {
Expand Down