diff --git a/doc/spec.md b/doc/spec.md index cbd117e2..15e4dc22 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -3186,11 +3186,30 @@ If x is a `bool`, the result is 0 for `False` or 1 for `True`. If x is a string, it is interpreted as a sequence of digits in the specified base, decimal by default. If `base` is zero, x is interpreted like an integer literal, the base -being inferred from an optional base marker such as `0b`, `0o`, or +being inferred from an optional base prefix such as `0b`, `0o`, or `0x` preceding the first digit. +When the `base` is provided explictly, a matching base prefix is +also permitted, and has no effect. Irrespective of base, the string may start with an optional `+` or `-` sign indicating the sign of the result. +```python +int("11") # 11 +int("11", 0) # 11 +int("11", 10) # 11 +int("11", 2) # 3 +int("11", 8) # 9 +int("11", 16) # 17 + +int("0x11", 0) # 17 +int("0x11", 16) # 17 +int("0b1", 16) # 177 (0xb1) +int("0b1", 2) # 1 +int("0b1", 0) # 1 + +int("0x11") # error: invalid literal with base 10 +``` + ### len `len(x)` returns the number of elements in its argument. diff --git a/starlark/library.go b/starlark/library.go index 17638240..56454186 100644 --- a/starlark/library.go +++ b/starlark/library.go @@ -473,114 +473,112 @@ func int_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) return nil, err } - // "If x is not a number or base is given, x must be a string." if s, ok := AsString(x); ok { b := 10 if base != nil { var err error b, err = AsInt32(base) - if err != nil || b != 0 && (b < 2 || b > 36) { + if err != nil { + return nil, fmt.Errorf("int: for base, got %s, want int", base.Type()) + } + if b != 0 && (b < 2 || b > 36) { return nil, fmt.Errorf("int: base must be an integer >= 2 && <= 36") } } + res := parseInt(s, b) + if res == nil { + return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, s) + } + return res, nil + } - orig := s // save original for error message + if base != nil { + return nil, fmt.Errorf("int: can't convert non-string with explicit base") + } - // remove sign - var neg bool - if s != "" { - if s[0] == '+' { - s = s[1:] - } else if s[0] == '-' { - neg = true - s = s[1:] - } + if b, ok := x.(Bool); ok { + if b { + return one, nil + } else { + return zero, nil } + } - // remove base prefix - baseprefix := 0 - if len(s) > 1 && s[0] == '0' { - if len(s) > 2 { - switch s[1] { - case 'o', 'O': - s = s[2:] - baseprefix = 8 - case 'x', 'X': - s = s[2:] - baseprefix = 16 - case 'b', 'B': - s = s[2:] - baseprefix = 2 - } - } + i, err := NumberToInt(x) + if err != nil { + return nil, fmt.Errorf("int: %s", err) + } + return i, nil +} +// parseInt defines the behavior of int(string, base=int). It returns nil on error. +func parseInt(s string, base int) Value { + // remove sign + var neg bool + if s != "" { + if s[0] == '+' { + s = s[1:] + } else if s[0] == '-' { + neg = true + s = s[1:] + } + } + + // remove optional base prefix + baseprefix := 0 + if len(s) > 1 && s[0] == '0' { + if len(s) > 2 { + switch s[1] { + case 'o', 'O': + baseprefix = 8 + case 'x', 'X': + baseprefix = 16 + case 'b', 'B': + baseprefix = 2 + } + } + if baseprefix != 0 { + // Remove the base prefix if it matches + // the explicit base, or if base=0. + if base == 0 || baseprefix == base { + base = baseprefix + s = s[2:] + } + } else { // For automatic base detection, // a string starting with zero // must be all zeros. // Thus we reject int("0755", 0). - if baseprefix == 0 && b == 0 { + if base == 0 { for i := 1; i < len(s); i++ { if s[i] != '0' { - goto invalid + return nil } } - return zero, nil - } - - if b != 0 && baseprefix != 0 && baseprefix != b { - // Explicit base doesn't match prefix, - // e.g. int("0o755", 16). - goto invalid + return zero } } - - // select base - if b == 0 { - if baseprefix != 0 { - b = baseprefix - } else { - b = 10 - } - } - - // we explicitly handled sign above. - // if a sign remains, it is invalid. - if s != "" && (s[0] == '-' || s[0] == '+') { - goto invalid - } - - // s has no sign or base prefix. - // - // int(x) permits arbitrary precision, unlike the scanner. - if i, ok := new(big.Int).SetString(s, b); ok { - res := MakeBigInt(i) - if neg { - res = zero.Sub(res) - } - return res, nil - } - - invalid: - return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, orig) + } + if base == 0 { + base = 10 } - if base != nil { - return nil, fmt.Errorf("int: can't convert non-string with explicit base") + // we explicitly handled sign above. + // if a sign remains, it is invalid. + if s != "" && (s[0] == '-' || s[0] == '+') { + return nil } - if b, ok := x.(Bool); ok { - if b { - return one, nil - } else { - return zero, nil + // s has no sign or base prefix. + if i, ok := new(big.Int).SetString(s, base); ok { + res := MakeBigInt(i) + if neg { + res = zero.Sub(res) } + return res } - i, err := NumberToInt(x) - if err != nil { - return nil, fmt.Errorf("int: %s", err) - } - return i, nil + return nil } // https://github.com/google/starlark-go/blob/master/doc/spec.md#len diff --git a/starlark/testdata/int.star b/starlark/testdata/int.star index a2e67167..46c0ad0d 100644 --- a/starlark/testdata/int.star +++ b/starlark/testdata/int.star @@ -154,6 +154,7 @@ assert.eq(0x1000000000000001 * 0x1000000000000001, 0x100000000000000200000000000 assert.eq(int("1010", 2), 10) assert.eq(int("111111101", 2), 509) assert.eq(int("0b0101", 0), 5) +assert.eq(int("0b0101", 2), 5) # prefix is redundant with explicit base assert.eq(int("0b00000", 0), 0) assert.eq(1111111111111111 * 1111111111111111, 1234567901234567654320987654321) assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8") @@ -162,6 +163,16 @@ assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16") assert.fails(lambda: int("-0o123", 16), "invalid literal.*base 16") assert.fails(lambda: int("0x110", 2), "invalid literal.*base 2") +# Base prefix is honored only if base=0, or if the prefix matches the explicit base. +# See https://github.com/google/starlark-go/issues/337 +assert.fails(lambda: int("0b0"), "invalid literal.*base 10") +assert.eq(int("0b0", 0), 0) +assert.eq(int("0b0", 2), 0) +assert.eq(int("0b0", 16), 0xb0) +assert.eq(int("0x0b0", 16), 0xb0) +assert.eq(int("0x0b0", 0), 0xb0) +assert.eq(int("0x0b0101", 16), 0x0b0101) + # int from string, auto detect base assert.eq(int("123", 0), 123) assert.eq(int("+123", 0), +123) diff --git a/syntax/syntax.go b/syntax/syntax.go index b4817c1a..8bbf5c0f 100644 --- a/syntax/syntax.go +++ b/syntax/syntax.go @@ -251,10 +251,10 @@ func (x *Ident) Span() (start, end Position) { // A Literal represents a literal string or number. type Literal struct { commentsRef - Token Token // = STRING | INT + Token Token // = STRING | INT | FLOAT TokenPos Position Raw string // uninterpreted text - Value interface{} // = string | int64 | *big.Int + Value interface{} // = string | int64 | *big.Int | float64 } func (x *Literal) Span() (start, end Position) {