Skip to content

Commit

Permalink
refactor(es/parser): Do not use lexical (swc-project#7758)
Browse files Browse the repository at this point in the history
This PR replaces the use of `lexical` in the `swc_ecma_parser` crate
with equivalent parsing of large numbers using `BigInt` from `num-bigint`.

**Description:**

As discussed in
swc-project#7752, `lexical` has a
number of soundness issues and doesn't appear to be actively maintained.
Because `lexical` is used in only a few places, it seems reasonable to
replace it with another package and avoid these issues.

**Related issue:**

- Closes swc-project#7752
  • Loading branch information
gmcsorley-work authored and kdy1 committed Aug 15, 2023
1 parent bfad1ce commit de3d8ba
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 38 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/swc_ecma_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ verify = ["swc_ecma_visit"]

[dependencies]
either = { version = "1.4" }
lexical = { version = "6.1.0", features = ["power-of-two", "parse-integers", "parse-floats"], default-features = false }
num-bigint = "0.4"
num-traits = "0.2.15"
serde = { version = "1", features = ["derive"] }
smallvec = "1.8.0"
smartstring = "1"
Expand Down
12 changes: 3 additions & 9 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,9 @@ impl<'a> Lexer<'a> {
let next = self.input.peek();

let bigint = match next {
Some('x') | Some('X') => {
self.read_radix_number::<16, { lexical::NumberFormatBuilder::hexadecimal() }>()
}
Some('o') | Some('O') => {
self.read_radix_number::<8, { lexical::NumberFormatBuilder::octal() }>()
}
Some('b') | Some('B') => {
self.read_radix_number::<2, { lexical::NumberFormatBuilder::binary() }>()
}
Some('x') | Some('X') => self.read_radix_number::<16>(),
Some('o') | Some('O') => self.read_radix_number::<8>(),
Some('b') | Some('B') => self.read_radix_number::<2>(),
_ => {
return self.read_number(false).map(|v| match v {
Left((value, raw)) => Num { value, raw },
Expand Down
51 changes: 24 additions & 27 deletions crates/swc_ecma_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{borrow::Cow, fmt::Write};

use either::Either;
use num_bigint::BigInt as BigIntValue;
use num_traits::{Num as NumTrait, ToPrimitive};
use smartstring::{LazyCompact, SmartString};
use swc_common::SyntaxContext;
use tracing::trace;
Expand Down Expand Up @@ -56,9 +57,7 @@ impl<'a> Lexer<'a> {
let starts_with_zero = self.cur().unwrap() == '0';

// Use read_number_no_dot to support long numbers.
let (val, s, mut raw, not_octal) = self
.read_number_no_dot_as_str::<10, { lexical::NumberFormatBuilder::from_radix(10) }>(
)?;
let (val, s, mut raw, not_octal) = self.read_number_no_dot_as_str::<10>()?;

if self.eat(b'n') {
raw.push('n');
Expand Down Expand Up @@ -102,17 +101,17 @@ impl<'a> Lexer<'a> {
self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
} else {
// It's Legacy octal, and we should reinterpret value.
let val = lexical::parse_with_options::<
f64,
_,
{ lexical::NumberFormatBuilder::from_radix(8) },
>(
val_str,
&lexical::parse_float_options::Options::from_radix(8),
)
.unwrap_or_else(|err| {
panic!("failed to parse {} using `lexical`: {:?}", val_str, err)
});
let val = BigIntValue::from_str_radix(val_str, 8)
.unwrap_or_else(|err| {
panic!(
"failed to parse {} using `from_str_radix`: {:?}",
val_str, err
)
})
.to_f64()
.unwrap_or_else(|| {
panic!("failed to parse {} into float using BigInt", val_str)
});

return self.make_legacy_octal(start, val).map(|value| {
Either::Left((value, self.atoms.borrow_mut().intern(&*raw)))
Expand Down Expand Up @@ -235,7 +234,7 @@ impl<'a> Lexer<'a> {
}

/// Returns `Left(value)` or `Right(BigInt)`
pub(super) fn read_radix_number<const RADIX: u8, const FORMAT: u128>(
pub(super) fn read_radix_number<const RADIX: u8>(
&mut self,
) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
debug_assert!(
Expand Down Expand Up @@ -263,7 +262,7 @@ impl<'a> Lexer<'a> {

buf.push(c);

let (val, s, raw, _) = l.read_number_no_dot_as_str::<RADIX, FORMAT>()?;
let (val, s, raw, _) = l.read_number_no_dot_as_str::<RADIX>()?;

buf.push_str(&raw);

Expand Down Expand Up @@ -315,7 +314,7 @@ impl<'a> Lexer<'a> {
///
///
/// Returned bool is `true` if there was `8` or `9`.
fn read_number_no_dot_as_str<const RADIX: u8, const FORMAT: u128>(
fn read_number_no_dot_as_str<const RADIX: u8>(
&mut self,
) -> LexResult<(f64, LazyBigInt<RADIX>, SmartString<LazyCompact>, bool)> {
debug_assert!(
Expand Down Expand Up @@ -351,13 +350,12 @@ impl<'a> Lexer<'a> {
let raw_str = raw.0.take().unwrap();
// Remove number separator from number
let raw_number_str = raw_str.replace('_', "");

let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32)
.expect("failed to parse float using BigInt")
.to_f64()
.expect("failed to parse float using BigInt");
Ok((
lexical::parse_with_options::<f64, _, FORMAT>(
raw_number_str.as_bytes(),
&lexical::parse_float_options::Options::from_radix(RADIX),
)
.expect("failed to parse float using lexical"),
parsed_float,
LazyBigInt::new(raw_number_str),
raw_str,
non_octal,
Expand Down Expand Up @@ -696,7 +694,7 @@ mod tests {
assert_eq!(
(0o73 as f64, "0o73".into()),
lex("0o73", |l| l
.read_radix_number::<8, { lexical::NumberFormatBuilder::octal() }>()
.read_radix_number::<8>()
.unwrap()
.left()
.unwrap())
Expand Down Expand Up @@ -750,18 +748,17 @@ mod tests {
111111111111111111111111111111111111111111111111111111111111111111\
111111111111111111111111111111111111111111111111111111111111111111\
0010111110001111111111";
const FORMAT: u128 = lexical::NumberFormatBuilder::binary();
assert_eq!(
lex(LONG, |l| l
.read_radix_number::<2, FORMAT>()
.read_radix_number::<2>()
.unwrap()
.left()
.unwrap()),
(9.671_406_556_917_009e24, LONG.into())
);
assert_eq!(
lex(VERY_LARGE_BINARY_NUMBER, |l| l
.read_radix_number::<2, FORMAT>()
.read_radix_number::<2>()
.unwrap()
.left()
.unwrap()),
Expand Down

0 comments on commit de3d8ba

Please sign in to comment.