Skip to content

Commit

Permalink
Merge branch 'fix-octal-escape-string' into fix-line-terminator
Browse files Browse the repository at this point in the history
  • Loading branch information
jevancc committed Jan 19, 2021
2 parents e698d16 + 983d786 commit 8d6eb54
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 29 deletions.
60 changes: 38 additions & 22 deletions boa/src/syntax/lexer/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ impl StringLiteral {
{
let mut buf = Vec::new();
loop {
let next_ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();

match next_ch {
match ch {
Some('\'') if terminator == StringTerminator::SingleQuote => {
break;
}
Expand All @@ -117,39 +118,51 @@ impl StringLiteral {
})?;

match escape_ch {
'b' => buf.push('\u{0008}' as u16 /* <BS> */),
't' => buf.push('\u{0009}' as u16 /* <HT> */),
'n' => buf.push('\u{000A}' as u16 /* <LF> */),
'v' => buf.push('\u{000B}' as u16 /* <VT> */),
'f' => buf.push('\u{000C}' as u16 /* <FF> */),
'r' => buf.push('\u{000D}' as u16 /* <CR> */),
'"' => buf.push('\u{0022}' as u16 /* " */),
'\'' => buf.push('\u{0027}' as u16 /* ' */),
'\\' => buf.push('\u{005C}' as u16 /* \ */),
'b' => buf.push(0x0008 /* <BS> */),
't' => buf.push(0x0009 /* <HT> */),
'n' => buf.push(0x000A /* <LF> */),
'v' => buf.push(0x000B /* <VT> */),
'f' => buf.push(0x000C /* <FF> */),
'r' => buf.push(0x000D /* <CR> */),
'"' => buf.push(0x0022 /* " */),
'\'' => buf.push(0x0027 /* ' */),
'\\' => buf.push(0x005C /* \ */),
'0' if cursor
.peek()?
.and_then(|next_byte| char::try_from(next_byte).ok())
.filter(|next_ch| next_ch.is_digit(10))
.is_none() =>
{
buf.push('\u{0000}' as u16 /* NULL */)
buf.push(0x0000 /* NULL */)
}
'x' => {
Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
}
'u' => {
Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
}
_ if escape_ch.is_digit(10) => {
'8' | '9' => {
// Grammar: NonOctalDecimalEscapeSequence
if strict_mode {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in strict mode",
ch_start_pos,
));
} else {
buf.push(escape_ch as u16);
}
}
_ if escape_ch.is_digit(8) => {
Self::take_legacy_octal_escape_sequence(
cursor,
ch_start_pos,
Some(&mut buf),
strict_mode,
escape_ch as u8,
)?;
}
_ if Self::is_line_terminator(escape_ch) => {
// Match LineContinuation
// Grammar: LineContinuation
// Grammar: \ LineTerminatorSequence
// LineContinuation is the empty String. Do nothing and continue lexing.
}
Expand Down Expand Up @@ -189,6 +202,7 @@ impl StringLiteral {
#[inline]
pub(super) fn take_unicode_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
) -> Result<u32, Error>
where
Expand All @@ -203,14 +217,14 @@ impl StringLiteral {
let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
// We know this is a single unicode codepoint, convert to u32
let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {
Error::syntax("malformed Unicode character escape sequence", cursor.pos())
Error::syntax("malformed Unicode character escape sequence", start_pos)
})?;

// UTF16Encoding of a numeric code point value
if code_point > 0x10_FFFF {
return Err(Error::syntax(
"Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
cursor.pos(),
start_pos,
));
} else if let Some(code_units_buf) = code_units_buf {
if code_point <= 65535 {
Expand All @@ -234,7 +248,7 @@ impl StringLiteral {
let code_point_str = str::from_utf8(&code_point_utf8_bytes)
.expect("malformed Unicode character escape sequence");
let code_point = u16::from_str_radix(code_point_str, 16)
.map_err(|_| Error::syntax("invalid Unicode escape sequence", cursor.pos()))?;
.map_err(|_| Error::syntax("invalid Unicode escape sequence", start_pos))?;

if let Some(code_units_buf) = code_units_buf {
code_units_buf.push(code_point);
Expand All @@ -247,6 +261,7 @@ impl StringLiteral {
#[inline]
fn take_hex_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
) -> Result<u32, Error>
where
Expand All @@ -257,7 +272,7 @@ impl StringLiteral {
let code_point_str = str::from_utf8(&code_point_utf8_bytes)
.expect("malformed Hexadecimal character escape sequence");
let code_point = u16::from_str_radix(&code_point_str, 16)
.map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", cursor.pos()))?;
.map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;

if let Some(code_units_buf) = code_units_buf {
code_units_buf.push(code_point);
Expand All @@ -269,6 +284,7 @@ impl StringLiteral {
#[inline]
fn take_legacy_octal_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
strict_mode: bool,
init_byte: u8,
Expand All @@ -278,8 +294,8 @@ impl StringLiteral {
{
if strict_mode {
return Err(Error::syntax(
"octal escape sequences are deprecated",
cursor.pos(),
"octal escape sequences are not allowed in strict mode",
start_pos,
));
}
// Grammar: OctalDigit
Expand Down
81 changes: 74 additions & 7 deletions boa/src/syntax/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ fn illegal_following_numeric_literal() {
}

#[test]
fn codepoint_with_no_braces() {
fn string_codepoint_with_no_braces() {
let mut lexer = Lexer::new(&br#""test\uD38Dtest""#[..]);
assert!(lexer.next().is_ok());
}
Expand All @@ -814,7 +814,7 @@ fn illegal_code_point_following_numeric_literal() {
}

#[test]
fn non_english_str() {
fn string_unicode() {
let str = r#"'中文';"#;

let mut lexer = Lexer::new(str.as_bytes());
Expand All @@ -828,7 +828,7 @@ fn non_english_str() {
}

#[test]
fn unicode_escape_with_braces() {
fn string_unicode_escape_with_braces() {
let mut lexer = Lexer::new(&br#"'{\u{20ac}\u{a0}\u{a0}}'"#[..]);

let expected = [TokenKind::StringLiteral("{\u{20ac}\u{a0}\u{a0}}".into())];
Expand Down Expand Up @@ -859,7 +859,7 @@ fn unicode_escape_with_braces() {
}

#[test]
fn unicode_escape_with_braces_() {
fn take_string_characters_unicode_escape_with_braces_2() {
let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string();

let mut cursor = Cursor::new(s.as_bytes());
Expand All @@ -877,7 +877,7 @@ fn unicode_escape_with_braces_() {
}

#[test]
fn unescape_string_with_single_escape() {
fn take_string_characters_with_single_escape() {
let s = r#"\Б"#.to_string();
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
Expand All @@ -891,7 +891,7 @@ fn unescape_string_with_single_escape() {
}

#[test]
fn legacy_octal_escape() {
fn take_string_characters_legacy_octal_escape() {
let test_cases = [
(r#"\3"#, "\u{3}"),
(r#"\03"#, "\u{3}"),
Expand All @@ -914,10 +914,27 @@ fn legacy_octal_escape() {

assert_eq!(s, *expected);
}

for (s, _) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());

if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
true,
)
.expect_err("Octal-escape in strict mode not rejected as expected")
{
assert_eq!(pos, Position::new(1, 1));
} else {
panic!("invalid error type");
}
}
}

#[test]
fn zero_escape() {
fn take_string_characters_zero_escape() {
let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")];

for (s, expected) in test_cases.iter() {
Expand All @@ -934,6 +951,56 @@ fn zero_escape() {
}
}

/// Checks the lexing of `\8` and `\9` (NonOctalDecimalEscapeSequence):
/// outside strict mode each escape yields the digit itself, while in strict
/// mode it is rejected with a syntax error reported at `Position::new(1, 1)`.
#[test]
fn take_string_characters_non_octal_decimal_escape() {
    let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")];

    // Non-strict mode: the escape is accepted and produces the bare digit.
    for &(input, expected) in test_cases.iter() {
        let mut cursor = Cursor::new(input.as_bytes());
        let result = StringLiteral::take_string_characters(
            &mut cursor,
            Position::new(1, 1),
            StringTerminator::End,
            false,
        );
        let (lexed, _) = result.unwrap();

        assert_eq!(lexed, expected);
    }

    // Strict mode: the same inputs must fail with a syntax error.
    for &(input, _) in test_cases.iter() {
        let mut cursor = Cursor::new(input.as_bytes());
        let err = StringLiteral::take_string_characters(
            &mut cursor,
            Position::new(1, 1),
            StringTerminator::End,
            true,
        )
        .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected");

        match err {
            Error::Syntax(_, pos) => assert_eq!(pos, Position::new(1, 1)),
            _ => panic!("invalid error type"),
        }
    }
}

/// Checks that a backslash immediately followed by a line feed
/// (a LineContinuation) adds nothing to the lexed string.
#[test]
fn take_string_characters_line_continuation() {
    // Source text is: `hello `, a backslash, a line feed, then `world`.
    let source = "hello \\\nworld";
    let mut cursor = Cursor::new(source.as_bytes());

    let result = StringLiteral::take_string_characters(
        &mut cursor,
        Position::new(1, 1),
        StringTerminator::End,
        false,
    );
    let (lexed, _) = result.unwrap();

    assert_eq!(lexed, "hello world");
}

mod carriage_return {
use super::*;

Expand Down

0 comments on commit 8d6eb54

Please sign in to comment.