From 5fd86a1a67301b253b7d7137ad81772ea7336c51 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:04:22 -0800 Subject: [PATCH 1/8] Fix escape followed by unicode char --- boa/src/syntax/lexer/string.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 6849791f5a6..fd203763649 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -132,7 +132,7 @@ impl StringLiteral { .filter(|next_ch| next_ch.is_digit(10)) .is_none() => { - buf.push('\0' as u16) + buf.push('\u{0000}' as u16 /* NULL */) } 'x' => { Self::take_hex_escape_sequence(cursor, Some(&mut buf))?; @@ -153,17 +153,20 @@ impl StringLiteral { // Grammar: \ LineTerminatorSequence // LineContinuation is the empty String. Do nothing and continue lexing. } - _ => buf.push(escape_ch as u16), + _ => { + if escape_ch.len_utf16() == 1 { + buf.push(escape_ch as u16); + } else { + buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter()); + } + } }; } Some(next_ch) => { if next_ch.len_utf16() == 1 { buf.push(next_ch as u16); } else { - let mut code_units_buf = [0u16; 2]; - let code_units_buf = next_ch.encode_utf16(&mut code_units_buf); - - buf.extend(code_units_buf.iter()); + buf.extend(next_ch.encode_utf16(&mut [0u16; 2]).iter()); } } None => { From b176702fae9c91ed3acc2e7dc910bf7f74fee2d0 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:21:37 -0800 Subject: [PATCH 2/8] Add NonOctalDecimalEscapeSequence --- boa/src/syntax/lexer/string.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index fd203763649..2e02b374427 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -140,7 +140,18 @@ impl StringLiteral { 'u' => { Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?; } - _ if escape_ch.is_digit(10) => { + '8' | '9' => { + // 
Grammar: NonOctalDecimalEscapeSequence + if strict_mode { + return Err(Error::syntax( + "\\8 and \\9 are not allowed in strict mode.", + cursor.pos(), + )); + } else { + buf.push(escape_ch as u16); + } + } + _ if escape_ch.is_digit(8) => { Self::take_legacy_octal_escape_sequence( cursor, Some(&mut buf), From 858a74d9927dadbd171fe89608d3cf7739af9158 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:22:05 -0800 Subject: [PATCH 3/8] Fix comment --- boa/src/syntax/lexer/string.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 2e02b374427..88894b9290c 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -160,7 +160,7 @@ impl StringLiteral { )?; } _ if Self::is_line_terminator(escape_ch) => { - // Match LineContinuation + // Grammar: LineContinuation // Grammar: \ LineTerminatorSequence // LineContinuation is the empty String. Do nothing and continue lexing. 
} From e4bf635b94e37c1f0e0dd0439df082246cbf18fa Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:24:59 -0800 Subject: [PATCH 4/8] Refactor --- boa/src/syntax/lexer/string.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 88894b9290c..b8b27fd3412 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -117,22 +117,22 @@ impl StringLiteral { })?; match escape_ch { - 'b' => buf.push('\u{0008}' as u16 /* <BS> */), - 't' => buf.push('\u{0009}' as u16 /* <HT> */), - 'n' => buf.push('\u{000A}' as u16 /* <LF> */), - 'v' => buf.push('\u{000B}' as u16 /* <VT> */), - 'f' => buf.push('\u{000C}' as u16 /* <FF> */), - 'r' => buf.push('\u{000D}' as u16 /* <CR> */), - '"' => buf.push('\u{0022}' as u16 /* " */), - '\'' => buf.push('\u{0027}' as u16 /* ' */), - '\\' => buf.push('\u{005C}' as u16 /* \ */), + 'b' => buf.push(0x0008 /* <BS> */), + 't' => buf.push(0x0009 /* <HT> */), + 'n' => buf.push(0x000A /* <LF> */), + 'v' => buf.push(0x000B /* <VT> */), + 'f' => buf.push(0x000C /* <FF> */), + 'r' => buf.push(0x000D /* <CR> */), + '"' => buf.push(0x0022 /* " */), + '\'' => buf.push(0x0027 /* ' */), + '\\' => buf.push(0x005C /* \ */), '0' if cursor .peek()? 
.and_then(|next_byte| char::try_from(next_byte).ok()) .filter(|next_ch| next_ch.is_digit(10)) .is_none() => { - buf.push('\u{0000}' as u16 /* NULL */) + buf.push(0x0000 /* NULL */) } 'x' => { Self::take_hex_escape_sequence(cursor, Some(&mut buf))?; From 835a5bb88cd818fe2c259ef5242af2e78bdbca71 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:26:22 -0800 Subject: [PATCH 5/8] Modify error message --- boa/src/syntax/lexer/string.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index b8b27fd3412..6cf1e991b88 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -144,7 +144,7 @@ impl StringLiteral { // Grammar: NonOctalDecimalEscapeSequence if strict_mode { return Err(Error::syntax( - "\\8 and \\9 are not allowed in strict mode.", + "\\8 and \\9 are not allowed in strict mode", cursor.pos(), )); } else { @@ -287,7 +287,7 @@ impl StringLiteral { { if strict_mode { return Err(Error::syntax( - "octal escape sequences are deprecated", + "octal escape sequences are not allowed in strict mode", cursor.pos(), )); } From d67cec2dbff4cf9f6282ea5d5a6bdfde2ea49e60 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 20:56:19 -0800 Subject: [PATCH 6/8] Add tests --- boa/src/syntax/lexer/tests.rs | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 61e8962f190..496ca44174a 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -914,6 +914,17 @@ fn legacy_octal_escape() { assert_eq!(s, *expected); } + + for (s, _) in test_cases.iter() { + let mut cursor = Cursor::new(s.as_bytes()); + StringLiteral::take_string_characters( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + true, + ) + .expect_err("Octal-escape in strict mode not rejected as expected"); + } } #[test] @@ -934,6 +945,50 @@ fn 
zero_escape() { } } +#[test] +fn non_octal_decimal_escape() { + let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")]; + + for (s, expected) in test_cases.iter() { + let mut cursor = Cursor::new(s.as_bytes()); + let (s, _) = StringLiteral::take_string_characters( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + false, + ) + .unwrap(); + + assert_eq!(s, *expected); + } + + for (s, _) in test_cases.iter() { + let mut cursor = Cursor::new(s.as_bytes()); + StringLiteral::take_string_characters( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + true, + ) + .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected"); + } +} + +#[test] +fn line_continuation() { + let s = "hello \\\nworld"; + let mut cursor = Cursor::new(s.as_bytes()); + let (s, _) = StringLiteral::take_string_characters( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + false, + ) + .unwrap(); + + assert_eq!(s, "hello world"); +} + mod carriage_return { use super::*; From f59a002816aa88711ee9ad383e44e23dd4efd2f6 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 21:02:07 -0800 Subject: [PATCH 7/8] Rename tests --- boa/src/syntax/lexer/tests.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 496ca44174a..e9d01d46915 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -795,7 +795,7 @@ fn illegal_following_numeric_literal() { } #[test] -fn codepoint_with_no_braces() { +fn string_codepoint_with_no_braces() { let mut lexer = Lexer::new(&br#""test\uD38Dtest""#[..]); assert!(lexer.next().is_ok()); } @@ -814,7 +814,7 @@ fn illegal_code_point_following_numeric_literal() { } #[test] -fn non_english_str() { +fn string_unicode() { let str = r#"'中文';"#; let mut lexer = Lexer::new(str.as_bytes()); @@ -828,7 +828,7 @@ fn non_english_str() { } #[test] -fn unicode_escape_with_braces() { +fn 
string_unicode_escape_with_braces() { let mut lexer = Lexer::new(&br#"'{\u{20ac}\u{a0}\u{a0}}'"#[..]); let expected = [TokenKind::StringLiteral("{\u{20ac}\u{a0}\u{a0}}".into())]; @@ -859,7 +859,7 @@ fn unicode_escape_with_braces() { } #[test] -fn unicode_escape_with_braces_() { +fn take_string_characters_unicode_escape_with_braces_2() { let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string(); let mut cursor = Cursor::new(s.as_bytes()); @@ -877,7 +877,7 @@ fn unicode_escape_with_braces_() { } #[test] -fn unescape_string_with_single_escape() { +fn take_string_characters_with_single_escape() { let s = r#"\Б"#.to_string(); let mut cursor = Cursor::new(s.as_bytes()); let (s, _) = StringLiteral::take_string_characters( @@ -891,7 +891,7 @@ fn unescape_string_with_single_escape() { } #[test] -fn legacy_octal_escape() { +fn take_string_characters_legacy_octal_escape() { let test_cases = [ (r#"\3"#, "\u{3}"), (r#"\03"#, "\u{3}"), @@ -928,7 +928,7 @@ fn legacy_octal_escape() { } #[test] -fn zero_escape() { +fn take_string_characters_zero_escape() { let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")]; for (s, expected) in test_cases.iter() { @@ -946,7 +946,7 @@ fn zero_escape() { } #[test] -fn non_octal_decimal_escape() { +fn take_string_characters_non_octal_decimal_escape() { let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")]; for (s, expected) in test_cases.iter() { @@ -975,7 +975,7 @@ fn non_octal_decimal_escape() { } #[test] -fn line_continuation() { +fn take_string_characters_line_continuation() { let s = "hello \\\nworld"; let mut cursor = Cursor::new(s.as_bytes()); let (s, _) = StringLiteral::take_string_characters( From 983d786d6b67cfe65aa16c2f3cabca2d14c42f34 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 18 Jan 2021 21:30:16 -0800 Subject: [PATCH 8/8] Add test for error --- boa/src/syntax/lexer/string.rs | 33 +++++++++++++++++++-------------- boa/src/syntax/lexer/tests.rs | 20 ++++++++++++++++---- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git 
a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 6cf1e991b88..6449d554c90 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -90,9 +90,10 @@ impl StringLiteral { { let mut buf = Vec::new(); loop { - let next_ch = cursor.next_char()?.map(char::try_from).transpose().unwrap(); + let ch_start_pos = cursor.pos(); + let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap(); - match next_ch { + match ch { Some('\'') if terminator == StringTerminator::SingleQuote => { break; } @@ -135,17 +136,17 @@ impl StringLiteral { buf.push(0x0000 /* NULL */) } 'x' => { - Self::take_hex_escape_sequence(cursor, Some(&mut buf))?; + Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; } 'u' => { - Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?; + Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; } '8' | '9' => { // Grammar: NonOctalDecimalEscapeSequence if strict_mode { return Err(Error::syntax( "\\8 and \\9 are not allowed in strict mode", - cursor.pos(), + ch_start_pos, )); } else { buf.push(escape_ch as u16); @@ -154,6 +155,7 @@ impl StringLiteral { _ if escape_ch.is_digit(8) => { Self::take_legacy_octal_escape_sequence( cursor, + ch_start_pos, Some(&mut buf), strict_mode, escape_ch as u8, @@ -173,11 +175,11 @@ impl StringLiteral { } }; } - Some(next_ch) => { - if next_ch.len_utf16() == 1 { - buf.push(next_ch as u16); + Some(ch) => { + if ch.len_utf16() == 1 { + buf.push(ch as u16); } else { - buf.extend(next_ch.encode_utf16(&mut [0u16; 2]).iter()); + buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter()); } } None => { @@ -198,6 +200,7 @@ impl StringLiteral { #[inline] pub(super) fn take_unicode_escape_sequence( cursor: &mut Cursor, + start_pos: Position, code_units_buf: Option<&mut Vec>, ) -> Result where @@ -212,14 +215,14 @@ impl StringLiteral { let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) }; // We know this is a single unicode 
codepoint, convert to u32 let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| { - Error::syntax("malformed Unicode character escape sequence", cursor.pos()) + Error::syntax("malformed Unicode character escape sequence", start_pos) })?; // UTF16Encoding of a numeric code point value if code_point > 0x10_FFFF { return Err(Error::syntax( "Unicode codepoint must not be greater than 0x10FFFF in escape sequence", - cursor.pos(), + start_pos, )); } else if let Some(code_units_buf) = code_units_buf { if code_point <= 65535 { @@ -243,7 +246,7 @@ impl StringLiteral { let code_point_str = str::from_utf8(&code_point_utf8_bytes) .expect("malformed Unicode character escape sequence"); let code_point = u16::from_str_radix(code_point_str, 16) - .map_err(|_| Error::syntax("invalid Unicode escape sequence", cursor.pos()))?; + .map_err(|_| Error::syntax("invalid Unicode escape sequence", start_pos))?; if let Some(code_units_buf) = code_units_buf { code_units_buf.push(code_point); @@ -256,6 +259,7 @@ impl StringLiteral { #[inline] fn take_hex_escape_sequence( cursor: &mut Cursor, + start_pos: Position, code_units_buf: Option<&mut Vec>, ) -> Result where @@ -266,7 +270,7 @@ impl StringLiteral { let code_point_str = str::from_utf8(&code_point_utf8_bytes) .expect("malformed Hexadecimal character escape sequence"); let code_point = u16::from_str_radix(&code_point_str, 16) - .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", cursor.pos()))?; + .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?; if let Some(code_units_buf) = code_units_buf { code_units_buf.push(code_point); @@ -278,6 +282,7 @@ impl StringLiteral { #[inline] fn take_legacy_octal_escape_sequence( cursor: &mut Cursor, + start_pos: Position, code_units_buf: Option<&mut Vec>, strict_mode: bool, init_byte: u8, @@ -288,7 +293,7 @@ impl StringLiteral { if strict_mode { return Err(Error::syntax( "octal escape sequences are not allowed in strict mode", - cursor.pos(), + 
start_pos, )); } // Grammar: OctalDigit diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index e9d01d46915..7ef4a34bc04 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -917,13 +917,19 @@ fn take_string_characters_legacy_octal_escape() { for (s, _) in test_cases.iter() { let mut cursor = Cursor::new(s.as_bytes()); - StringLiteral::take_string_characters( + + if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( &mut cursor, Position::new(1, 1), StringTerminator::End, true, ) - .expect_err("Octal-escape in strict mode not rejected as expected"); + .expect_err("Octal-escape in strict mode not rejected as expected") + { + assert_eq!(pos, Position::new(1, 1)); + } else { + panic!("invalid error type"); + } } } @@ -964,13 +970,19 @@ fn take_string_characters_non_octal_decimal_escape() { for (s, _) in test_cases.iter() { let mut cursor = Cursor::new(s.as_bytes()); - StringLiteral::take_string_characters( + + if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( &mut cursor, Position::new(1, 1), StringTerminator::End, true, ) - .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected"); + .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected") + { + assert_eq!(pos, Position::new(1, 1)); + } else { + panic!("invalid error type"); + } } }