Hashbang lexer support (#1631)

boa-dev · Oct 7, 2021 · 69d9f62 · 69d9f62
1 parent eaa61f4
commit 69d9f62
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 18 deletions.
diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs
@@ -8,6 +8,7 @@ use crate::{
         lexer::{Token, TokenKind},
     },
 };
+use core::convert::TryFrom;
 use std::io::Read;
 
 /// Lexes a single line comment.
@@ -65,27 +66,59 @@ impl<R> Tokenizer<R> for MultiLineComment {
         let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing");
 
         let mut new_line = false;
-        loop {
-            if let Some(ch) = cursor.next_byte()? {
-                if ch == b'*' && cursor.next_is(b'/')? {
-                    break;
-                } else if ch == b'\n' {
-                    new_line = true;
+        while let Some(ch) = cursor.next_char()? {
+            let tried_ch = char::try_from(ch);
+            match tried_ch {
+                Ok(c) if c == '*' && cursor.next_is(b'/')? => {
+                    return Ok(Token::new(
+                        if new_line {
+                            TokenKind::LineTerminator
+                        } else {
+                            TokenKind::Comment
+                        },
+                        Span::new(start_pos, cursor.pos()),
+                    ))
                 }
-            } else {
-                return Err(Error::syntax(
-                    "unterminated multiline comment",
-                    cursor.pos(),
-                ));
-            }
+                Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => {
+                    new_line = true
+                }
+                _ => {}
+            };
+        }
+
+        Err(Error::syntax(
+            "unterminated multiline comment",
+            cursor.pos(),
+        ))
+    }
+}
+
+/// Lexes a hashbang comment (`#!...`) found on the first line of the source.
+///
+/// More information:
+///  - [ECMAScript reference][spec]
+///
+/// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
+
+pub(super) struct HashbangComment;
+
+impl<R> Tokenizer<R> for HashbangComment {
+    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
+    where
+        R: Read,
+    {
+        let _timer = BoaProfiler::global().start_event("Hashbang", "Lexing");
+
+        while let Some(ch) = cursor.next_char()? {
+            let tried_ch = char::try_from(ch);
+            match tried_ch {
+                Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => break,
+                _ => {}
+            };
         }
 
         Ok(Token::new(
-            if new_line {
-                TokenKind::LineTerminator
-            } else {
-                TokenKind::Comment
-            },
+            TokenKind::Comment,
             Span::new(start_pos, cursor.pos()),
         ))
     }

diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs
@@ -30,7 +30,7 @@ pub mod token;
 mod tests;
 
 use self::{
-    comment::{MultiLineComment, SingleLineComment},
+    comment::{HashbangComment, MultiLineComment, SingleLineComment},
     cursor::Cursor,
     identifier::Identifier,
     number::NumberLiteral,
@@ -191,6 +191,17 @@ impl<R> Lexer<R> {
             }
         };
 
+        // Handle a hashbang (`#!`, bytes 0x23 0x21) here, so that the match block below
+        // still reports an error for a `#` that is not at position (1, 1).
+        if start.column_number() == 1 && start.line_number() == 1 && next_ch == 0x23 {
+            if let Some(hashbang_peek) = self.cursor.peek()? {
+                if hashbang_peek == 0x21 {
+                    let _token = HashbangComment.lex(&mut self.cursor, start);
+                    return self.next();
+                }
+            }
+        };
+
         if let Ok(c) = char::try_from(next_ch) {
             let token = match c {
                 '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(

diff --git a/boa/src/syntax/parser/tests.rs b/boa/src/syntax/parser/tests.rs
@@ -372,3 +372,29 @@ fn empty_statement() {
         ],
     );
 }
+
+#[test]
+fn hashbang_use_strict_no_with() {
+    check_parser(
+        r#"#!\"use strict"
+        "#,
+        vec![],
+    );
+}
+
+#[test]
+#[ignore]
+fn hashbang_use_strict_with_with_statement() {
+    check_parser(
+        r#"#!\"use strict"
+        
+        with({}) {}
+        "#,
+        vec![],
+    );
+}
+
+#[test]
+fn hashbang_comment() {
+    check_parser(r"#!Comment Here", vec![]);
+}