Skip to content

Commit

Permalink
Hashbang lexer support (#1631)
Browse files Browse the repository at this point in the history
  • Loading branch information
nekevss authored Oct 7, 2021
1 parent eaa61f4 commit 69d9f62
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 18 deletions.
67 changes: 50 additions & 17 deletions boa/src/syntax/lexer/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{
lexer::{Token, TokenKind},
},
};
use core::convert::TryFrom;
use std::io::Read;

/// Lexes a single line comment.
Expand Down Expand Up @@ -65,27 +66,59 @@ impl<R> Tokenizer<R> for MultiLineComment {
let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing");

let mut new_line = false;
loop {
if let Some(ch) = cursor.next_byte()? {
if ch == b'*' && cursor.next_is(b'/')? {
break;
} else if ch == b'\n' {
new_line = true;
while let Some(ch) = cursor.next_char()? {
let tried_ch = char::try_from(ch);
match tried_ch {
Ok(c) if c == '*' && cursor.next_is(b'/')? => {
return Ok(Token::new(
if new_line {
TokenKind::LineTerminator
} else {
TokenKind::Comment
},
Span::new(start_pos, cursor.pos()),
))
}
} else {
return Err(Error::syntax(
"unterminated multiline comment",
cursor.pos(),
));
}
Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => {
new_line = true
}
_ => {}
};
}

Err(Error::syntax(
"unterminated multiline comment",
cursor.pos(),
))
}
}

/// Lexes a hashbang (`#!`) comment on the first line of the source.
///
/// The tokenizer implementation reads characters until it meets a line terminator
/// (`\r`, `\n`, U+2028 or U+2029) or the input ends, and then produces a single
/// `TokenKind::Comment` token spanning from the start position to the cursor.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar

pub(super) struct HashbangComment;

impl<R> Tokenizer<R> for HashbangComment {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
where
R: Read,
{
let _timer = BoaProfiler::global().start_event("Hashbang", "Lexing");

while let Some(ch) = cursor.next_char()? {
let tried_ch = char::try_from(ch);
match tried_ch {
Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => break,
_ => {}
};
}

Ok(Token::new(
if new_line {
TokenKind::LineTerminator
} else {
TokenKind::Comment
},
TokenKind::Comment,
Span::new(start_pos, cursor.pos()),
))
}
Expand Down
13 changes: 12 additions & 1 deletion boa/src/syntax/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub mod token;
mod tests;

use self::{
comment::{MultiLineComment, SingleLineComment},
comment::{HashbangComment, MultiLineComment, SingleLineComment},
cursor::Cursor,
identifier::Identifier,
number::NumberLiteral,
Expand Down Expand Up @@ -191,6 +191,17 @@ impl<R> Lexer<R> {
}
};

// Handle the hashbang here so that the match block below still reports an
// error for `#` when the position is not (1, 1).
if start.column_number() == 1 && start.line_number() == 1 && next_ch == 0x23 {
if let Some(hashbang_peek) = self.cursor.peek()? {
if hashbang_peek == 0x21 {
let _token = HashbangComment.lex(&mut self.cursor, start);
return self.next();
}
}
};

if let Ok(c) = char::try_from(next_ch) {
let token = match c {
'\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
Expand Down
26 changes: 26 additions & 0 deletions boa/src/syntax/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,3 +372,29 @@ fn empty_statement() {
],
);
}

/// A hashbang line whose text looks like a `"use strict"` directive is passed to
/// `check_parser` together with an empty expected node list.
#[test]
fn hashbang_use_strict_no_with() {
    // The raw string ends with a line break right after the hashbang line.
    let source = r#"#!\"use strict"
"#;
    check_parser(source, vec![]);
}

/// Same hashbang line as above, followed by a `with` statement on the second line.
///
/// NOTE(review): the test is marked `#[ignore]`; presumably the parser cannot yet
/// handle the `with` statement in this input. Confirm before re-enabling.
#[test]
#[ignore]
fn hashbang_use_strict_with_with_statement() {
    let source = r#"#!\"use strict"
with({}) {}
"#;
    check_parser(source, vec![]);
}

/// A source consisting solely of a hashbang comment (no trailing line terminator)
/// is passed to `check_parser` with an empty expected node list.
#[test]
fn hashbang_comment() {
    let source = r"#!Comment Here";
    check_parser(source, vec![]);
}

0 comments on commit 69d9f62

Please sign in to comment.