Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up JSON operator tokenizing code #923

Merged
merged 1 commit into from
Jul 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,14 @@ impl<'a> Tokenizer<'a> {
Ok(tokens)
}

// Tokenize the identifier or keyword in `ch`
fn tokenize_identifier_or_keyword(
&self,
ch: String,
ch: impl IntoIterator<Item = char>,
chars: &mut State,
) -> Result<Option<Token>, TokenizerError> {
chars.next(); // consume the first char
let ch: String = ch.into_iter().collect();
let word = self.tokenize_word(ch, chars);

// TODO: implement parsing of exponent here
Expand Down Expand Up @@ -550,7 +552,7 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with a "b" or "B"
let s = self.tokenize_word(b.to_string(), chars);
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
Expand All @@ -569,7 +571,7 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b.to_string(), chars);
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
Expand All @@ -585,7 +587,7 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n.to_string(), chars);
let s = self.tokenize_word(n, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
Expand All @@ -602,7 +604,7 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x.to_string(), chars);
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
Expand All @@ -619,7 +621,7 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "X"
let s = self.tokenize_word(x.to_string(), chars);
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
Expand Down Expand Up @@ -794,9 +796,7 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some(' ') => self.consume_and_return(chars, Token::Mod),
Some(sch) if self.dialect.is_identifier_start('%') => {
let mut s = ch.to_string();
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.consume_and_return(chars, Token::Mod),
}
Expand Down Expand Up @@ -917,9 +917,7 @@ impl<'a> Tokenizer<'a> {
}
Some(' ') => Ok(Some(Token::Sharp)),
Some(sch) if self.dialect.is_identifier_start('#') => {
let mut s = ch.to_string();
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::Sharp)),
}
Expand All @@ -934,19 +932,14 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some(' ') => Ok(Some(Token::AtAt)),
Some(tch) if self.dialect.is_identifier_start('@') => {
let mut s = ch.to_string();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the new construction is both less verbose and clearer about intent

s.push('@');
s.push_str(&tch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
self.tokenize_identifier_or_keyword([ch, '@', *tch], chars)
}
_ => Ok(Some(Token::AtAt)),
}
}
Some(' ') => Ok(Some(Token::AtSign)),
Some(sch) if self.dialect.is_identifier_start('@') => {
let mut s = ch.to_string();
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::AtSign)),
}
Expand All @@ -959,7 +952,7 @@ impl<'a> Tokenizer<'a> {

// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
self.tokenize_identifier_or_keyword(ch.to_string(), chars)
self.tokenize_identifier_or_keyword([ch], chars)
}
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),

Expand Down Expand Up @@ -1086,8 +1079,8 @@ impl<'a> Tokenizer<'a> {
}

/// Tokenize an identifier or keyword, after the first char is already consumed.
fn tokenize_word(&self, first_chars: String, chars: &mut State) -> String {
let mut s = first_chars;
fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uses some generics to avoid having to manually construct Strings in several places.

let mut s = first_chars.into();
s.push_str(&peeking_take_while(chars, |ch| {
self.dialect.is_identifier_part(ch)
}));
Expand Down
Loading