parser: Cleanup Parser::bump_with and its uses #69376

Merged · 1 commit · Feb 23, 2020
178 changes: 51 additions & 127 deletions src/librustc_parse/parser/mod.rs
@@ -601,141 +601,76 @@ impl<'a> Parser<'a> {
)
}

/// Expects and consumes a `+`. If `+=` is seen, replaces it with a `=`
/// and continues. If a `+` is not seen, returns `false`.
///
/// This is used when token-splitting `+=` into `+`.
/// See issue #47856 for an example of when this may occur.
fn eat_plus(&mut self) -> bool {
self.expected_tokens.push(TokenType::Token(token::BinOp(token::Plus)));
match self.token.kind {
token::BinOp(token::Plus) => {
self.bump();
/// Eats the expected token if it's present, possibly breaking
/// compound tokens like multi-character operators in the process.
/// Returns `true` if the token was eaten.
fn break_and_eat(&mut self, expected: TokenKind) -> bool {
if self.token.kind == expected {
self.bump();
return true;
}
match self.token.kind.break_two_token_op() {
Some((first, second)) if first == expected => {
let first_span = self.sess.source_map().start_point(self.token.span);
let second_span = self.token.span.with_lo(first_span.hi());
self.set_token(Token::new(first, first_span));
self.bump_with(Token::new(second, second_span));
true
}
token::BinOpEq(token::Plus) => {
let start_point = self.sess.source_map().start_point(self.token.span);
self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi()));
true
_ => {
self.expected_tokens.push(TokenType::Token(expected));
false
}
_ => false,
}
}
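
As an aside (not part of the diff), the span arithmetic in `break_and_eat` is easier to see with plain byte offsets: the token that is kept reuses the start of the original span, and the synthesized second token covers the remainder. A minimal sketch, where `Span` is a simplified stand-in for the real rustc span type:

// Simplified stand-in for a span: only lo/hi byte offsets matter here.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Span { lo: u32, hi: u32 }

impl Span {
    // Assumes one-byte tokens; the real `SourceMap::start_point` handles wider characters.
    fn start_point(self) -> Span { Span { lo: self.lo, hi: self.lo + 1 } }
    fn with_lo(self, lo: u32) -> Span { Span { lo, ..self } }
}

fn main() {
    // Suppose `+=` occupies bytes 10..12 and the parser expects `+`.
    let plus_eq = Span { lo: 10, hi: 12 };
    let first = plus_eq.start_point();       // span of the kept `+`: 10..11
    let second = plus_eq.with_lo(first.hi);  // span of the synthesized `=`: 11..12
    assert_eq!(first, Span { lo: 10, hi: 11 });
    assert_eq!(second, Span { lo: 11, hi: 12 });
}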

/// Expects and consumes an `&`. If `&&` is seen, replaces it with a single
/// `&` and continues. If an `&` is not seen, signals an error.
/// Eats `+` possibly breaking tokens like `+=` in the process.
fn eat_plus(&mut self) -> bool {
self.break_and_eat(token::BinOp(token::Plus))
}

/// Eats `&` possibly breaking tokens like `&&` in the process.
/// Signals an error if `&` was not eaten.
fn expect_and(&mut self) -> PResult<'a, ()> {
self.expected_tokens.push(TokenType::Token(token::BinOp(token::And)));
match self.token.kind {
token::BinOp(token::And) => {
self.bump();
Ok(())
}
token::AndAnd => {
let start_point = self.sess.source_map().start_point(self.token.span);
Ok(self
.bump_with(token::BinOp(token::And), self.token.span.with_lo(start_point.hi())))
}
_ => self.unexpected(),
}
if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
}
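
For illustration only (not from the PR), here is source where `expect_and` has to break a token: a reference to a reference is lexed as a single `&&`, but the type grammar consumes one `&` per nesting level.

fn main() {
    let s = String::from("hi");
    // `&&String` starts with a single `AndAnd` token that the parser splits.
    let r: &&String = &&s;
    assert_eq!(r.len(), 2); // auto-deref through both layers of reference
}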

/// Expects and consumes an `|`. If `||` is seen, replaces it with a single
/// `|` and continues. If an `|` is not seen, signals an error.
/// Eats `|` possibly breaking tokens like `||` in the process.
/// Signals an error if `|` was not eaten.
fn expect_or(&mut self) -> PResult<'a, ()> {
self.expected_tokens.push(TokenType::Token(token::BinOp(token::Or)));
match self.token.kind {
token::BinOp(token::Or) => {
self.bump();
Ok(())
}
token::OrOr => {
let start_point = self.sess.source_map().start_point(self.token.span);
Ok(self
.bump_with(token::BinOp(token::Or), self.token.span.with_lo(start_point.hi())))
}
_ => self.unexpected(),
}
if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
}

/// Attempts to consume a `<`. If `<<` is seen, replaces it with a single
/// `<` and continues. If `<-` is seen, replaces it with a single `<`
/// and continues. If a `<` is not seen, returns `false`.
///
/// This is meant to be used when parsing generics on a path to get the
/// starting token.
/// Eats `<` possibly breaking tokens like `<<` in the process.
fn eat_lt(&mut self) -> bool {
self.expected_tokens.push(TokenType::Token(token::Lt));
let ate = match self.token.kind {
token::Lt => {
self.bump();
true
}
token::BinOp(token::Shl) => {
let start_point = self.sess.source_map().start_point(self.token.span);
self.bump_with(token::Lt, self.token.span.with_lo(start_point.hi()));
true
}
token::LArrow => {
let start_point = self.sess.source_map().start_point(self.token.span);
self.bump_with(
token::BinOp(token::Minus),
self.token.span.with_lo(start_point.hi()),
);
true
}
_ => false,
};

let ate = self.break_and_eat(token::Lt);
if ate {
// See doc comment for `unmatched_angle_bracket_count`.
self.unmatched_angle_bracket_count += 1;
self.max_angle_bracket_count += 1;
debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
}

ate
}
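
Similarly for `eat_lt` (again an illustration, not part of the diff): a generic argument list that begins with a qualified path makes the lexer emit a single `<<`, which the parser breaks into two `<` tokens.

trait Width { type Out; }
impl Width for u8 { type Out = u16; }

struct Wrap<T>(T);

fn main() {
    // `Wrap<<u8 as Width>::Out>` begins with a `<<` token: the first `<` opens
    // the generic arguments and the second starts the qualified path.
    let w: Wrap<<u8 as Width>::Out> = Wrap(7u16);
    let Wrap(n) = w;
    assert_eq!(n, 7);
}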

/// Eats `<` possibly breaking tokens like `<<` in the process.
/// Signals an error if `<` was not eaten.
fn expect_lt(&mut self) -> PResult<'a, ()> {
if !self.eat_lt() { self.unexpected() } else { Ok(()) }
if self.eat_lt() { Ok(()) } else { self.unexpected() }
}

/// Expects and consumes a single `>` token. If a `>>` is seen, replaces it
/// with a single `>` and continues. If a `>` is not seen, signals an error.
/// Eats `>` possibly breaking tokens like `>>` in the process.
/// Signals an error if `>` was not eaten.
fn expect_gt(&mut self) -> PResult<'a, ()> {
self.expected_tokens.push(TokenType::Token(token::Gt));
let ate = match self.token.kind {
token::Gt => {
self.bump();
Some(())
}
token::BinOp(token::Shr) => {
let start_point = self.sess.source_map().start_point(self.token.span);
Some(self.bump_with(token::Gt, self.token.span.with_lo(start_point.hi())))
}
token::BinOpEq(token::Shr) => {
let start_point = self.sess.source_map().start_point(self.token.span);
Some(self.bump_with(token::Ge, self.token.span.with_lo(start_point.hi())))
}
token::Ge => {
let start_point = self.sess.source_map().start_point(self.token.span);
Some(self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi())))
}
_ => None,
};

match ate {
Some(_) => {
// See doc comment for `unmatched_angle_bracket_count`.
if self.unmatched_angle_bracket_count > 0 {
self.unmatched_angle_bracket_count -= 1;
debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
}

Ok(())
if self.break_and_eat(token::Gt) {
// See doc comment for `unmatched_angle_bracket_count`.
if self.unmatched_angle_bracket_count > 0 {
self.unmatched_angle_bracket_count -= 1;
debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
}
None => self.unexpected(),
Ok(())
} else {
self.unexpected()
}
}
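
The classic motivation for `expect_gt`, shown here only as an illustration, is closing two generic scopes at once: the lexer produces one `>>` token and the parser splits it so both argument lists can be closed.

fn main() {
    // Each nested generic here ends with what the lexer sees as one `>>` token.
    let nested: Vec<Vec<u8>> = Vec::new();
    assert!(nested.is_empty());

    // Turbofish with a nested generic argument takes the same path.
    let also_nested = Vec::<Vec<u8>>::new();
    assert!(also_nested.is_empty());
}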

@@ -903,41 +838,30 @@ impl<'a> Parser<'a> {
}
}

/// Advance the parser by one token.
pub fn bump(&mut self) {
/// Advance the parser by one token, using the provided token as the next one.
fn bump_with(&mut self, next_token: Token) {
// Bumping after EOF is a bad sign, usually an infinite loop.
if self.prev_token.kind == TokenKind::Eof {
let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
self.span_bug(self.token.span, msg);
}

// Update the current and previous tokens.
self.prev_token = self.token.take();
self.unnormalized_prev_token = self.unnormalized_token.take();
let next_token = self.next_tok(self.unnormalized_prev_token.span);
self.set_token(next_token);

// Update fields derived from the previous token.
self.prev_span = self.unnormalized_prev_token.span;

// Diagnostics.
self.expected_tokens.clear();
}

/// Advances the parser using the provided token as the next one. Use this when
/// consuming a part of a token. For example a single `<` from `<<`.
/// FIXME: this function sets the previous token data to some semi-nonsensical values
/// which kind of work because they are currently used in very limited ways in practice.
/// Correct token kinds and spans need to be calculated instead.
fn bump_with(&mut self, next: TokenKind, span: Span) {
// Update the current and previous tokens.
self.prev_token = self.token.take();
self.unnormalized_prev_token = self.unnormalized_token.take();
self.set_token(Token::new(next, span));

// Update fields derived from the previous token.
self.prev_span = self.unnormalized_prev_token.span.with_hi(span.lo());

self.expected_tokens.clear();
/// Advance the parser by one token.
pub fn bump(&mut self) {
let next_token = self.next_tok(self.unnormalized_token.span);
self.bump_with(next_token);
}
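
To make the new shape concrete, here is a toy sketch with made-up types (not the rustc `Parser`): after this change every advance funnels through `bump_with`, so the previous-token bookkeeping cannot diverge between the normal path and the token-splitting path.

#[derive(Clone, Debug, Default, PartialEq)]
struct Tok(String);

struct Toy {
    stream: Vec<Tok>,
    pos: usize,
    token: Tok,
    prev_token: Tok,
}

impl Toy {
    fn next_tok(&mut self) -> Tok {
        let t = self.stream.get(self.pos).cloned().unwrap_or_default();
        self.pos += 1;
        t
    }

    // Single advance point used by both normal bumping and token splitting.
    fn bump_with(&mut self, next_token: Tok) {
        self.prev_token = std::mem::replace(&mut self.token, next_token);
    }

    // Normal advance pulls the next token off the stream and delegates.
    fn bump(&mut self) {
        let next = self.next_tok();
        self.bump_with(next);
    }
}

fn main() {
    let mut p = Toy {
        stream: vec![Tok("+".into()), Tok("=".into())],
        pos: 0,
        token: Tok("<start>".into()),
        prev_token: Tok::default(),
    };
    p.bump();                     // normal path
    assert_eq!(p.token, Tok("+".into()));
    p.bump_with(Tok("=".into())); // e.g. after splitting a compound token
    assert_eq!(p.prev_token, Tok("+".into()));
}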

/// Look-ahead `dist` tokens of `self.token` and get access to that token there.
33 changes: 33 additions & 0 deletions src/libsyntax/token.rs
@@ -270,6 +270,39 @@ impl TokenKind {
Literal(Lit::new(kind, symbol, suffix))
}

// An approximation to proc-macro-style single-character operators used by the rustc parser.
// If the operator token can be broken into two tokens, the first of which is single-character,
// then this function performs that operation, otherwise it returns `None`.
pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
Some(match *self {
Le => (Lt, Eq),
EqEq => (Eq, Eq),
Ne => (Not, Eq),
Ge => (Gt, Eq),
AndAnd => (BinOp(And), BinOp(And)),
OrOr => (BinOp(Or), BinOp(Or)),
BinOp(Shl) => (Lt, Lt),
BinOp(Shr) => (Gt, Gt),
BinOpEq(Plus) => (BinOp(Plus), Eq),
BinOpEq(Minus) => (BinOp(Minus), Eq),
BinOpEq(Star) => (BinOp(Star), Eq),
BinOpEq(Slash) => (BinOp(Slash), Eq),
BinOpEq(Percent) => (BinOp(Percent), Eq),
BinOpEq(Caret) => (BinOp(Caret), Eq),
BinOpEq(And) => (BinOp(And), Eq),
BinOpEq(Or) => (BinOp(Or), Eq),
BinOpEq(Shl) => (Lt, Le),
BinOpEq(Shr) => (Gt, Ge),
DotDot => (Dot, Dot),
DotDotDot => (Dot, DotDot),
ModSep => (Colon, Colon),
RArrow => (BinOp(Minus), Gt),
LArrow => (Lt, BinOp(Minus)),
FatArrow => (Eq, Gt),
_ => return None,
})
}
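
As an illustration of how this table composes (using a toy enum, not the real `TokenKind`): the parser only breaks one level at a time, but repeatedly splitting off the single-character head reduces a compound operator such as `<<=` to single-character tokens, which is the sense in which it approximates proc-macro-style operators.

#[allow(dead_code)] // `Shl` appears only as a pattern in this sketch
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok { Lt, Le, Eq, Shl, ShlEq }

// Mirrors three rows of the table above for the `<` family.
fn break_two(t: Tok) -> Option<(Tok, Tok)> {
    Some(match t {
        Tok::Le => (Tok::Lt, Tok::Eq),    // `<=`  -> `<` `=`
        Tok::Shl => (Tok::Lt, Tok::Lt),   // `<<`  -> `<` `<`
        Tok::ShlEq => (Tok::Lt, Tok::Le), // `<<=` -> `<` `<=`
        _ => return None,
    })
}

fn main() {
    // Flatten `<<=` by always splitting off the single-character head.
    let (mut out, mut cur) = (Vec::new(), Some(Tok::ShlEq));
    while let Some(t) = cur {
        match break_two(t) {
            Some((head, rest)) => { out.push(head); cur = Some(rest); }
            None => { out.push(t); cur = None; }
        }
    }
    assert_eq!(out, vec![Tok::Lt, Tok::Lt, Tok::Eq]);
}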

/// Returns tokens that are likely to be typed accidentally instead of the current token.
/// Enables better error recovery when the wrong token is found.
pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {