Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Parser::look_ahead #127636

Merged
merged 3 commits into from
Jul 12, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add unit tests for Parser::look_ahead.
It's currently buggy, so some of the test results are surprising, as
described in the `FIXME` comments. The bugs will be fixed in subsequent
commits.
  • Loading branch information
nnethercote committed Jul 12, 2024
commit dad95578b0f000807f23fe2d37ca37ea8bd8522c
116 changes: 116 additions & 0 deletions compiler/rustc_parse/src/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,122 @@ fn ttdelim_span() {
});
}

// Uses a macro rather than a function so that failure messages mention the
// correct line in the test function.
fmease marked this conversation as resolved.
Show resolved Hide resolved
macro_rules! look {
($p:ident, $dist:literal, $kind:expr) => {
$p.look_ahead($dist, |tok| assert_eq!($kind, tok.kind));
};
}

#[test]
fn look_ahead() {
create_default_session_globals_then(|| {
let sym_f = Symbol::intern("f");
let sym_x = Symbol::intern("x");
#[allow(non_snake_case)]
let sym_S = Symbol::intern("S");
let raw_no = IdentIsRaw::No;

let psess = psess();
let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string());

// Current position is the `fn`.
look!(p, 0, token::Ident(kw::Fn, raw_no));
look!(p, 1, token::Ident(sym_f, raw_no));
look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
look!(p, 3, token::Ident(sym_x, raw_no));
look!(p, 4, token::Colon);
look!(p, 5, token::Ident(sym::u32, raw_no));
look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
look!(p, 7, token::OpenDelim(Delimiter::Brace));
look!(p, 8, token::Ident(sym_x, raw_no));
look!(p, 9, token::CloseDelim(Delimiter::Brace));
look!(p, 10, token::Ident(kw::Struct, raw_no));
look!(p, 11, token::Ident(sym_S, raw_no));
look!(p, 12, token::Semi);
// Any lookahead past the end of the token stream returns `Eof`.
look!(p, 13, token::Eof);
look!(p, 14, token::Eof);
look!(p, 15, token::Eof);
look!(p, 100, token::Eof);

// Move forward to the first `x`.
for _ in 0..3 {
p.bump();
}
look!(p, 0, token::Ident(sym_x, raw_no));
look!(p, 1, token::Colon);
look!(p, 2, token::Ident(sym::u32, raw_no));
look!(p, 3, token::CloseDelim(Delimiter::Parenthesis));
// FIXME(nnethercote) If we lookahead any distance past a close delim
// we currently return that close delim.
look!(p, 4, token::CloseDelim(Delimiter::Parenthesis));
look!(p, 5, token::CloseDelim(Delimiter::Parenthesis));
look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
look!(p, 100, token::CloseDelim(Delimiter::Parenthesis));

// Move forward to the `;`.
for _ in 0..9 {
p.bump();
}
look!(p, 0, token::Semi);
// Any lookahead past the end of the token stream returns `Eof`.
look!(p, 1, token::Eof);
look!(p, 100, token::Eof);

// Move one past the `;`, i.e. past the end of the token stream.
p.bump();
look!(p, 0, token::Eof);
look!(p, 1, token::Eof);
look!(p, 100, token::Eof);

// Bumping after Eof is idempotent.
p.bump();
look!(p, 0, token::Eof);
look!(p, 1, token::Eof);
look!(p, 100, token::Eof);
});
}

/// FIXME(nnethercote) Currently there is some buggy behaviour when using
/// `look_ahead` not within the outermost token stream, as this test shows.
#[test]
fn look_ahead_non_outermost_stream() {
create_default_session_globals_then(|| {
let sym_f = Symbol::intern("f");
#[allow(non_snake_case)]
let sym_S = Symbol::intern("S");
let raw_no = IdentIsRaw::No;

let psess = psess();
let mut p = string_to_parser(&psess, "mod m { fn f(x: u32) { x } struct S; }".to_string());

// Move forward to the `fn`, which is not within the outermost token
// stream (because it's inside the `mod { ... }`).
for _ in 0..3 {
p.bump();
}
look!(p, 0, token::Ident(kw::Fn, raw_no));
look!(p, 1, token::Ident(sym_f, raw_no));
look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
// FIXME(nnethercote) The current code incorrectly skips the `x: u32)`
// to the next token tree.
look!(p, 3, token::OpenDelim(Delimiter::Brace));
// FIXME(nnethercote) The current code incorrectly skips the `x }`
// to the next token tree.
look!(p, 4, token::Ident(kw::Struct, raw_no));
look!(p, 5, token::Ident(sym_S, raw_no));
look!(p, 6, token::Semi);
// FIXME(nnethercote) If we lookahead any distance past a close delim
// we currently return that close delim.
look!(p, 7, token::CloseDelim(Delimiter::Brace));
look!(p, 8, token::CloseDelim(Delimiter::Brace));
look!(p, 9, token::CloseDelim(Delimiter::Brace));
look!(p, 100, token::CloseDelim(Delimiter::Brace));
});
}

// This tests that when parsing a string (rather than a file) we don't try
// and read in a file for a module declaration and just parse a stub.
// See `recurse_into_file_modules` in the parser.
Expand Down