Add unit tests for Parser::look_ahead.

It's currently buggy, so some of the test results are surprising, as described in the `FIXME` comments. The bugs will be fixed in subsequent commits.
rust-lang · bors · Jul 12, 2024 · Jul 11, 2024 · Jul 12, 2024 · Jul 12, 2024
commit dad95578b0f000807f23fe2d37ca37ea8bd8522c
diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs
@@ -1376,6 +1376,122 @@ fn ttdelim_span() {
     });
 }
 
+// Uses a macro rather than a function so that failure messages mention the
+// correct line in the test function.
+macro_rules! look {
+    ($p:ident, $dist:literal, $kind:expr) => {
+        $p.look_ahead($dist, |tok| assert_eq!($kind, tok.kind));
+    };
+}
+
+#[test]
+fn look_ahead() {
+    create_default_session_globals_then(|| {
+        let sym_f = Symbol::intern("f");
+        let sym_x = Symbol::intern("x");
+        #[allow(non_snake_case)]
+        let sym_S = Symbol::intern("S");
+        let raw_no = IdentIsRaw::No;
+
+        let psess = psess();
+        let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string());
+
+        // Current position is the `fn`.
+        look!(p, 0, token::Ident(kw::Fn, raw_no));
+        look!(p, 1, token::Ident(sym_f, raw_no));
+        look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
+        look!(p, 3, token::Ident(sym_x, raw_no));
+        look!(p, 4, token::Colon);
+        look!(p, 5, token::Ident(sym::u32, raw_no));
+        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 7, token::OpenDelim(Delimiter::Brace));
+        look!(p, 8, token::Ident(sym_x, raw_no));
+        look!(p, 9, token::CloseDelim(Delimiter::Brace));
+        look!(p, 10, token::Ident(kw::Struct, raw_no));
+        look!(p, 11, token::Ident(sym_S, raw_no));
+        look!(p, 12, token::Semi);
+        // Any lookahead past the end of the token stream returns `Eof`.
+        look!(p, 13, token::Eof);
+        look!(p, 14, token::Eof);
+        look!(p, 15, token::Eof);
+        look!(p, 100, token::Eof);
+
+        // Move forward to the first `x`.
+        for _ in 0..3 {
+            p.bump();
+        }
+        look!(p, 0, token::Ident(sym_x, raw_no));
+        look!(p, 1, token::Colon);
+        look!(p, 2, token::Ident(sym::u32, raw_no));
+        look!(p, 3, token::CloseDelim(Delimiter::Parenthesis));
+        // FIXME(nnethercote) If we lookahead any distance past a close delim
+        // we currently return that close delim.
+        look!(p, 4, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 5, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 100, token::CloseDelim(Delimiter::Parenthesis));
+
+        // Move forward to the `;`.
+        for _ in 0..9 {
+            p.bump();
+        }
+        look!(p, 0, token::Semi);
+        // Any lookahead past the end of the token stream returns `Eof`.
+        look!(p, 1, token::Eof);
+        look!(p, 100, token::Eof);
+
+        // Move one past the `;`, i.e. past the end of the token stream.
+        p.bump();
+        look!(p, 0, token::Eof);
+        look!(p, 1, token::Eof);
+        look!(p, 100, token::Eof);
+
+        // Bumping after Eof is idempotent.
+        p.bump();
+        look!(p, 0, token::Eof);
+        look!(p, 1, token::Eof);
+        look!(p, 100, token::Eof);
+    });
+}
+
+/// FIXME(nnethercote) Currently there is some buggy behaviour when using
+/// `look_ahead` not within the outermost token stream, as this test shows.
+#[test]
+fn look_ahead_non_outermost_stream() {
+    create_default_session_globals_then(|| {
+        let sym_f = Symbol::intern("f");
+        #[allow(non_snake_case)]
+        let sym_S = Symbol::intern("S");
+        let raw_no = IdentIsRaw::No;
+
+        let psess = psess();
+        let mut p = string_to_parser(&psess, "mod m { fn f(x: u32) { x } struct S; }".to_string());
+
+        // Move forward to the `fn`, which is not within the outermost token
+        // stream (because it's inside the `mod { ... }`).
+        for _ in 0..3 {
+            p.bump();
+        }
+        look!(p, 0, token::Ident(kw::Fn, raw_no));
+        look!(p, 1, token::Ident(sym_f, raw_no));
+        look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
+        // FIXME(nnethercote) The current code incorrectly skips the `x: u32)`
+        // to the next token tree.
+        look!(p, 3, token::OpenDelim(Delimiter::Brace));
+        // FIXME(nnethercote) The current code incorrectly skips the `x }`
+        // to the next token tree.
+        look!(p, 4, token::Ident(kw::Struct, raw_no));
+        look!(p, 5, token::Ident(sym_S, raw_no));
+        look!(p, 6, token::Semi);
+        // FIXME(nnethercote) If we lookahead any distance past a close delim
+        // we currently return that close delim.
+        look!(p, 7, token::CloseDelim(Delimiter::Brace));
+        look!(p, 8, token::CloseDelim(Delimiter::Brace));
+        look!(p, 9, token::CloseDelim(Delimiter::Brace));
+        look!(p, 100, token::CloseDelim(Delimiter::Brace));
+    });
+}
+
 // This tests that when parsing a string (rather than a file) we don't try
 // and read in a file for a module declaration and just parse a stub.
 // See `recurse_into_file_modules` in the parser.