rustc_parse top-level cleanups #125815

Merged · 14 commits · Jun 5, 2024
Changes from 1 commit
Don't use the word "parse" for lexing operations.
Lexing converts source text into a token stream. Parsing converts a
token stream into AST fragments. This commit renames several lexing
operations that have "parse" in the name. I think these names have been
subtly confusing me for years.

This is just a `s/parse/lex/` on function names, with one exception:
`parse_stream_from_source_str` becomes `source_str_to_stream`, to make
it consistent with the existing `source_file_to_stream`. The commit also
moves that function within the file so that it sits just above
`source_file_to_stream`.

The commit also cleans up a few comments along the way.
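To make the lexing/parsing boundary concrete, here is a minimal sketch of the two stages as they look after this commit. It assumes the compiler's internal crates (`rustc_parse`, `rustc_session`, `rustc_span`) and a caller-supplied `ParseSess`; `lex_then_parse` and its body are illustrative, not code from this PR, though `source_str_to_stream` and `stream_to_parser` are the entry points this diff touches.

```rust
use rustc_parse::{source_str_to_stream, stream_to_parser};
use rustc_session::parse::ParseSess;
use rustc_span::FileName;

// Hypothetical caller; assumes a ParseSess has already been set up.
fn lex_then_parse(psess: &ParseSess, src: &str) {
    // Stage 1: lexing. Source text -> token stream. All of this commit's
    // renames live on this side of the boundary.
    let stream = source_str_to_stream(
        FileName::anon_source_code(src),
        src.to_string(),
        psess,
        None, // no override span
    );

    // Stage 2: parsing. Token stream -> AST fragments.
    let mut parser = stream_to_parser(psess, stream, None);
    let _expr = parser.parse_expr(); // e.g., parse one expression
}
```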
nnethercote committed Jun 5, 2024
commit d1215da26e7848bd925a9fffecdaa7ea51b360c3
4 changes: 2 additions & 2 deletions compiler/rustc_expand/src/proc_macro_server.rs
@@ -13,7 +13,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Diag, ErrorGuaranteed, MultiSpan, PResult};
 use rustc_parse::lexer::nfc_normalize;
-use rustc_parse::parse_stream_from_source_str;
+use rustc_parse::source_str_to_stream;
 use rustc_session::parse::ParseSess;
 use rustc_span::def_id::CrateNum;
 use rustc_span::symbol::{self, sym, Symbol};
@@ -538,7 +538,7 @@ impl server::TokenStream for Rustc<'_, '_> {
     }

     fn from_str(&mut self, src: &str) -> Self::TokenStream {
-        parse_stream_from_source_str(
+        source_str_to_stream(
             FileName::proc_macro_source_code(src),
             src.to_string(),
             self.psess(),

4 changes: 2 additions & 2 deletions compiler/rustc_parse/src/lexer/mod.rs
@@ -42,7 +42,7 @@ pub(crate) struct UnmatchedDelim {
     pub candidate_span: Option<Span>,
 }

-pub(crate) fn parse_token_trees<'psess, 'src>(
+pub(crate) fn lex_token_trees<'psess, 'src>(
     psess: &'psess ParseSess,
     mut src: &'src str,
     mut start_pos: BytePos,
@@ -66,7 +66,7 @@ pub(crate) fn parse_token_trees<'psess, 'src>(
         last_lifetime: None,
     };
     let (stream, res, unmatched_delims) =
-        tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
+        tokentrees::TokenTreesReader::lex_all_token_trees(string_reader);
     match res {
         Ok(()) if unmatched_delims.is_empty() => Ok(stream),
         _ => {
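One detail worth calling out in this hunk: `lex_token_trees` hands back a stream *and* buffered errors *and* any unmatched delimiters, and the caller decides what counts as success. A schematic sketch of that calling convention, using stand-in types rather than rustc's (`Stream`, `LexError`, `Unmatched`, `lex`, and `entry` are all hypothetical):

```rust
// Stand-ins for TokenStream, PErr, and UnmatchedDelim.
struct Stream(Vec<String>);
struct LexError(String);
struct Unmatched(char);

// Like lex_token_trees: always return the stream we managed to build,
// collecting errors rather than aborting at the first one.
fn lex(_src: &str) -> (Stream, Result<(), Vec<LexError>>, Vec<Unmatched>) {
    (Stream(vec![]), Ok(()), vec![])
}

fn entry(src: &str) -> Result<Stream, Vec<LexError>> {
    let (stream, res, unmatched) = lex(src);
    // Only succeed when nothing went wrong *and* no delimiter dangles.
    if res.is_ok() && unmatched.is_empty() {
        return Ok(stream);
    }
    // Otherwise, convert all buffered problems into reportable errors.
    let mut errs = res.err().unwrap_or_default();
    errs.extend(
        unmatched
            .into_iter()
            .map(|Unmatched(d)| LexError(format!("unclosed delimiter `{d}`"))),
    );
    Err(errs)
}

fn main() {
    match entry("fn f() {") {
        Ok(_) => println!("lexed OK"),
        Err(errs) => println!("{} error(s)", errs.len()),
    }
}
```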
47 changes: 22 additions & 25 deletions compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -17,22 +17,21 @@ pub(super) struct TokenTreesReader<'psess, 'src> {
 }

 impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
-    pub(super) fn parse_all_token_trees(
+    pub(super) fn lex_all_token_trees(
         string_reader: StringReader<'psess, 'src>,
     ) -> (TokenStream, Result<(), Vec<PErr<'psess>>>, Vec<UnmatchedDelim>) {
         let mut tt_reader = TokenTreesReader {
             string_reader,
             token: Token::dummy(),
             diag_info: TokenTreeDiagInfo::default(),
         };
-        let (_open_spacing, stream, res) =
-            tt_reader.parse_token_trees(/* is_delimited */ false);
+        let (_open_spacing, stream, res) = tt_reader.lex_token_trees(/* is_delimited */ false);
         (stream, res, tt_reader.diag_info.unmatched_delims)
     }

-    // Parse a stream of tokens into a list of `TokenTree`s. The `Spacing` in
-    // the result is that of the opening delimiter.
-    fn parse_token_trees(
+    // Lex into a token stream. The `Spacing` in the result is that of the
+    // opening delimiter.
+    fn lex_token_trees(
         &mut self,
         is_delimited: bool,
     ) -> (Spacing, TokenStream, Result<(), Vec<PErr<'psess>>>) {
@@ -42,12 +41,10 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
         let mut buf = Vec::new();
         loop {
             match self.token.kind {
-                token::OpenDelim(delim) => {
-                    buf.push(match self.parse_token_tree_open_delim(delim) {
-                        Ok(val) => val,
-                        Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
-                    })
-                }
+                token::OpenDelim(delim) => buf.push(match self.lex_token_tree_open_delim(delim) {
+                    Ok(val) => val,
+                    Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
+                }),
                 token::CloseDelim(delim) => {
                     return (
                         open_spacing,
@@ -95,24 +92,24 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
         err
     }

-    fn parse_token_tree_open_delim(
+    fn lex_token_tree_open_delim(
         &mut self,
         open_delim: Delimiter,
     ) -> Result<TokenTree, Vec<PErr<'psess>>> {
-        // The span for beginning of the delimited section
+        // The span for beginning of the delimited section.
         let pre_span = self.token.span;

         self.diag_info.open_braces.push((open_delim, self.token.span));

-        // Parse the token trees within the delimiters.
+        // Lex the token trees within the delimiters.
         // We stop at any delimiter so we can try to recover if the user
         // uses an incorrect delimiter.
-        let (open_spacing, tts, res) = self.parse_token_trees(/* is_delimited */ true);
+        let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true);
         if let Err(errs) = res {
             return Err(self.unclosed_delim_err(tts, errs));
         }

-        // Expand to cover the entire delimited token tree
+        // Expand to cover the entire delimited token tree.
         let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
         let sm = self.string_reader.psess.source_map();

@@ -150,7 +147,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
         self.diag_info.last_unclosed_found_span = Some(self.token.span);
         // This is a conservative error: only report the last unclosed
         // delimiter. The previous unclosed delimiters could actually be
-        // closed! The parser just hasn't gotten to them yet.
+        // closed! The lexer just hasn't gotten to them yet.
         if let Some(&(_, sp)) = self.diag_info.open_braces.last() {
             unclosed_delimiter = Some(sp);
         };
@@ -236,9 +233,9 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
         // out instead of complaining about the unclosed delims.
         let mut parser = crate::stream_to_parser(self.string_reader.psess, tts, None);
         let mut diff_errs = vec![];
-        // Suggest removing a `{` we think appears in an `if`/`while` condition
-        // We want to suggest removing a `{` only if we think we're in an `if`/`while` condition, but
-        // we have no way of tracking this in the lexer itself, so we piggyback on the parser
+        // Suggest removing a `{` we think appears in an `if`/`while` condition.
+        // We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
+        // but we have no way of tracking this in the lexer itself, so we piggyback on the parser.
         let mut in_cond = false;
         while parser.token != token::Eof {
             if let Err(diff_err) = parser.err_vcs_conflict_marker() {
@@ -249,14 +246,15 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
                 parser.token.kind,
                 token::CloseDelim(Delimiter::Brace) | token::FatArrow
             ) {
-                // end of the `if`/`while` body, or the end of a `match` guard
+                // End of the `if`/`while` body, or the end of a `match` guard.
                 in_cond = false;
             } else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) {
                 // Store the `&&` and `let` to use their spans later when creating the diagnostic
                 let maybe_andand = parser.look_ahead(1, |t| t.clone());
                 let maybe_let = parser.look_ahead(2, |t| t.clone());
                 if maybe_andand == token::OpenDelim(Delimiter::Brace) {
-                    // This might be the beginning of the `if`/`while` body (i.e., the end of the condition)
+                    // This might be the beginning of the `if`/`while` body (i.e., the end of the
+                    // condition).
                     in_cond = false;
                 } else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) {
                     let mut err = parser.dcx().struct_span_err(
@@ -288,8 +286,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
     }

     fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'psess> {
-        // An unexpected closing delimiter (i.e., there is no
-        // matching opening delimiter).
+        // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
         let token_str = token_to_string(&self.token);
         let msg = format!("unexpected closing delimiter: `{token_str}`");
         let mut err = self.string_reader.psess.dcx.struct_span_err(self.token.span, msg);
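The two renamed methods are mutually recursive: `lex_token_trees` loops over tokens and, on an opening delimiter, calls `lex_token_tree_open_delim`, which recurses back into `lex_token_trees` for the delimited body. A self-contained toy version of that shape, over plain characters rather than rustc tokens (all names here are illustrative, not rustc APIs):

```rust
// Toy token tree: either a single char or a delimited group.
#[derive(Debug)]
enum Tree {
    Token(char),
    Group(char, Vec<Tree>), // opening delimiter plus nested contents
}

fn close_of(open: char) -> char {
    match open {
        '(' => ')',
        '[' => ']',
        '{' => '}',
        _ => unreachable!(),
    }
}

// Like lex_token_trees: build trees until EOF or the expected closer.
fn lex_trees(
    chars: &mut std::iter::Peekable<impl Iterator<Item = char>>,
    closer: Option<char>,
) -> Result<Vec<Tree>, String> {
    let mut buf = Vec::new();
    loop {
        match chars.peek().copied() {
            // Like lex_token_tree_open_delim: recurse for the delimited body.
            Some(c) if "([{".contains(c) => {
                chars.next();
                let body = lex_trees(chars, Some(close_of(c)))?;
                buf.push(Tree::Group(c, body));
            }
            Some(c) if ")]}".contains(c) => {
                return if Some(c) == closer {
                    chars.next(); // consume the matching closer
                    Ok(buf)
                } else {
                    Err(format!("unexpected closing delimiter: `{c}`"))
                };
            }
            Some(c) => {
                chars.next();
                buf.push(Tree::Token(c));
            }
            None if closer.is_none() => return Ok(buf),
            None => return Err("unclosed delimiter".to_string()),
        }
    }
}

fn main() {
    println!("{:?}", lex_trees(&mut "f(a[b]){c}".chars().peekable(), None));
}
```

As in the real code, an unexpected closer is reported where it is found, while an unclosed opener only surfaces at end of input.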
24 changes: 12 additions & 12 deletions compiler/rustc_parse/src/lib.rs
@@ -84,15 +84,6 @@ pub fn parse_crate_attrs_from_source_str(
     new_parser_from_source_str(psess, name, source).parse_inner_attributes()
 }

-pub fn parse_stream_from_source_str(
-    name: FileName,
-    source: String,
-    psess: &ParseSess,
-    override_span: Option<Span>,
-) -> TokenStream {
-    source_file_to_stream(psess, psess.source_map().new_source_file(name, source), override_span)
-}
-
 /// Creates a new parser from a source string.
 pub fn new_parser_from_source_str(psess: &ParseSess, name: FileName, source: String) -> Parser<'_> {
     panictry_buffer!(maybe_new_parser_from_source_str(psess, name, source))
@@ -142,6 +133,15 @@ fn maybe_source_file_to_parser(

 // Base abstractions

+pub fn source_str_to_stream(
+    name: FileName,
+    source: String,
+    psess: &ParseSess,
+    override_span: Option<Span>,
+) -> TokenStream {
+    source_file_to_stream(psess, psess.source_map().new_source_file(name, source), override_span)
+}
+
 /// Given a `source_file`, produces a sequence of token trees.
 pub fn source_file_to_stream(
     psess: &ParseSess,
@@ -165,7 +165,7 @@ fn maybe_file_to_stream<'psess>(
         ));
     });

-    lexer::parse_token_trees(psess, src.as_str(), source_file.start_pos, override_span)
+    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span)
 }

 /// Given a stream and the `ParseSess`, produces a parser.
@@ -195,13 +195,13 @@ pub fn parse_in<'a, T>(
 pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
     let source = pprust::item_to_string(item);
     let filename = FileName::macro_expansion_source_code(&source);
-    parse_stream_from_source_str(filename, source, psess, Some(item.span))
+    source_str_to_stream(filename, source, psess, Some(item.span))
 }

 pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
     let source = pprust::crate_to_string_for_macros(krate);
     let filename = FileName::macro_expansion_source_code(&source);
-    parse_stream_from_source_str(filename, source, psess, Some(krate.spans.inner_span))
+    source_str_to_stream(filename, source, psess, Some(krate.spans.inner_span))
 }

 pub fn parse_cfg_attr(
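The moved function is a thin convenience wrapper: as its body shows, `source_str_to_stream` just registers the string with the source map and defers to the `source_file_to_stream` base abstraction it now sits above. A hedged sketch of the two equivalent call paths (`demo` is hypothetical; the two entry points and the `new_source_file` call are the ones visible in this diff):

```rust
use rustc_parse::{source_file_to_stream, source_str_to_stream};
use rustc_session::parse::ParseSess;
use rustc_span::FileName;

fn demo(psess: &ParseSess, src: String) {
    // Convenience path: register the string as a source file, then lex it.
    let stream = source_str_to_stream(
        FileName::anon_source_code(&src),
        src.clone(),
        psess,
        None,
    );

    // Equivalent manual path through the base abstraction.
    let sf = psess
        .source_map()
        .new_source_file(FileName::anon_source_code(&src), src);
    let stream2 = source_file_to_stream(psess, sf, None);

    // Both streams are lexed from identical text.
    let _ = (stream, stream2);
}
```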
11 changes: 3 additions & 8 deletions src/librustdoc/passes/lint/check_code_block_syntax.rs
@@ -5,7 +5,7 @@ use rustc_errors::{
     translation::{to_fluent_args, Translate},
     Applicability, DiagCtxt, DiagInner, LazyFallbackBundle,
 };
-use rustc_parse::parse_stream_from_source_str;
+use rustc_parse::source_str_to_stream;
 use rustc_resolve::rustdoc::source_span_for_markdown_range;
 use rustc_session::parse::ParseSess;
 use rustc_span::hygiene::{AstPass, ExpnData, ExpnKind, LocalExpnId, Transparency};
@@ -51,13 +51,8 @@ fn check_rust_syntax(
     let span = DUMMY_SP.apply_mark(expn_id.to_expn_id(), Transparency::Transparent);

     let is_empty = rustc_driver::catch_fatal_errors(|| {
-        parse_stream_from_source_str(
-            FileName::Custom(String::from("doctest")),
-            source,
-            &psess,
-            Some(span),
-        )
-        .is_empty()
+        source_str_to_stream(FileName::Custom(String::from("doctest")), source, &psess, Some(span))
+            .is_empty()
     })
     .unwrap_or(false);
     let buffer = buffer.borrow();
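This rustdoc lint uses the lexer as a cheap syntax probe: if lexing the doctest raises a fatal error, `catch_fatal_errors` returns `Err` and the `unwrap_or(false)` default kicks in. A sketch of that pattern as a standalone helper (`lexes_to_empty` is hypothetical and requires the compiler's internal crates; the calls match the updated code above):

```rust
use rustc_parse::source_str_to_stream;
use rustc_session::parse::ParseSess;
use rustc_span::{FileName, Span};

// Returns true only when the code block lexes cleanly to an empty stream;
// any fatal lexer error is swallowed and treated as "not empty".
fn lexes_to_empty(psess: &ParseSess, source: String, span: Span) -> bool {
    rustc_driver::catch_fatal_errors(|| {
        source_str_to_stream(
            FileName::Custom(String::from("doctest")),
            source,
            psess,
            Some(span), // map diagnostics back to the markdown code block
        )
        .is_empty()
    })
    .unwrap_or(false)
}
```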