From a8775d44e9f273b8ec03a0a881f2dc6800eb3027 Mon Sep 17 00:00:00 2001 From: pierwill Date: Wed, 23 Dec 2020 23:01:03 -0800 Subject: [PATCH] Edit rustc_span documentation Various changes to the `rustc_span` docs, including the following: - Additions to top-level docs - Edits to the source_map module docs - Edits to documentation for `Span` and `SpanData` - Added intra-docs links - Documentation for Levenshtein distances - Fixed missing punctuation --- compiler/rustc_span/src/edition.rs | 19 +++---- compiler/rustc_span/src/lev_distance.rs | 16 ++++-- compiler/rustc_span/src/lib.rs | 66 +++++++++++++++--------- compiler/rustc_span/src/source_map.rs | 8 +-- compiler/rustc_span/src/span_encoding.rs | 8 ++- 5 files changed, 71 insertions(+), 46 deletions(-) diff --git a/compiler/rustc_span/src/edition.rs b/compiler/rustc_span/src/edition.rs index 4d0c92f51d7ca..efbb0a23a6f01 100644 --- a/compiler/rustc_span/src/edition.rs +++ b/compiler/rustc_span/src/edition.rs @@ -4,24 +4,25 @@ use std::str::FromStr; use rustc_macros::HashStable_Generic; -/// The edition of the compiler (RFC 2052) +/// The edition of the compiler. (See [RFC 2052](https://github.com/rust-lang/rfcs/blob/master/text/2052-epochs.md).) #[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)] #[derive(HashStable_Generic)] pub enum Edition { - // editions must be kept in order, oldest to newest + // When adding new editions, be sure to do the following: + // + // - update the `ALL_EDITIONS` const + // - update the `EDITION_NAME_LIST` const + // - add a `rust_####()` function to the session + // - update the enum in Cargo's sources as well + // + // Editions *must* be kept in order, oldest to newest. /// The 2015 edition Edition2015, /// The 2018 edition Edition2018, - // when adding new editions, be sure to update: - // - // - Update the `ALL_EDITIONS` const - // - Update the EDITION_NAME_LIST const - // - add a `rust_####()` function to the session - // - update the enum in Cargo's sources as well } -// must be in order from oldest to newest +// Must be in order from oldest to newest. pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018]; pub const EDITION_NAME_LIST: &str = "2015|2018"; diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs index edc6625a6ead7..cea7871923bc6 100644 --- a/compiler/rustc_span/src/lev_distance.rs +++ b/compiler/rustc_span/src/lev_distance.rs @@ -1,10 +1,16 @@ +//! Levenshtein distances. +//! +//! The [Levenshtein distance] is a metric for measuring the difference between two strings. +//! +//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance + use crate::symbol::Symbol; use std::cmp; #[cfg(test)] mod tests; -/// Finds the Levenshtein distance between two strings +/// Finds the Levenshtein distance between two strings. pub fn lev_distance(a: &str, b: &str) -> usize { // cases which don't require further computation if a.is_empty() { @@ -35,14 +41,14 @@ pub fn lev_distance(a: &str, b: &str) -> usize { dcol[t_last + 1] } -/// Finds the best match for a given word in the given iterator +/// Finds the best match for a given word in the given iterator. /// /// As a loose rule to avoid the obviously incorrect suggestions, it takes /// an optional limit for the maximum allowable edit distance, which defaults /// to one-third of the given word. /// -/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with -/// a lower(upper)case letters mismatch. +/// Besides Levenshtein, we use case insensitive comparison to improve accuracy +/// on an edge case with a lower(upper)case letters mismatch. #[cold] pub fn find_best_match_for_name( name_vec: &[Symbol], @@ -98,7 +104,7 @@ fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option String { let mut split_words: Vec<&str> = name.split('_').collect(); - // We are sorting primitive &strs and can use unstable sort here + // We are sorting primitive &strs and can use unstable sort here. split_words.sort_unstable(); split_words.join("_") } diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index f63a73acbf4ba..1c111bb102794 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1,4 +1,13 @@ -//! The source positions and related helper functions. +//! Source positions and related helper functions. +//! +//! Important concepts in this module include: +//! +//! - the *span*, represented by [`SpanData`] and related types; +//! - source code as represented by a [`SourceMap`]; and +//! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module. +//! +//! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata, +//! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`]. //! //! ## Note //! @@ -124,7 +133,7 @@ pub enum RealFileName { impl RealFileName { /// Returns the path suitable for reading from the file system on the local host. - /// Avoid embedding this in build artifacts; see `stable_name` for that. + /// Avoid embedding this in build artifacts; see `stable_name()` for that. pub fn local_path(&self) -> &Path { match self { RealFileName::Named(p) @@ -133,7 +142,7 @@ impl RealFileName { } /// Returns the path suitable for reading from the file system on the local host. - /// Avoid embedding this in build artifacts; see `stable_name` for that. + /// Avoid embedding this in build artifacts; see `stable_name()` for that. pub fn into_local_path(self) -> PathBuf { match self { RealFileName::Named(p) @@ -143,7 +152,7 @@ impl RealFileName { /// Returns the path suitable for embedding into build artifacts. Note that /// a virtualized path will not correspond to a valid file system path; see - /// `local_path` for something that is more likely to return paths into the + /// `local_path()` for something that is more likely to return paths into the /// local host file system. pub fn stable_name(&self) -> &Path { match self { @@ -173,7 +182,7 @@ pub enum FileName { /// Custom sources for explicit parser calls from plugins and drivers. Custom(String), DocTest(PathBuf, isize), - /// Post-substitution inline assembly from LLVM + /// Post-substitution inline assembly from LLVM. InlineAsm(u64), } @@ -266,14 +275,17 @@ impl FileName { } } +/// Represents a span. +/// /// Spans represent a region of code, used for error reporting. Positions in spans -/// are *absolute* positions from the beginning of the source_map, not positions -/// relative to `SourceFile`s. Methods on the `SourceMap` can be used to relate spans back +/// are *absolute* positions from the beginning of the [`SourceMap`], not positions +/// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back /// to the original source. -/// You must be careful if the span crosses more than one file - you will not be +/// +/// You must be careful if the span crosses more than one file, since you will not be /// able to use many of the functions on spans in source_map and you cannot assume -/// that the length of the `span = hi - lo`; there may be space in the `BytePos` -/// range between files. +/// that the length of the span is equal to `span.hi - span.lo`; there may be space in the +/// [`BytePos`] range between files. /// /// `SpanData` is public because `Span` uses a thread-local interner and can't be /// sent to other threads, but some pieces of performance infra run in a separate thread. @@ -384,7 +396,7 @@ impl Span { Span::new(lo, hi, SyntaxContext::root()) } - /// Returns a new span representing an empty span at the beginning of this span + /// Returns a new span representing an empty span at the beginning of this span. #[inline] pub fn shrink_to_lo(self) -> Span { let span = self.data(); @@ -398,7 +410,7 @@ impl Span { } #[inline] - /// Returns true if hi == lo + /// Returns `true` if `hi == lo`. pub fn is_empty(&self) -> bool { let span = self.data(); span.hi == span.lo @@ -512,7 +524,7 @@ impl Span { } /// Checks if a span is "internal" to a macro in which `unsafe` - /// can be used without triggering the `unsafe_code` lint + /// can be used without triggering the `unsafe_code` lint. // (that is, a macro marked with `#[allow_internal_unsafe]`). pub fn allows_unsafe(&self) -> bool { self.ctxt().outer_expn_data().allow_internal_unsafe @@ -700,6 +712,7 @@ impl Span { } } +/// A span together with some additional data. #[derive(Clone, Debug)] pub struct SpanLabel { /// The span we are going to include in the final snippet. @@ -743,7 +756,7 @@ impl Decodable for Span { /// any spans that are debug-printed during the closure's execution. /// /// Normally, the global `TyCtxt` is used to retrieve the `SourceMap` -/// (see `rustc_interface::callbacks::span_debug1). However, some parts +/// (see `rustc_interface::callbacks::span_debug1`). However, some parts /// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before /// a `TyCtxt` is available. In this case, we fall back to /// the `SourceMap` provided to this function. If that is not available, @@ -994,9 +1007,9 @@ pub enum ExternalSource { Unneeded, Foreign { kind: ExternalSourceKind, - /// This SourceFile's byte-offset within the source_map of its original crate + /// This SourceFile's byte-offset within the source_map of its original crate. original_start_pos: BytePos, - /// The end of this SourceFile within the source_map of its original crate + /// The end of this SourceFile within the source_map of its original crate. original_end_pos: BytePos, }, } @@ -1099,7 +1112,7 @@ impl SourceFileHash { } } -/// A single source in the `SourceMap`. +/// A single source in the [`SourceMap`]. #[derive(Clone)] pub struct SourceFile { /// The name of the file that the source came from. Source that doesn't @@ -1580,7 +1593,7 @@ fn remove_bom(src: &mut String, normalized_pos: &mut Vec) { /// Replaces `\r\n` with `\n` in-place in `src`. /// -/// Returns error if there's a lone `\r` in the string +/// Returns error if there's a lone `\r` in the string. fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec) { if !src.as_bytes().contains(&b'\r') { return; @@ -1705,13 +1718,16 @@ macro_rules! impl_pos { } impl_pos! { - /// A byte offset. Keep this small (currently 32-bits), as AST contains - /// a lot of them. + /// A byte offset. + /// + /// Keep this small (currently 32-bits), as AST contains a lot of them. #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] pub struct BytePos(pub u32); - /// A character offset. Because of multibyte UTF-8 characters, a byte offset - /// is not equivalent to a character offset. The `SourceMap` will convert `BytePos` + /// A character offset. + /// + /// Because of multibyte UTF-8 characters, a byte offset + /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`] /// values to `CharPos` values as necessary. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct CharPos(pub usize); @@ -1835,8 +1851,9 @@ fn lookup_line(lines: &[BytePos], pos: BytePos) -> isize { } /// Requirements for a `StableHashingContext` to be used in this crate. -/// This is a hack to allow using the `HashStable_Generic` derive macro -/// instead of implementing everything in librustc_middle. +/// +/// This is a hack to allow using the [`HashStable_Generic`] derive macro +/// instead of implementing everything in rustc_middle. pub trait HashStableContext { fn hash_def_id(&mut self, _: DefId, hasher: &mut StableHasher); fn hash_crate_num(&mut self, _: CrateNum, hasher: &mut StableHasher); @@ -1856,6 +1873,7 @@ where /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column) /// triple, which stays the same even if the containing `SourceFile` has moved /// within the `SourceMap`. + /// /// Also note that we are hashing byte offsets for the column, not unicode /// codepoint offsets. For the purpose of the hash that's sufficient. /// Also, hashing filenames is expensive so we avoid doing it twice when the diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index e9b4eb6e4abe0..fefc0cb48ddd8 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -1,9 +1,11 @@ -//! The `SourceMap` tracks all the source code used within a single crate, mapping +//! Types for tracking pieces of source code within a crate. +//! +//! The [`SourceMap`] tracks all the source code used within a single crate, mapping //! from integer byte positions to the original source code location. Each bit //! of source parsed during crate parsing (typically files, in-memory strings, //! or various bits of macro expansion) cover a continuous range of bytes in the -//! `SourceMap` and are represented by `SourceFile`s. Byte positions are stored in -//! `Span` and used pervasively in the compiler. They are absolute positions +//! `SourceMap` and are represented by [`SourceFile`]s. Byte positions are stored in +//! [`Span`] and used pervasively in the compiler. They are absolute positions //! within the `SourceMap`, which upon request can be converted to line and column //! information, source code snippets, etc. diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index b05e01d666bd6..ceb9b59b13ad1 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -12,7 +12,7 @@ use rustc_data_structures::fx::FxIndexSet; /// A compressed span. /// -/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span` +/// Whereas [`SpanData`] is 12 bytes, which is a bit too big to stick everywhere, `Span` /// is a form that only takes up 8 bytes, with less space for the length and /// context. The vast majority (99.9%+) of `SpanData` instances will fit within /// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are @@ -42,13 +42,11 @@ use rustc_data_structures::fx::FxIndexSet; /// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base` /// values never cause interning. The number of bits needed for `base` /// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate. -/// `script-servo` is the largest crate in `rustc-perf`, requiring 26 bits -/// for some spans. /// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits /// in `SpanData`, which means that large `len` values will cause interning. /// The number of bits needed for `len` does not depend on the crate size. -/// The most common number of bits for `len` are 0--7, with a peak usually at -/// 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough +/// The most common numbers of bits for `len` are from 0 to 7, with a peak usually +/// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough /// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur /// dozens of times in a typical crate. /// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that