From 6d455296fde6ff0d5154d39513259a60eb73f302 Mon Sep 17 00:00:00 2001 From: yukang Date: Tue, 25 Oct 2022 20:47:19 +0800 Subject: [PATCH 01/26] Fix #103451, find_width_of_character_at_span return width with 1 when reaching end --- compiler/rustc_span/src/source_map.rs | 8 ++---- compiler/rustc_span/src/source_map/tests.rs | 11 +++---- src/test/ui/parser/issue-103451.rs | 5 ++++ src/test/ui/parser/issue-103451.stderr | 32 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 src/test/ui/parser/issue-103451.rs create mode 100644 src/test/ui/parser/issue-103451.stderr diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index f9566eeee9465..3baa2e03cbad7 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -855,7 +855,8 @@ impl SourceMap { /// Returns a new span representing the next character after the end-point of this span. /// Special cases: /// - if span is a dummy one, returns the same span - /// - if next_point reached the end of source, return span with lo = hi + /// - if next_point reached the end of source, return a span exceeding the end of source, + /// which means sm.span_to_snippet(next_point) will get `Err` /// - respect multi-byte characters pub fn next_point(&self, sp: Span) -> Span { if sp.is_dummy() { @@ -864,9 +865,6 @@ impl SourceMap { let start_of_next_point = sp.hi().0; let width = self.find_width_of_character_at_span(sp, true); - if width == 0 { - return Span::new(sp.hi(), sp.hi(), sp.ctxt(), None); - } // If the width is 1, then the next span should only contain the next char besides current ending. // However, in the case of a multibyte character, where the width != 1, the next span should // span multiple bytes to include the whole character. @@ -938,7 +936,7 @@ impl SourceMap { // Ensure indexes are also not malformed. if start_index > end_index || end_index > source_len - 1 { debug!("find_width_of_character_at_span: source indexes are malformed"); - return 0; + return 1; } let src = local_begin.sf.external_src.borrow(); diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 1fd81018fa05c..3cab59e8dbe6c 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -511,16 +511,17 @@ fn test_next_point() { assert_eq!(span.lo().0, 4); assert_eq!(span.hi().0, 5); - // A non-empty span at the last byte should advance to create an empty - // span pointing at the end of the file. + // Reaching to the end of file, return a span that will get error with `span_to_snippet` let span = Span::with_root_ctxt(BytePos(4), BytePos(5)); let span = sm.next_point(span); assert_eq!(span.lo().0, 5); - assert_eq!(span.hi().0, 5); + assert_eq!(span.hi().0, 6); + assert!(sm.span_to_snippet(span).is_err()); - // Empty span pointing just past the last byte. + // Reaching to the end of file, return a span that will get error with `span_to_snippet` let span = Span::with_root_ctxt(BytePos(5), BytePos(5)); let span = sm.next_point(span); assert_eq!(span.lo().0, 5); - assert_eq!(span.hi().0, 5); + assert_eq!(span.hi().0, 6); + assert!(sm.span_to_snippet(span).is_err()); } diff --git a/src/test/ui/parser/issue-103451.rs b/src/test/ui/parser/issue-103451.rs new file mode 100644 index 0000000000000..1fdb001488155 --- /dev/null +++ b/src/test/ui/parser/issue-103451.rs @@ -0,0 +1,5 @@ +// error-pattern: this file contains an unclosed delimiter +// error-pattern: expected value, found struct `R` +struct R { } +struct S { + x: [u8; R diff --git a/src/test/ui/parser/issue-103451.stderr b/src/test/ui/parser/issue-103451.stderr new file mode 100644 index 0000000000000..eb3c92fb43d9b --- /dev/null +++ b/src/test/ui/parser/issue-103451.stderr @@ -0,0 +1,32 @@ +error: this file contains an unclosed delimiter + --> $DIR/issue-103451.rs:5:15 + | +LL | struct S { + | - unclosed delimiter +LL | x: [u8; R + | - ^ + | | + | unclosed delimiter + +error: this file contains an unclosed delimiter + --> $DIR/issue-103451.rs:5:15 + | +LL | struct S { + | - unclosed delimiter +LL | x: [u8; R + | - ^ + | | + | unclosed delimiter + +error[E0423]: expected value, found struct `R` + --> $DIR/issue-103451.rs:5:13 + | +LL | struct R { } + | ------------ `R` defined here +LL | struct S { +LL | x: [u8; R + | ^ help: use struct literal syntax instead: `R {}` + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0423`. From 8dbd817af13f1248e307502a331d6bb1a091b177 Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Sat, 29 Oct 2022 22:46:19 +0100 Subject: [PATCH 02/26] Upgrade cc for working is_flag_supported on cross-compiles --- library/unwind/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/unwind/Cargo.toml b/library/unwind/Cargo.toml index 69fce8d7795c1..dbd360065c844 100644 --- a/library/unwind/Cargo.toml +++ b/library/unwind/Cargo.toml @@ -20,7 +20,7 @@ compiler_builtins = "0.1.0" cfg-if = "0.1.8" [build-dependencies] -cc = "1.0.69" +cc = "1.0.74" [features] From a9d7cfc480e5298def35fc0112ebe6d706934941 Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Sat, 29 Oct 2022 23:21:12 +0100 Subject: [PATCH 03/26] Update cc in Cargo.lock --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dab693419a95d..dcf40977bdcb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,9 +489,9 @@ version = "0.1.0" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "581f5dba903aac52ea3feb5ec4810848460ee833876f1f9b0fdeab1f19091574" dependencies = [ "jobserver", ] From a2037e3012ed6c59e91fadb693b35fd503cba117 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 1 Nov 2022 01:34:59 +0000 Subject: [PATCH 04/26] Check for substs compatibility for RPITITs --- .../src/traits/project.rs | 18 +++++++++++++++--- .../impl-trait/in-trait/generics-mismatch.rs | 17 +++++++++++++++++ .../in-trait/generics-mismatch.stderr | 12 ++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 src/test/ui/impl-trait/in-trait/generics-mismatch.rs create mode 100644 src/test/ui/impl-trait/in-trait/generics-mismatch.stderr diff --git a/compiler/rustc_trait_selection/src/traits/project.rs b/compiler/rustc_trait_selection/src/traits/project.rs index c8276854016f0..ad216e5502d82 100644 --- a/compiler/rustc_trait_selection/src/traits/project.rs +++ b/compiler/rustc_trait_selection/src/traits/project.rs @@ -2192,7 +2192,7 @@ fn confirm_impl_candidate<'cx, 'tcx>( // Verify that the trait item and its implementation have compatible substs lists fn check_substs_compatible<'tcx>( tcx: TyCtxt<'tcx>, - assoc_ty: &ty::AssocItem, + assoc_item: &ty::AssocItem, substs: ty::SubstsRef<'tcx>, ) -> bool { fn check_substs_compatible_inner<'tcx>( @@ -2224,7 +2224,10 @@ fn check_substs_compatible<'tcx>( true } - check_substs_compatible_inner(tcx, tcx.generics_of(assoc_ty.def_id), substs.as_slice()) + let generics = tcx.generics_of(assoc_item.def_id); + // Chop off any additional substs (RPITIT) substs + let substs = &substs[0..generics.count().min(substs.len())]; + check_substs_compatible_inner(tcx, generics, substs) } fn confirm_impl_trait_in_trait_candidate<'tcx>( @@ -2253,12 +2256,21 @@ fn confirm_impl_trait_in_trait_candidate<'tcx>( }; } - let impl_fn_def_id = leaf_def.item.def_id; // Rebase from {trait}::{fn}::{opaque} to {impl}::{fn}::{opaque}, // since `data.substs` are the impl substs. let impl_fn_substs = obligation.predicate.substs.rebase_onto(tcx, tcx.parent(trait_fn_def_id), data.substs); + if !check_substs_compatible(tcx, &leaf_def.item, impl_fn_substs) { + let err = tcx.ty_error_with_message( + obligation.cause.span, + "impl method and trait method have different parameters", + ); + return Progress { term: err.into(), obligations }; + } + + let impl_fn_def_id = leaf_def.item.def_id; + let cause = ObligationCause::new( obligation.cause.span, obligation.cause.body_id, diff --git a/src/test/ui/impl-trait/in-trait/generics-mismatch.rs b/src/test/ui/impl-trait/in-trait/generics-mismatch.rs new file mode 100644 index 0000000000000..cc0fc720ebbfd --- /dev/null +++ b/src/test/ui/impl-trait/in-trait/generics-mismatch.rs @@ -0,0 +1,17 @@ +#![feature(return_position_impl_trait_in_trait)] +#![allow(incomplete_features)] + +struct U; + +trait Foo { + fn bar(&self) -> impl Sized; +} + +impl Foo for U { + fn bar(&self) {} + //~^ ERROR method `bar` has 1 type parameter but its trait declaration has 0 type parameters +} + +fn main() { + U.bar(); +} diff --git a/src/test/ui/impl-trait/in-trait/generics-mismatch.stderr b/src/test/ui/impl-trait/in-trait/generics-mismatch.stderr new file mode 100644 index 0000000000000..cd42683e0224d --- /dev/null +++ b/src/test/ui/impl-trait/in-trait/generics-mismatch.stderr @@ -0,0 +1,12 @@ +error[E0049]: method `bar` has 1 type parameter but its trait declaration has 0 type parameters + --> $DIR/generics-mismatch.rs:11:12 + | +LL | fn bar(&self) -> impl Sized; + | - expected 0 type parameters +... +LL | fn bar(&self) {} + | ^ found 1 type parameter + +error: aborting due to previous error + +For more information about this error, try `rustc --explain E0049`. From 32dae918a181e01b8ee01d256c1e54ddd6d0ea76 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 1 Nov 2022 01:36:50 +0000 Subject: [PATCH 05/26] Remap RPITIT substs properly --- .../src/traits/project.rs | 7 ++++++ .../in-trait/specialization-substs-remap.rs | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/test/ui/impl-trait/in-trait/specialization-substs-remap.rs diff --git a/compiler/rustc_trait_selection/src/traits/project.rs b/compiler/rustc_trait_selection/src/traits/project.rs index ad216e5502d82..474ae22e3d589 100644 --- a/compiler/rustc_trait_selection/src/traits/project.rs +++ b/compiler/rustc_trait_selection/src/traits/project.rs @@ -2260,6 +2260,13 @@ fn confirm_impl_trait_in_trait_candidate<'tcx>( // since `data.substs` are the impl substs. let impl_fn_substs = obligation.predicate.substs.rebase_onto(tcx, tcx.parent(trait_fn_def_id), data.substs); + let impl_fn_substs = translate_substs( + selcx.infcx(), + obligation.param_env, + data.impl_def_id, + impl_fn_substs, + leaf_def.defining_node, + ); if !check_substs_compatible(tcx, &leaf_def.item, impl_fn_substs) { let err = tcx.ty_error_with_message( diff --git a/src/test/ui/impl-trait/in-trait/specialization-substs-remap.rs b/src/test/ui/impl-trait/in-trait/specialization-substs-remap.rs new file mode 100644 index 0000000000000..c9ee877db8ec5 --- /dev/null +++ b/src/test/ui/impl-trait/in-trait/specialization-substs-remap.rs @@ -0,0 +1,24 @@ +// check-pass + +#![feature(specialization)] +#![feature(return_position_impl_trait_in_trait)] +#![allow(incomplete_features)] + +trait Foo { + fn bar(&self) -> impl Sized; +} + +impl Foo for U +where + U: Copy, +{ + fn bar(&self) -> U { + *self + } +} + +impl Foo for i32 {} + +fn main() { + let _: i32 = 1i32.bar(); +} From 0f632c8f7882a4470bf1c7b6a4d6b14379b8e163 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 1 Nov 2022 01:42:47 +0000 Subject: [PATCH 06/26] Fix ICE in default impl error reporting --- compiler/rustc_middle/src/ty/error.rs | 4 ++- .../in-trait/specialization-broken.rs | 26 +++++++++++++++++++ .../in-trait/specialization-broken.stderr | 23 ++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 src/test/ui/impl-trait/in-trait/specialization-broken.rs create mode 100644 src/test/ui/impl-trait/in-trait/specialization-broken.stderr diff --git a/compiler/rustc_middle/src/ty/error.rs b/compiler/rustc_middle/src/ty/error.rs index 4e6cdb786025e..dc13374f992eb 100644 --- a/compiler/rustc_middle/src/ty/error.rs +++ b/compiler/rustc_middle/src/ty/error.rs @@ -430,7 +430,9 @@ impl<'tcx> TyCtxt<'tcx> { (ty::Projection(_), ty::Projection(_)) => { diag.note("an associated type was expected, but a different one was found"); } - (ty::Param(p), ty::Projection(proj)) | (ty::Projection(proj), ty::Param(p)) => { + (ty::Param(p), ty::Projection(proj)) | (ty::Projection(proj), ty::Param(p)) + if self.def_kind(proj.item_def_id) != DefKind::ImplTraitPlaceholder => + { let generics = self.generics_of(body_owner_def_id); let p_span = self.def_span(generics.type_param(p, self).def_id); if !sp.contains(p_span) { diff --git a/src/test/ui/impl-trait/in-trait/specialization-broken.rs b/src/test/ui/impl-trait/in-trait/specialization-broken.rs new file mode 100644 index 0000000000000..9d27d3710a601 --- /dev/null +++ b/src/test/ui/impl-trait/in-trait/specialization-broken.rs @@ -0,0 +1,26 @@ +// FIXME(compiler-errors): I'm not exactly sure if this is expected to pass or not. +// But we fixed an ICE anyways. + +#![feature(specialization)] +#![feature(return_position_impl_trait_in_trait)] +#![allow(incomplete_features)] + +trait Foo { + fn bar(&self) -> impl Sized; +} + +default impl Foo for U +where + U: Copy, +{ + fn bar(&self) -> U { + //~^ ERROR method `bar` has an incompatible type for trait + *self + } +} + +impl Foo for i32 {} + +fn main() { + 1i32.bar(); +} diff --git a/src/test/ui/impl-trait/in-trait/specialization-broken.stderr b/src/test/ui/impl-trait/in-trait/specialization-broken.stderr new file mode 100644 index 0000000000000..a30e6346b2927 --- /dev/null +++ b/src/test/ui/impl-trait/in-trait/specialization-broken.stderr @@ -0,0 +1,23 @@ +error[E0053]: method `bar` has an incompatible type for trait + --> $DIR/specialization-broken.rs:16:22 + | +LL | default impl Foo for U + | - this type parameter +... +LL | fn bar(&self) -> U { + | ^ + | | + | expected associated type, found type parameter `U` + | help: change the output type to match the trait: `impl Sized` + | +note: type in trait + --> $DIR/specialization-broken.rs:9:22 + | +LL | fn bar(&self) -> impl Sized; + | ^^^^^^^^^^ + = note: expected fn pointer `fn(&U) -> impl Sized` + found fn pointer `fn(&U) -> U` + +error: aborting due to previous error + +For more information about this error, try `rustc --explain E0053`. From f32e6781b2932aed55342ad8a4a7f1023acb30b4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 11:31:07 +1100 Subject: [PATCH 07/26] Rename some variables. These have been bugging me for a while. - `literal_text`: `src` is also used and is shorter and better. - `first_char`: used even when "first" doesn't make sense; `c` is shorter and better. - `curr`: `c` is shorter and better. - `unescaped_char`: `result` is also used and is shorter and better. - `second_char`: these have a single use and can be elided. --- compiler/rustc_lexer/src/unescape.rs | 70 +++++++++++++--------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8f64b5f5158e4..a6752c82bd3c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -78,54 +78,52 @@ impl EscapeError { /// Takes a contents of a literal (without quotes) and produces a /// sequence of escaped characters or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { match mode { Mode::Char | Mode::Byte => { - let mut chars = literal_text.chars(); + let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode); // The Chars iterator moved forward. - callback(0..(literal_text.len() - chars.as_str().len()), result); + callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => { - unescape_raw_str_or_raw_byte_str(literal_text, mode, callback) - } + Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), } } /// Takes a contents of a byte, byte string or raw byte string (without quotes) /// and produces a sequence of bytes or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode.is_bytes()); - unescape_literal(literal_text, mode, &mut |range, result| { + unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) } /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error -pub fn unescape_char(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_char(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. -pub fn unescape_byte(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_byte(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Byte) .map(byte_from_char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// What kind of literal do we parse. @@ -157,10 +155,7 @@ impl Mode { fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Previous character was '\\', unescape what follows. - - let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; - - let res = match second_char { + let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', 'n' => '\n', 'r' => '\r', @@ -249,23 +244,23 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(first_char: char, mode: Mode) -> Result { - if mode.is_bytes() && !first_char.is_ascii() { +fn ascii_check(c: char, mode: Mode) -> Result { + if mode.is_bytes() && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { - Ok(first_char) + Ok(c) } } fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { debug_assert!(mode == Mode::Char || mode == Mode::Byte); - let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match first_char { + let c = chars.next().ok_or(EscapeError::ZeroChars)?; + let res = match c { '\\' => scan_escape(chars, mode), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -282,13 +277,12 @@ where debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let initial_len = src.len(); let mut chars = src.chars(); - while let Some(first_char) = chars.next() { - let start = initial_len - chars.as_str().len() - first_char.len_utf8(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let unescaped_char = match first_char { + let result = match c { '\\' => { - let second_char = chars.clone().next(); - match second_char { + match chars.clone().next() { Some('\n') => { // Rust language specification requires us to skip whitespaces // if unescaped '\' character is followed by '\n'. @@ -304,10 +298,10 @@ where '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }; let end = initial_len - chars.as_str().len(); - callback(start..end, unescaped_char); + callback(start..end, result); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -341,18 +335,18 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(literal_text: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = literal_text.len(); + let initial_len = src.len(); - let mut chars = literal_text.chars(); - while let Some(curr) = chars.next() { - let start = initial_len - chars.as_str().len() - curr.len_utf8(); + let mut chars = src.chars(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let result = match curr { + let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), From 84ca2c3bab370ee58ebd23050e9286e1d9e664b9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 12:15:55 +1100 Subject: [PATCH 08/26] Clarify range calculations. There is some subtlety here. --- compiler/rustc_lexer/src/unescape.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index a6752c82bd3c5..dc2fd359e278e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -275,11 +275,13 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); - let initial_len = src.len(); let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here is complicated because + // `skip_ascii_whitespace` makes us to skip over chars without counting + // them in the range computation. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\\' => { match chars.clone().next() { @@ -300,7 +302,7 @@ where '\r' => Err(EscapeError::BareCarriageReturn), _ => ascii_check(c, mode), }; - let end = initial_len - chars.as_str().len(); + let end = src.len() - chars.as_str().len(); callback(start..end, result); } @@ -340,19 +342,19 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = src.len(); - let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here matches the one in + // `unescape_str_or_byte_str` for consistency, even though this function + // doesn't have to worry about skipping any chars. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; - let end = initial_len - chars.as_str().len(); - + let end = src.len() - chars.as_str().len(); callback(start..end, result); } } From 34b32b0dac9da3fad7861bdc2bad89d771172bb3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 13:35:49 +1100 Subject: [PATCH 09/26] Use `Mode` less. It's passed to numerous places where we just need an `is_byte` bool. Passing the bool avoids the need for some assertions. Also rename `is_bytes()` as `is_byte()`, to better match `Mode::Byte`, `Mode::ByteStr`, and `Mode::RawByteStr`. --- compiler/rustc_lexer/src/unescape.rs | 46 +++++++++---------- .../src/lexer/unescape_error_reporting.rs | 14 +++--- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index dc2fd359e278e..f0042a397c2c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -85,14 +85,16 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode); + let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), + Mode::RawStr | Mode::RawByteStr => { + unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) + } } } @@ -103,7 +105,7 @@ pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode.is_bytes()); + debug_assert!(mode.is_byte()); unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) @@ -113,15 +115,14 @@ where /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (src.len() - chars.as_str().len(), err)) + unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. pub fn unescape_byte(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Byte) + unescape_char_or_byte(&mut chars, true) .map(byte_from_char) .map_err(|err| (src.len() - chars.as_str().len(), err)) } @@ -145,7 +146,7 @@ impl Mode { } } - pub fn is_bytes(self) -> bool { + pub fn is_byte(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, Mode::Char | Mode::Str | Mode::RawStr => false, @@ -153,7 +154,7 @@ impl Mode { } } -fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { +fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { // Previous character was '\\', unescape what follows. let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', @@ -176,7 +177,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { let value = hi * 16 + lo; // For a non-byte literal verify that it is within ASCII range. - if !mode.is_bytes() && !is_ascii(value) { + if !is_byte && !is_ascii(value) { return Err(EscapeError::OutOfRangeHexEscape); } let value = value as u8; @@ -212,7 +213,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Incorrect syntax has higher priority for error reporting // than unallowed value for a literal. - if mode.is_bytes() { + if is_byte { return Err(EscapeError::UnicodeEscapeInByte); } @@ -244,8 +245,8 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(c: char, mode: Mode) -> Result { - if mode.is_bytes() && !c.is_ascii() { +fn ascii_check(c: char, is_byte: bool) -> Result { + if is_byte && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { @@ -253,14 +254,13 @@ fn ascii_check(c: char, mode: Mode) -> Result { } } -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - debug_assert!(mode == Mode::Char || mode == Mode::Byte); +fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result { let c = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match c { - '\\' => scan_escape(chars, mode), + '\\' => scan_escape(chars, is_byte), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -270,11 +270,10 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result(src: &str, mode: Mode, callback: &mut F) +fn unescape_str_or_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let mut chars = src.chars(); // The `start` and `end` computation here is complicated because @@ -293,14 +292,14 @@ where skip_ascii_whitespace(&mut chars, start, callback); continue; } - _ => scan_escape(&mut chars, mode), + _ => scan_escape(&mut chars, is_byte), } } '\n' => Ok('\n'), '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); @@ -337,11 +336,10 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); let mut chars = src.chars(); // The `start` and `end` computation here matches the one in @@ -351,7 +349,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), + c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; let end = src.len() - chars.as_str().len(); diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index f075de7142676..055ee98a00aa3 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -108,7 +108,7 @@ pub(crate) fn emit_unescape_error( } if !has_help { - let (prefix, msg) = if mode.is_bytes() { + let (prefix, msg) = if mode.is_byte() { ("b", "if you meant to write a byte string literal, use double quotes") } else { ("", "if you meant to write a `str` literal, use double quotes") @@ -142,7 +142,7 @@ pub(crate) fn emit_unescape_error( EscapeError::EscapeOnlyChar => { let (c, char_span) = last_char(); - let msg = if mode.is_bytes() { + let msg = if mode.is_byte() { "byte constant must be escaped" } else { "character constant must be escaped" @@ -182,11 +182,11 @@ pub(crate) fn emit_unescape_error( let (c, span) = last_char(); let label = - if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_bytes() { + if c == '{' || c == '}' && !mode.is_byte() { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -196,7 +196,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_bytes() { + if !mode.is_byte() { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", @@ -231,7 +231,7 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { @@ -271,7 +271,7 @@ pub(crate) fn emit_unescape_error( err.emit(); } EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) From 7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 15:17:37 +1100 Subject: [PATCH 10/26] Make non-ASCII errors more consistent. There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated. - `NonAsciiCharInByte`: used for byte literals and byte string literals. - `NonAsciiCharInByteString`: used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference. --- compiler/rustc_lexer/src/unescape.rs | 7 ++-- compiler/rustc_lexer/src/unescape/tests.rs | 7 ++-- .../src/lexer/unescape_error_reporting.rs | 32 ++++++++----------- src/test/ui/attributes/key-value-non-ascii.rs | 2 +- .../ui/attributes/key-value-non-ascii.stderr | 4 +-- src/test/ui/parser/byte-literals.rs | 2 +- src/test/ui/parser/byte-literals.stderr | 4 +-- src/test/ui/parser/byte-string-literals.rs | 4 +-- .../ui/parser/byte-string-literals.stderr | 6 ++-- .../ui/parser/raw/raw-byte-string-literals.rs | 2 +- .../raw/raw-byte-string-literals.stderr | 2 +- .../ui/parser/unicode-control-codepoints.rs | 16 +++++----- .../parser/unicode-control-codepoints.stderr | 24 +++++++------- src/test/ui/suggestions/multibyte-escapes.rs | 12 +++---- .../ui/suggestions/multibyte-escapes.stderr | 12 +++---- 15 files changed, 62 insertions(+), 74 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index f0042a397c2c5..9c9cce7cbd48e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -52,10 +52,8 @@ pub enum EscapeError { /// Unicode escape code in byte literal. UnicodeEscapeInByte, - /// Non-ascii character in byte literal. + /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, - /// Non-ascii character in byte string literal. - NonAsciiCharInByteString, /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. @@ -349,8 +347,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), - c => Ok(c), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index fa61554afde6c..008edef5a6385 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -289,9 +289,6 @@ fn test_unescape_raw_byte_str() { } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]); - check( - "🦀a", - &[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))], - ); + check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 055ee98a00aa3..6373f5b4fd6ff 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_byte()); let (c, span) = last_char(); - let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); + let desc = match mode { + Mode::Byte => "byte literal", + Mode::ByteStr => "byte string literal", + Mode::RawByteStr => "raw byte string literal", + _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), + }; + let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) } else { String::new() }; - err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); - if (c as u32) <= 0xFF { + err.span_label(span, &format!("must be ASCII{}", postfix)); + // Note: the \\xHH suggestions are not given for raw byte string + // literals, because they are araw and so cannot use any escapes. + if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, &format!( @@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error( format!("\\x{:X}", c as u32), Applicability::MaybeIncorrect, ); - } else if matches!(mode, Mode::Byte) { + } else if mode == Mode::Byte { err.span_label(span, "this multibyte character does not fit into a single byte"); - } else if matches!(mode, Mode::ByteStr) { + } else if mode != Mode::RawByteStr { let mut utf8 = String::new(); utf8.push(c); err.span_suggestion( @@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error( } err.emit(); } - EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_byte()); - let (c, span) = last_char(); - let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { - format!(" but is {:?}", c) - } else { - String::new() - }; - handler - .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, &format!("must be ASCII{}", postfix)) - .emit(); - } EscapeError::OutOfRangeHexEscape => { handler .struct_span_err(span, "out of range hex escape") diff --git a/src/test/ui/attributes/key-value-non-ascii.rs b/src/test/ui/attributes/key-value-non-ascii.rs index 12942eabdf7b5..e14e2fc05ad39 100644 --- a/src/test/ui/attributes/key-value-non-ascii.rs +++ b/src/test/ui/attributes/key-value-non-ascii.rs @@ -1,4 +1,4 @@ #![feature(rustc_attrs)] -#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant +#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte string literal fn main() {} diff --git a/src/test/ui/attributes/key-value-non-ascii.stderr b/src/test/ui/attributes/key-value-non-ascii.stderr index 422107867f7f9..23d482de6a868 100644 --- a/src/test/ui/attributes/key-value-non-ascii.stderr +++ b/src/test/ui/attributes/key-value-non-ascii.stderr @@ -1,8 +1,8 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/key-value-non-ascii.rs:3:19 | LL | #[rustc_dummy = b"ffi.rs"] - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the UTF-8 encoding of 'ffi', use \xHH escapes | diff --git a/src/test/ui/parser/byte-literals.rs b/src/test/ui/parser/byte-literals.rs index 05a510b24a7ab..896dc1a1a5fba 100644 --- a/src/test/ui/parser/byte-literals.rs +++ b/src/test/ui/parser/byte-literals.rs @@ -7,6 +7,6 @@ pub fn main() { b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z` b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped - b'é'; //~ ERROR non-ASCII character in byte constant + b'é'; //~ ERROR non-ASCII character in byte literal b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/src/test/ui/parser/byte-literals.stderr b/src/test/ui/parser/byte-literals.stderr index c3d0006163005..efa55ae05bd37 100644 --- a/src/test/ui/parser/byte-literals.stderr +++ b/src/test/ui/parser/byte-literals.stderr @@ -32,11 +32,11 @@ error: byte constant must be escaped: `'` LL | b'''; | ^ help: escape the character: `\'` -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/byte-literals.rs:10:7 | LL | b'é'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | diff --git a/src/test/ui/parser/byte-string-literals.rs b/src/test/ui/parser/byte-string-literals.rs index b1f11024a7bb6..30a4f50c4e40b 100644 --- a/src/test/ui/parser/byte-string-literals.rs +++ b/src/test/ui/parser/byte-string-literals.rs @@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape pub fn main() { b"\f"; //~ ERROR unknown byte escape b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` - b"é"; //~ ERROR non-ASCII character in byte constant - br##"é"##; //~ ERROR raw byte string must be ASCII + b"é"; //~ ERROR non-ASCII character in byte string literal + br##"é"##; //~ ERROR non-ASCII character in raw byte string literal b"a //~ ERROR unterminated double quote byte string } diff --git a/src/test/ui/parser/byte-string-literals.stderr b/src/test/ui/parser/byte-string-literals.stderr index 3b8b3692e053f..5b96cc3d18abc 100644 --- a/src/test/ui/parser/byte-string-literals.stderr +++ b/src/test/ui/parser/byte-string-literals.stderr @@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z` LL | b"\x0Z"; | ^ invalid character in numeric character escape -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/byte-string-literals.rs:6:7 | LL | b"é"; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | LL | b"\xE9"; | ~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/byte-string-literals.rs:7:10 | LL | br##"é"##; diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.rs b/src/test/ui/parser/raw/raw-byte-string-literals.rs index 163c8ac66b022..1b859fee596ad 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.rs +++ b/src/test/ui/parser/raw/raw-byte-string-literals.rs @@ -2,6 +2,6 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string - br"é"; //~ ERROR raw byte string must be ASCII + br"é"; //~ ERROR non-ASCII character in raw byte string literal br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.stderr b/src/test/ui/parser/raw/raw-byte-string-literals.stderr index cfc877104bd9f..a2f27d1ed70ae 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-literals.stderr @@ -4,7 +4,7 @@ error: bare CR not allowed in raw string LL | br"a "; | ^ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/raw-byte-string-literals.rs:5:8 | LL | br"é"; diff --git a/src/test/ui/parser/unicode-control-codepoints.rs b/src/test/ui/parser/unicode-control-codepoints.rs index 5af0b585a1275..df099bb62ad1e 100644 --- a/src/test/ui/parser/unicode-control-codepoints.rs +++ b/src/test/ui/parser/unicode-control-codepoints.rs @@ -14,15 +14,15 @@ fn main() { println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); //~^ ERROR unicode codepoint changing visible direction of text present in literal println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "); - //~^ ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant + //~^ ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); - //~^ ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII + //~^ ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal } diff --git a/src/test/ui/parser/unicode-control-codepoints.stderr b/src/test/ui/parser/unicode-control-codepoints.stderr index 44548c72ff5d0..fc071a9419142 100644 --- a/src/test/ui/parser/unicode-control-codepoints.stderr +++ b/src/test/ui/parser/unicode-control-codepoints.stderr @@ -14,69 +14,69 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | = help: unicode escape sequences cannot be used as a byte or in a byte string -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:26 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{202e}' + | ^ must be ASCII but is '\u{202e}' | help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes | LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:30 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:41 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2069}' + | ^ must be ASCII but is '\u{2069}' | help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:43 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); | ~~~~~~~~~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:29 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{202e}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:33 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2066}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:44 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2069}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:46 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); diff --git a/src/test/ui/suggestions/multibyte-escapes.rs b/src/test/ui/suggestions/multibyte-escapes.rs index fd5d46a4e923e..c4105186244db 100644 --- a/src/test/ui/suggestions/multibyte-escapes.rs +++ b/src/test/ui/suggestions/multibyte-escapes.rs @@ -2,17 +2,17 @@ fn main() { b'µ'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b'字'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| NOTE: this multibyte character does not fit into a single byte - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b"字"; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte string literal //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII } diff --git a/src/test/ui/suggestions/multibyte-escapes.stderr b/src/test/ui/suggestions/multibyte-escapes.stderr index 6e26bc1f01cef..1e7c43e6538f6 100644 --- a/src/test/ui/suggestions/multibyte-escapes.stderr +++ b/src/test/ui/suggestions/multibyte-escapes.stderr @@ -1,28 +1,28 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:4:7 | LL | b'µ'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'µ', use a \xHH escape | LL | b'\xB5'; | ~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:9:7 | LL | b'字'; | ^^ | | - | byte constant must be ASCII + | must be ASCII | this multibyte character does not fit into a single byte -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/multibyte-escapes.rs:14:7 | LL | b"字"; - | ^^ byte constant must be ASCII + | ^^ must be ASCII | help: if you meant to use the UTF-8 encoding of '字', use \xHH escapes | From a21c0458979d786d821c2d75a1b109fe38914da0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 16:26:27 +1100 Subject: [PATCH 11/26] Improve comments. Remove a low-value comment, remove a duplicate comment, and correct a third comment. --- compiler/rustc_lexer/src/unescape.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 9c9cce7cbd48e..db7bf02e71adc 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -84,12 +84,9 @@ where Mode::Char | Mode::Byte => { let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), - // NOTE: Raw strings do not perform any explicit character escaping, here we - // only translate CRLF to LF and produce errors on bare CR. Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } @@ -333,7 +330,7 @@ where /// Takes a contents of a string literal (without quotes) and produces a /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. +/// only produce errors on bare CR. fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), From d963686f5a87b9eaa2ac2bdc29ddb796e0e83f1f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 09:19:34 +1100 Subject: [PATCH 12/26] Refactor `cook_lexer_literal`. It deals with eight cases: ints, floats, and the six quoted types (char/byte/strings). For ints and floats we have an early return, and the other six types fall through to the code at the end, which makes the function hard to read. This commit rearranges things to avoid the early returns. --- compiler/rustc_parse/src/lexer/mod.rs | 78 +++++++++++++-------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 462bce16ad717..61b5be4240414 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -363,55 +363,55 @@ impl<'a> StringReader<'a> { fn cook_lexer_literal( &self, start: BytePos, - suffix_start: BytePos, + end: BytePos, kind: rustc_lexer::LiteralKind, ) -> (token::LitKind, Symbol) { - // prefix means `"` or `br"` or `r###"`, ... - let (lit_kind, mode, prefix_len, postfix_len) = match kind { + match kind { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated character literal", error_code!(E0762), ) } - (token::Char, Mode::Char, 1, 1) // ' ' + self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated byte constant", error_code!(E0763), ) } - (token::Byte, Mode::Byte, 2, 1) // b' ' + self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated double quote string", error_code!(E0765), ) } - (token::Str, Mode::Str, 1, 1) // " " + self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated double quote byte string", error_code!(E0766), ) } - (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + let kind = token::StrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -419,56 +419,47 @@ impl<'a> StringReader<'a> { rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + let kind = token::ByteStrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } } rustc_lexer::LiteralKind::Int { base, empty_int } => { - return if empty_int { + if empty_int { self.sess .span_diagnostic .struct_span_err_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "no valid digits found for number", error_code!(E0768), ) .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, suffix_start); - (token::Integer, self.symbol_from_to(start, suffix_start)) - }; + self.validate_int_literal(base, start, end); + (token::Integer, self.symbol_from_to(start, end)) + } } rustc_lexer::LiteralKind::Float { base, empty_exponent } => { if empty_exponent { self.err_span_(start, self.pos, "expected at least one digit in exponent"); } - match base { - Base::Hexadecimal => self.err_span_( - start, - suffix_start, - "hexadecimal float literal is not supported", - ), + Base::Hexadecimal => { + self.err_span_(start, end, "hexadecimal float literal is not supported") + } Base::Octal => { - self.err_span_(start, suffix_start, "octal float literal is not supported") + self.err_span_(start, end, "octal float literal is not supported") } Base::Binary => { - self.err_span_(start, suffix_start, "binary float literal is not supported") + self.err_span_(start, end, "binary float literal is not supported") } - _ => (), + _ => {} } - - let id = self.symbol_from_to(start, suffix_start); - return (token::Float, id); + (token::Float, self.symbol_from_to(start, end)) } - }; - let content_start = start + BytePos(prefix_len); - let content_end = suffix_start - BytePos(postfix_len); - let id = self.symbol_from_to(content_start, content_end); - self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len); - (lit_kind, id) + } } #[inline] @@ -659,20 +650,22 @@ impl<'a> StringReader<'a> { ) } - fn validate_literal_escape( + fn cook_quoted( &self, + kind: token::LitKind, mode: Mode, - content_start: BytePos, - content_end: BytePos, + start: BytePos, + end: BytePos, prefix_len: u32, postfix_len: u32, - ) { + ) -> (token::LitKind, Symbol) { + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); unescape::unescape_literal(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { - let span_with_quotes = self - .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len)); + let span_with_quotes = self.mk_sp(start, end); let (start, end) = (range.start as u32, range.end as u32); let lo = content_start + BytePos(start); let hi = lo + BytePos(end - start); @@ -688,6 +681,7 @@ impl<'a> StringReader<'a> { ); } }); + (kind, Symbol::intern(lit_content)) } fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { From a203482d2a20cba0c86298334ebd74438bd477ba Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 10:02:29 +1100 Subject: [PATCH 13/26] Inline and remove `validate_int_literal`. It has a single callsite, and is fairly small. The `Float` match arm already has base-specific checking inline, so this makes things more consistent. --- compiler/rustc_lexer/src/lib.rs | 10 ++++----- compiler/rustc_parse/src/lexer/mod.rs | 31 +++++++++++---------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 51515976e4ee9..0d29d7b1e3d9b 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -203,13 +203,13 @@ pub enum RawStrError { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Base { /// Literal starts with "0b". - Binary, + Binary = 2, /// Literal starts with "0o". - Octal, - /// Literal starts with "0x". - Hexadecimal, + Octal = 8, /// Literal doesn't contain a prefix. - Decimal, + Decimal = 10, + /// Literal starts with "0x". + Hexadecimal = 16, } /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun", diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 61b5be4240414..9de0c74f4b1d2 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -437,7 +437,19 @@ impl<'a> StringReader<'a> { .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, end); + if matches!(base, Base::Binary | Base::Octal) { + let base = base as u32; + let s = self.str_from_to(start + BytePos(2), end); + for (idx, c) in s.char_indices() { + if c != '_' && c.to_digit(base).is_none() { + self.err_span_( + start + BytePos::from_usize(2 + idx), + start + BytePos::from_usize(2 + idx + c.len_utf8()), + &format!("invalid digit for a base {} literal", base), + ); + } + } + } (token::Integer, self.symbol_from_to(start, end)) } } @@ -683,23 +695,6 @@ impl<'a> StringReader<'a> { }); (kind, Symbol::intern(lit_content)) } - - fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { - let base = match base { - Base::Binary => 2, - Base::Octal => 8, - _ => return, - }; - let s = self.str_from_to(content_start + BytePos(2), content_end); - for (idx, c) in s.char_indices() { - let idx = idx as u32; - if c != '_' && c.to_digit(base).is_none() { - let lo = content_start + BytePos(2 + idx); - let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); - self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); - } - } - } } pub fn nfc_normalize(string: &str) -> Symbol { From f8e2cef5faa7ff77e91ea2d0416006f0b7aa52bf Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:01:58 -0700 Subject: [PATCH 14/26] Move intra-doc link checks to a separate function. --- src/tools/linkchecker/main.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 7842611bd4ffa..4092722501d0e 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -365,6 +365,23 @@ impl Checker { } }); + self.check_intra_doc_links(file, &pretty_path, &source, report); + + // we don't need the source anymore, + // so drop to reduce memory-usage + match self.cache.get_mut(&pretty_path).unwrap() { + FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), + _ => unreachable!("must be html file"), + } + } + + fn check_intra_doc_links( + &mut self, + file: &Path, + pretty_path: &str, + source: &str, + report: &mut Report, + ) { // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links @@ -379,12 +396,6 @@ impl Checker { } } } - // we don't need the source anymore, - // so drop to reduce memory-usage - match self.cache.get_mut(&pretty_path).unwrap() { - FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), - _ => unreachable!("must be html file"), - } } /// Load a file from disk, or from the cache if available. From 57b229086e274c21e5544e6719271ae79a952194 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:02:39 -0700 Subject: [PATCH 15/26] Remove reference from the intra-doc link checker. --- src/tools/linkchecker/main.rs | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4092722501d0e..4170c32f1fe25 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -55,30 +55,6 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[ #[rustfmt::skip] const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[ - // This will never have links that are not in other pages. - // To avoid repeating the exceptions twice, an empty list means all broken links are allowed. - ("reference/print.html", &[]), - // All the reference 'links' are actually ENBF highlighted as code - ("reference/comments.html", &[ - "/ !", - "* !", - ]), - ("reference/identifiers.html", &[ - "a-z A-Z", - "a-z A-Z 0-9 _", - "a-z A-Z] [a-z A-Z 0-9 _", - ]), - ("reference/tokens.html", &[ - "0-1", - "0-7", - "0-9", - "0-9", - "0-9 a-f A-F", - ]), - ("reference/notation.html", &[ - "b B", - "a-z", - ]), // This is being used in the sense of 'inclusive range', not a markdown link ("core/ops/struct.RangeInclusive.html", &["begin, end"]), ("std/ops/struct.RangeInclusive.html", &["begin, end"]), @@ -382,6 +358,16 @@ impl Checker { source: &str, report: &mut Report, ) { + let relative = file.strip_prefix(&self.root).expect("should always be relative to root"); + // Don't check the reference. It has several legitimate things that + // look like []. The reference has its own broken link + // checker in its CI which handles this using pulldown_cmark. + // + // This checks both the end of the root (when checking just the + // reference directory) or the beginning (when checking all docs). + if self.root.ends_with("reference") || relative.starts_with("reference") { + return; + } // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links From a838952239493d9dafe2d5f2ca1204f326841ae9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 11:09:23 +1100 Subject: [PATCH 16/26] Remove `unescape_byte_literal`. It's easy to just use `unescape_literal` + `byte_from_char`. --- compiler/rustc_ast/src/util/literal.rs | 29 +++++++------------ compiler/rustc_lexer/src/unescape.rs | 16 ++-------- compiler/rustc_lexer/src/unescape/tests.rs | 12 ++++---- .../crates/syntax/src/validation.rs | 6 ++-- 4 files changed, 20 insertions(+), 43 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 536b385606c69..8f342175f7d37 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,12 +2,9 @@ use crate::ast::{self, Lit, LitKind}; use crate::token::{self, Token}; - -use rustc_lexer::unescape::{unescape_byte, unescape_char}; -use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode}; +use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; - use std::ascii; pub enum LitError { @@ -109,13 +106,11 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); @@ -127,13 +122,11 @@ impl LitKind { let bytes = if s.contains('\r') { let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index db7bf02e71adc..8d5eac29452e7 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -93,19 +93,6 @@ where } } -/// Takes a contents of a byte, byte string or raw byte string (without quotes) -/// and produces a sequence of bytes or errors. -/// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - debug_assert!(mode.is_byte()); - unescape_literal(src, mode, &mut |range, result| { - callback(range, result.map(byte_from_char)); - }) -} - /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { @@ -351,7 +338,8 @@ where } } -fn byte_from_char(c: char) -> u8 { +#[inline] +pub fn byte_from_char(c: char) -> u8 { let res = c as u32; debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr"); res as u8 diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 008edef5a6385..00c8401efdfe4 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -246,10 +246,10 @@ fn test_unescape_byte_good() { fn test_unescape_byte_str_good() { fn check(literal_text: &str, expected: &[u8]) { let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_byte_literal(literal_text, Mode::ByteStr, &mut |range, c| { + unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| { if let Ok(b) = &mut buf { match c { - Ok(c) => b.push(c), + Ok(c) => b.push(byte_from_char(c)), Err(e) => buf = Err((range, e)), } } @@ -280,15 +280,13 @@ fn test_unescape_raw_str() { #[test] fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { + fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_byte_literal(literal, Mode::RawByteStr, &mut |range, res| { - unescaped.push((range, res)) - }); + unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); } diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index b9f2b5132353c..1eea2346451dd 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,9 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{ - self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode, -}; +use rustc_lexer::unescape::{self, unescape_byte, unescape_char, unescape_literal, Mode}; use crate::{ algo, @@ -143,7 +141,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { ast::LiteralKind::ByteString(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 2, '"') { - unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { + unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| { if let Err(err) = char { push_err(2, (range.start, err)); } From 43d21b535f003c81a55331c31e16313a90050b18 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 13:52:44 +1100 Subject: [PATCH 17/26] Rename some `result` variables as `res`, for consistency. --- compiler/rustc_lexer/src/unescape.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8d5eac29452e7..674bbff0878ce 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -83,8 +83,8 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - callback(0..(src.len() - chars.as_str().len()), result); + let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte); + callback(0..(src.len() - chars.as_str().len()), res); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), Mode::RawStr | Mode::RawByteStr => { @@ -263,7 +263,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\\' => { match chars.clone().next() { Some('\n') => { @@ -284,7 +284,7 @@ where _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -329,12 +329,12 @@ where // doesn't have to worry about skipping any chars. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } } From 13d4c61b5f732f17b66c7610588e113844fc5e08 Mon Sep 17 00:00:00 2001 From: Tanner Davies Date: Sat, 5 Nov 2022 15:07:10 -0600 Subject: [PATCH 18/26] Place config.toml in current working directory if config not found --- src/bootstrap/bin/main.rs | 2 +- src/bootstrap/config.rs | 4 ++-- src/bootstrap/setup.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bootstrap/bin/main.rs b/src/bootstrap/bin/main.rs index 9b4861ccd95f4..be69f819c6428 100644 --- a/src/bootstrap/bin/main.rs +++ b/src/bootstrap/bin/main.rs @@ -35,7 +35,7 @@ fn main() { // NOTE: Since `./configure` generates a `config.toml`, distro maintainers will see the // changelog warning, not the `x.py setup` message. - let suggest_setup = !config.config.exists() && !matches!(config.cmd, Subcommand::Setup { .. }); + let suggest_setup = config.config.is_none() && !matches!(config.cmd, Subcommand::Setup { .. }); if suggest_setup { println!("warning: you have not made a `config.toml`"); println!( diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs index 21dc11c48081e..c086d248e5ce1 100644 --- a/src/bootstrap/config.rs +++ b/src/bootstrap/config.rs @@ -80,7 +80,7 @@ pub struct Config { pub keep_stage_std: Vec, pub src: PathBuf, /// defaults to `config.toml` - pub config: PathBuf, + pub config: Option, pub jobs: Option, pub cmd: Subcommand, pub incremental: bool, @@ -942,7 +942,7 @@ impl Config { } config.changelog_seen = toml.changelog_seen; - config.config = toml_path; + config.config = if toml_path.exists() { Some(toml_path) } else { None }; let build = toml.build.unwrap_or_default(); diff --git a/src/bootstrap/setup.rs b/src/bootstrap/setup.rs index eb7da1bda73cb..04480277fe047 100644 --- a/src/bootstrap/setup.rs +++ b/src/bootstrap/setup.rs @@ -82,7 +82,7 @@ impl fmt::Display for Profile { } pub fn setup(config: &Config, profile: Profile) { - let path = &config.config; + let path = &config.config.clone().unwrap_or(PathBuf::from("config.toml")); if path.exists() { eprintln!( From 4c3cad062014253452a15574868e49328486ec02 Mon Sep 17 00:00:00 2001 From: Kamil Koczurek Date: Mon, 7 Nov 2022 15:21:35 +0100 Subject: [PATCH 19/26] Add --print=split-debuginfo This option prints all supported values for -Csplit-debuginfo=.., i.e. only stable ones on stable/beta and all of them on nightly/dev. --- compiler/rustc_driver/src/lib.rs | 11 +++++++++++ compiler/rustc_session/src/config.rs | 2 ++ .../valid-print-requests/valid-print-requests.stderr | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_driver/src/lib.rs b/compiler/rustc_driver/src/lib.rs index cf4bcc7c158fc..423f0cdddfd5c 100644 --- a/compiler/rustc_driver/src/lib.rs +++ b/compiler/rustc_driver/src/lib.rs @@ -736,6 +736,17 @@ fn print_crate_info( // Any output here interferes with Cargo's parsing of other printed output NativeStaticLibs => {} LinkArgs => {} + SplitDebuginfo => { + use rustc_target::spec::SplitDebuginfo::{Off, Packed, Unpacked}; + + for split in &[Off, Packed, Unpacked] { + let stable = sess.target.options.supported_split_debuginfo.contains(split); + let unstable_ok = sess.unstable_options(); + if stable || unstable_ok { + println!("{}", split); + } + } + } } } Compilation::Stop diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index aece29ca0cbf6..be084adb7b724 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -548,6 +548,7 @@ pub enum PrintRequest { NativeStaticLibs, StackProtectorStrategies, LinkArgs, + SplitDebuginfo, } pub enum Input { @@ -1806,6 +1807,7 @@ fn collect_print_requests( ("stack-protector-strategies", PrintRequest::StackProtectorStrategies), ("target-spec-json", PrintRequest::TargetSpec), ("link-args", PrintRequest::LinkArgs), + ("split-debuginfo", PrintRequest::SplitDebuginfo), ]; prints.extend(matches.opt_strs("print").into_iter().map(|req| { diff --git a/src/test/run-make/valid-print-requests/valid-print-requests.stderr b/src/test/run-make/valid-print-requests/valid-print-requests.stderr index 85782866d125a..5191e4676486a 100644 --- a/src/test/run-make/valid-print-requests/valid-print-requests.stderr +++ b/src/test/run-make/valid-print-requests/valid-print-requests.stderr @@ -1,2 +1,2 @@ -error: unknown print request `uwu`. Valid print requests are: `crate-name`, `file-names`, `sysroot`, `target-libdir`, `cfg`, `calling-conventions`, `target-list`, `target-cpus`, `target-features`, `relocation-models`, `code-models`, `tls-models`, `native-static-libs`, `stack-protector-strategies`, `target-spec-json`, `link-args` +error: unknown print request `uwu`. Valid print requests are: `crate-name`, `file-names`, `sysroot`, `target-libdir`, `cfg`, `calling-conventions`, `target-list`, `target-cpus`, `target-features`, `relocation-models`, `code-models`, `tls-models`, `native-static-libs`, `stack-protector-strategies`, `target-spec-json`, `link-args`, `split-debuginfo` From 0c9896bfaa6c7bfd5d34119b7aecffbcc036b201 Mon Sep 17 00:00:00 2001 From: onestacked Date: Mon, 7 Nov 2022 17:41:58 +0100 Subject: [PATCH 20/26] Fix `const_fn_trait_ref_impl`, add test for it --- library/core/src/ops/function.rs | 25 ++++++++++++--------- src/test/ui/consts/fn_trait_refs.rs | 35 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 src/test/ui/consts/fn_trait_refs.rs diff --git a/library/core/src/ops/function.rs b/library/core/src/ops/function.rs index 8d4b0a7ccacdb..11b43b621c7b8 100644 --- a/library/core/src/ops/function.rs +++ b/library/core/src/ops/function.rs @@ -576,9 +576,10 @@ mod impls { use crate::marker::Tuple; #[stable(feature = "rust1", since = "1.0.0")] - impl Fn for &F + #[rustc_const_unstable(feature = "const_fn_trait_ref_impls", issue = "101803")] + impl const Fn for &F where - F: Fn, + F: ~const Fn, { extern "rust-call" fn call(&self, args: A) -> F::Output { (**self).call(args) @@ -586,9 +587,10 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl FnMut for &F + #[rustc_const_unstable(feature = "const_fn_trait_ref_impls", issue = "101803")] + impl const FnMut for &F where - F: Fn, + F: ~const Fn, { extern "rust-call" fn call_mut(&mut self, args: A) -> F::Output { (**self).call(args) @@ -596,9 +598,10 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl FnOnce for &F + #[rustc_const_unstable(feature = "const_fn_trait_ref_impls", issue = "101803")] + impl const FnOnce for &F where - F: Fn, + F: ~const Fn, { type Output = F::Output; @@ -608,9 +611,10 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl FnMut for &mut F + #[rustc_const_unstable(feature = "const_fn_trait_ref_impls", issue = "101803")] + impl const FnMut for &mut F where - F: FnMut, + F: ~const FnMut, { extern "rust-call" fn call_mut(&mut self, args: A) -> F::Output { (*self).call_mut(args) @@ -618,9 +622,10 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl FnOnce for &mut F + #[rustc_const_unstable(feature = "const_fn_trait_ref_impls", issue = "101803")] + impl const FnOnce for &mut F where - F: FnMut, + F: ~const FnMut, { type Output = F::Output; extern "rust-call" fn call_once(self, args: A) -> F::Output { diff --git a/src/test/ui/consts/fn_trait_refs.rs b/src/test/ui/consts/fn_trait_refs.rs new file mode 100644 index 0000000000000..57465c5592580 --- /dev/null +++ b/src/test/ui/consts/fn_trait_refs.rs @@ -0,0 +1,35 @@ +// run-pass +#![feature(const_fn_trait_ref_impls)] +#![feature(fn_traits)] +#![feature(unboxed_closures)] +#![feature(const_trait_impl)] +#![feature(const_mut_refs)] + +use std::marker::Destruct; + +const fn test(i: i32) -> i32 { + i + 1 +} + +const fn call i32 + ~const Destruct>(mut f: F) -> F::Output { + f(5) +} + +const fn use_fn i32 + ~const Destruct>(mut f: F) -> F::Output { + call(&mut f) +} + +const fn test_fn() {} + +const fn tester(_fn: T) +where + T: ~const Fn() + ~const Destruct, +{ +} + +const fn main() { + tester(test_fn); + let test_ref = &test_fn; + tester(test_ref); + assert!(use_fn(test) == 6); +} From d97fa2536f6fb583e2e7a49b90a33632da33443c Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 7 Nov 2022 17:46:46 +0100 Subject: [PATCH 21/26] Fix invalid background-image file name --- src/librustdoc/html/static/css/rustdoc.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustdoc/html/static/css/rustdoc.css b/src/librustdoc/html/static/css/rustdoc.css index a38c0e42ab455..f58d2c609426d 100644 --- a/src/librustdoc/html/static/css/rustdoc.css +++ b/src/librustdoc/html/static/css/rustdoc.css @@ -854,7 +854,7 @@ so that we can apply CSS-filters to change the arrow color in themes */ background-size: 20px; background-position: calc(100% - 2px) 56%; /* image is black color, themes should apply a "filter" property to change the color */ - background-image: url("down-arrow-2d685a4bae708e15.svg"); + background-image: url("down-arrow-927217e04c7463ac.svg"); } #crate-search > option { font-size: 1rem; From 87c190c4252ce2039400146bbdf6b7245a5499f0 Mon Sep 17 00:00:00 2001 From: onestacked Date: Mon, 7 Nov 2022 21:16:22 +0100 Subject: [PATCH 22/26] Reworked const fn ref tests --- src/test/ui/consts/fn_trait_refs.rs | 73 +++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/src/test/ui/consts/fn_trait_refs.rs b/src/test/ui/consts/fn_trait_refs.rs index 57465c5592580..bc8766c74c60f 100644 --- a/src/test/ui/consts/fn_trait_refs.rs +++ b/src/test/ui/consts/fn_trait_refs.rs @@ -1,35 +1,80 @@ -// run-pass +// build-pass + #![feature(const_fn_trait_ref_impls)] #![feature(fn_traits)] #![feature(unboxed_closures)] #![feature(const_trait_impl)] #![feature(const_mut_refs)] +#![feature(const_cmp)] +#![feature(const_refs_to_cell)] use std::marker::Destruct; -const fn test(i: i32) -> i32 { - i + 1 +const fn tester_fn(f: T) -> T::Output +where + T: ~const Fn<()> + ~const Destruct, +{ + f() } -const fn call i32 + ~const Destruct>(mut f: F) -> F::Output { - f(5) +const fn tester_fn_mut(mut f: T) -> T::Output +where + T: ~const FnMut<()> + ~const Destruct, +{ + f() } -const fn use_fn i32 + ~const Destruct>(mut f: F) -> F::Output { - call(&mut f) +const fn tester_fn_once(f: T) -> T::Output +where + T: ~const FnOnce<()>, +{ + f() } -const fn test_fn() {} +const fn test_fn(mut f: T) -> (T::Output, T::Output, T::Output) +where + T: ~const Fn<()> + ~const Destruct, +{ + ( + // impl const Fn for &F + tester_fn(&f), + // impl const FnMut for &F + tester_fn_mut(&f), + // impl const FnOnce for &F + tester_fn_once(&f), + ) +} -const fn tester(_fn: T) +const fn test_fn_mut(mut f: T) -> (T::Output, T::Output) where - T: ~const Fn() + ~const Destruct, + T: ~const FnMut<()> + ~const Destruct, { + ( + // impl const FnMut for &mut F + tester_fn_mut(&mut f), + // impl const FnOnce for &mut F + tester_fn_once(&mut f), + ) +} +const fn test(i: i32) -> i32 { + i + 1 } const fn main() { - tester(test_fn); - let test_ref = &test_fn; - tester(test_ref); - assert!(use_fn(test) == 6); + const fn one() -> i32 { + 1 + }; + const fn two() -> i32 { + 2 + }; + + // FIXME(const_cmp_tuple) + let test_one = test_fn(one); + assert!(test_one.0 == 1); + assert!(test_one.1 == 1); + assert!(test_one.2 == 1); + + let test_two = test_fn_mut(two); + assert!(test_two.0 == 1); + assert!(test_two.1 == 1); } From 66e8a296403e80ea50b2217d40e7a31fbd2f2158 Mon Sep 17 00:00:00 2001 From: Tanner Davies Date: Mon, 7 Nov 2022 15:27:42 -0700 Subject: [PATCH 23/26] Only set config.config to None when using default path --- src/bootstrap/config.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs index c086d248e5ce1..ba50ce9ec2426 100644 --- a/src/bootstrap/config.rs +++ b/src/bootstrap/config.rs @@ -926,8 +926,10 @@ impl Config { // Give a hard error if `--config` or `RUST_BOOTSTRAP_CONFIG` are set to a missing path, // but not if `config.toml` hasn't been created. let mut toml = if !using_default_path || toml_path.exists() { + config.config = Some(toml_path.clone()); get_toml(&toml_path) } else { + config.config = None; TomlConfig::default() }; @@ -942,7 +944,6 @@ impl Config { } config.changelog_seen = toml.changelog_seen; - config.config = if toml_path.exists() { Some(toml_path) } else { None }; let build = toml.build.unwrap_or_default(); From ae5cc9c56e45e9561aa2bf69b423473c0c5fd7c6 Mon Sep 17 00:00:00 2001 From: Rejyr Date: Mon, 7 Nov 2022 19:23:29 -0500 Subject: [PATCH 24/26] fix: lint against lint functions fix: lint against the functions `LintContext::{lookup_with_diagnostics,lookup,struct_span_lint,lint}`, `TyCtxt::struct_lint_node`, `LintLevelsBuilder::struct_lint`. --- compiler/rustc_lint/src/context.rs | 4 ++++ compiler/rustc_lint/src/levels.rs | 1 + compiler/rustc_lint/src/lib.rs | 1 + compiler/rustc_middle/src/ty/context.rs | 1 + 4 files changed, 7 insertions(+) diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs index cec0003ffea78..28f6ac61c5bdd 100644 --- a/compiler/rustc_lint/src/context.rs +++ b/compiler/rustc_lint/src/context.rs @@ -579,6 +579,7 @@ pub trait LintContext: Sized { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] fn lookup_with_diagnostics( &self, lint: &'static Lint, @@ -882,6 +883,7 @@ pub trait LintContext: Sized { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] fn lookup>( &self, lint: &'static Lint, @@ -908,6 +910,7 @@ pub trait LintContext: Sized { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] fn struct_span_lint>( &self, lint: &'static Lint, @@ -933,6 +936,7 @@ pub trait LintContext: Sized { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] fn lint( &self, lint: &'static Lint, diff --git a/compiler/rustc_lint/src/levels.rs b/compiler/rustc_lint/src/levels.rs index db0a3419e6a5d..dfe52312ff00d 100644 --- a/compiler/rustc_lint/src/levels.rs +++ b/compiler/rustc_lint/src/levels.rs @@ -1073,6 +1073,7 @@ impl<'s, P: LintLevelsProvider> LintLevelsBuilder<'s, P> { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] pub(crate) fn struct_lint( &self, lint: &'static Lint, diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index 5288fc542d79a..ebb7de70e0570 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -36,6 +36,7 @@ #![feature(let_chains)] #![feature(min_specialization)] #![feature(never_type)] +#![feature(rustc_attrs)] #![recursion_limit = "256"] #[macro_use] diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index fc706b890d5fb..10a8a5c78b502 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -2856,6 +2856,7 @@ impl<'tcx> TyCtxt<'tcx> { /// Return value of the `decorate` closure is ignored, see [`struct_lint_level`] for a detailed explanation. /// /// [`struct_lint_level`]: rustc_middle::lint::struct_lint_level#decorate-signature + #[rustc_lint_diagnostics] pub fn struct_lint_node( self, lint: &'static Lint, From d6c97a32b4f5e38c0e85010df4438dc7205c44f4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 8 Nov 2022 15:59:19 +1100 Subject: [PATCH 25/26] Simplify `unescape_{char,byte}`. The `usize` isn't needed in the error case. --- compiler/rustc_lexer/src/unescape.rs | 14 +++++--------- compiler/rustc_lexer/src/unescape/tests.rs | 12 ++++-------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 674bbff0878ce..e405013dcabf8 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -94,19 +94,15 @@ where } /// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error -pub fn unescape_char(src: &str) -> Result { - let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err)) +/// unescaped char or an error. +pub fn unescape_char(src: &str) -> Result { + unescape_char_or_byte(&mut src.chars(), false) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result { - let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, true) - .map(byte_from_char) - .map_err(|err| (src.len() - chars.as_str().len(), err)) +pub fn unescape_byte(src: &str) -> Result { + unescape_char_or_byte(&mut src.chars(), true).map(byte_from_char) } /// What kind of literal do we parse. diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 00c8401efdfe4..c7ca8fd16ae47 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -3,8 +3,7 @@ use super::*; #[test] fn test_unescape_char_bad() { fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); + assert_eq!(unescape_char(literal_text), Err(expected_error)); } check("", EscapeError::ZeroChars); @@ -68,8 +67,7 @@ fn test_unescape_char_bad() { #[test] fn test_unescape_char_good() { fn check(literal_text: &str, expected_char: char) { - let actual_result = unescape_char(literal_text); - assert_eq!(actual_result, Ok(expected_char)); + assert_eq!(unescape_char(literal_text), Ok(expected_char)); } check("a", 'a'); @@ -149,8 +147,7 @@ fn test_unescape_str_good() { #[test] fn test_unescape_byte_bad() { fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); + assert_eq!(unescape_byte(literal_text), Err(expected_error)); } check("", EscapeError::ZeroChars); @@ -219,8 +216,7 @@ fn test_unescape_byte_bad() { #[test] fn test_unescape_byte_good() { fn check(literal_text: &str, expected_byte: u8) { - let actual_result = unescape_byte(literal_text); - assert_eq!(actual_result, Ok(expected_byte)); + assert_eq!(unescape_byte(literal_text), Ok(expected_byte)); } check("a", b'a'); From c48ca7dc87c5efc5cf2797d3666790c057867ea4 Mon Sep 17 00:00:00 2001 From: Yiming Lei Date: Thu, 27 Oct 2022 22:38:59 -0700 Subject: [PATCH 26/26] remove redundent "<>" for ty::Slice with reference type this fix #103271 --- .../rustc_hir_typeck/src/method/suggest.rs | 6 +++ compiler/rustc_span/src/source_map.rs | 44 +++++++++++++++++++ src/test/ui/type/issue-103271.rs | 9 ++++ src/test/ui/type/issue-103271.stderr | 22 ++++++++++ 4 files changed, 81 insertions(+) create mode 100644 src/test/ui/type/issue-103271.rs create mode 100644 src/test/ui/type/issue-103271.stderr diff --git a/compiler/rustc_hir_typeck/src/method/suggest.rs b/compiler/rustc_hir_typeck/src/method/suggest.rs index 04ecd2757b427..f41f8731867a9 100644 --- a/compiler/rustc_hir_typeck/src/method/suggest.rs +++ b/compiler/rustc_hir_typeck/src/method/suggest.rs @@ -1804,6 +1804,12 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { | ty::Str | ty::Projection(_) | ty::Param(_) => format!("{deref_ty}"), + // we need to test something like <&[_]>::len + // and Vec::function(); + // <&[_]>::len doesn't need an extra "<>" between + // but for Adt type like Vec::function() + // we would suggest <[_]>::function(); + _ if self.tcx.sess.source_map().span_wrapped_by_angle_bracket(ty.span) => format!("{deref_ty}"), _ => format!("<{deref_ty}>"), }; err.span_suggestion_verbose( diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index f9566eeee9465..029cd93a52a3b 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -753,6 +753,50 @@ impl SourceMap { } } + /// Given a 'Span', tries to tell if the next character is '>' + /// and the previous charactoer is '<' after skipping white space + /// return true if wrapped by '<>' + pub fn span_wrapped_by_angle_bracket(&self, span: Span) -> bool { + self.span_to_source(span, |src, start_index, end_index| { + if src.get(start_index..end_index).is_none() { + return Ok(false); + } + // test the right side to match '>' after skipping white space + let end_src = &src[end_index..]; + let mut i = 0; + while let Some(cc) = end_src.chars().nth(i) { + if cc == ' ' { + i = i + 1; + } else if cc == '>' { + // found > in the right; + break; + } else { + // failed to find '>' return false immediately + return Ok(false); + } + } + // test the left side to match '<' after skipping white space + i = start_index; + let start_src = &src[0..start_index]; + while let Some(cc) = start_src.chars().nth(i) { + if cc == ' ' { + if i == 0 { + return Ok(false); + } + i = i - 1; + } else if cc == '<' { + // found < in the left + break; + } else { + // failed to find '<' return false immediately + return Ok(false); + } + } + return Ok(true); + }) + .map_or(false, |is_accessible| is_accessible) + } + /// Given a `Span`, tries to get a shorter span ending just after the first occurrence of `char` /// `c`. pub fn span_through_char(&self, sp: Span, c: char) -> Span { diff --git a/src/test/ui/type/issue-103271.rs b/src/test/ui/type/issue-103271.rs new file mode 100644 index 0000000000000..41e99e2d82128 --- /dev/null +++ b/src/test/ui/type/issue-103271.rs @@ -0,0 +1,9 @@ +fn main() { + //~^ HELP the following trait is implemented but not in scope; perhaps add a `use` for it: + let length = <&[_]>::len; + //~^ ERROR the function or associated item `len` exists for reference `&[_]`, but its trait bounds were not satisfied [E0599] + //~| function or associated item cannot be called on `&[_]` due to unsatisfied trait bounds + //~| HELP items from traits can only be used if the trait is in scope + //~| HELP the function `len` is implemented on `[_]` + assert_eq!(length(&[1,3]), 2); +} diff --git a/src/test/ui/type/issue-103271.stderr b/src/test/ui/type/issue-103271.stderr new file mode 100644 index 0000000000000..3eec881a91b03 --- /dev/null +++ b/src/test/ui/type/issue-103271.stderr @@ -0,0 +1,22 @@ +error[E0599]: the function or associated item `len` exists for reference `&[_]`, but its trait bounds were not satisfied + --> $DIR/issue-103271.rs:3:26 + | +LL | let length = <&[_]>::len; + | ^^^ function or associated item cannot be called on `&[_]` due to unsatisfied trait bounds + | + = note: the following trait bounds were not satisfied: + `&[_]: ExactSizeIterator` + which is required by `&mut &[_]: ExactSizeIterator` + = help: items from traits can only be used if the trait is in scope +help: the following trait is implemented but not in scope; perhaps add a `use` for it: + | +LL | use object::read::read_ref::ReadRef; + | +help: the function `len` is implemented on `[_]` + | +LL | let length = <[_]>::len; + | ~~~ + +error: aborting due to previous error + +For more information about this error, try `rustc --explain E0599`.