From f3682a1304200d554d2d36abf21376861b9ae14a Mon Sep 17 00:00:00 2001 From: "HTGAzureX1212." <39023054+HTGAzureX1212@users.noreply.github.com> Date: Tue, 23 Jan 2024 10:56:33 +0800 Subject: [PATCH 1/3] add list of characters to uncommon codepoints lint --- compiler/rustc_errors/src/diagnostic_impls.rs | 8 ++++++++ compiler/rustc_lint/messages.ftl | 2 +- compiler/rustc_lint/src/lints.rs | 4 +++- compiler/rustc_lint/src/non_ascii_idents.rs | 11 ++++++++++- tests/ui/lexer/lex-emoji-identifiers.stderr | 2 +- .../lint-uncommon-codepoints.stderr | 6 +++--- 6 files changed, 26 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_errors/src/diagnostic_impls.rs b/compiler/rustc_errors/src/diagnostic_impls.rs index 39252dea28303..f6679ae9bb352 100644 --- a/compiler/rustc_errors/src/diagnostic_impls.rs +++ b/compiler/rustc_errors/src/diagnostic_impls.rs @@ -110,6 +110,14 @@ impl IntoDiagnosticArg for char { } } +impl IntoDiagnosticArg for Vec<char> { + fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> { + DiagnosticArgValue::StrListSepByAnd( + self.into_iter().map(|c| Cow::Owned(format!("{c:?}"))).collect(), + ) + } +} + impl IntoDiagnosticArg for Symbol { fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> { self.to_ident_string().into_diagnostic_arg() diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 33f96139f2011..ac456c69c57db 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -240,7 +240,7 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of lint_identifier_non_ascii_char = identifier contains non-ASCII characters -lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints +lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints: {$codepoints} lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level diff --git a/compiler/rustc_lint/src/lints.rs 
b/compiler/rustc_lint/src/lints.rs index 94ecc7d95877b..7d63fad304447 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -1107,7 +1107,9 @@ pub struct IdentifierNonAsciiChar; #[derive(LintDiagnostic)] #[diag(lint_identifier_uncommon_codepoints)] -pub struct IdentifierUncommonCodepoints; +pub struct IdentifierUncommonCodepoints { + pub codepoints: Vec<char>, +} #[derive(LintDiagnostic)] #[diag(lint_confusable_identifier_pair)] diff --git a/compiler/rustc_lint/src/non_ascii_idents.rs b/compiler/rustc_lint/src/non_ascii_idents.rs index 00f87a5af80fc..ec11f7a6130de 100644 --- a/compiler/rustc_lint/src/non_ascii_idents.rs +++ b/compiler/rustc_lint/src/non_ascii_idents.rs @@ -190,7 +190,16 @@ impl EarlyLintPass for NonAsciiIdents { if check_uncommon_codepoints && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed) { - cx.emit_span_lint(UNCOMMON_CODEPOINTS, sp, IdentifierUncommonCodepoints); + cx.emit_span_lint( + UNCOMMON_CODEPOINTS, + sp, + IdentifierUncommonCodepoints { + codepoints: symbol_str + .chars() + .filter(|c| !GeneralSecurityProfile::identifier_allowed(*c)) + .collect(), + }, + ); } } diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr index 747825fa2a988..568bde254fb07 100644 --- a/tests/ui/lexer/lex-emoji-identifiers.stderr +++ b/tests/ui/lexer/lex-emoji-identifiers.stderr @@ -40,7 +40,7 @@ error: identifiers cannot contain emoji: `folded🙏🏿` LL | let folded🙏🏿 = "modifier sequence"; | ^^^^^^^^^^ -warning: identifier contains uncommon Unicode codepoints +warning: identifier contains uncommon Unicode codepoints: '\u{fe0f}' --> $DIR/lex-emoji-identifiers.rs:6:9 | LL | let key1️⃣ = "keycap sequence"; diff --git a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr index 0533da03068ae..4df13014f7c85 100644 --- 
a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr +++ b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr @@ -1,4 +1,4 @@ -error: identifier contains uncommon Unicode codepoints +error: identifier contains uncommon Unicode codepoints: 'µ' --> $DIR/lint-uncommon-codepoints.rs:3:7 | LL | const µ: f64 = 0.000001; @@ -10,13 +10,13 @@ note: the lint level is defined here LL | #![deny(uncommon_codepoints)] | ^^^^^^^^^^^^^^^^^^^ -error: identifier contains uncommon Unicode codepoints +error: identifier contains uncommon Unicode codepoints: 'ĳ' --> $DIR/lint-uncommon-codepoints.rs:6:4 | LL | fn dĳkstra() {} | ^^^^^^^ -error: identifier contains uncommon Unicode codepoints +error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ' --> $DIR/lint-uncommon-codepoints.rs:9:9 | LL | let ㇻㇲㇳ = "rust"; From 3a07333a8aa873fcb75d50541f7f209e2a04f80f Mon Sep 17 00:00:00 2001 From: "HTGAzureX1212." <39023054+HTGAzureX1212@users.noreply.github.com> Date: Tue, 23 Jan 2024 21:16:24 +0800 Subject: [PATCH 2/3] address requested changes --- compiler/rustc_lint/messages.ftl | 5 ++++- compiler/rustc_lint/src/lints.rs | 1 + compiler/rustc_lint/src/non_ascii_idents.rs | 13 +++++++------ tests/ui/lexer/lex-emoji-identifiers.rs | 2 +- tests/ui/lexer/lex-emoji-identifiers.stderr | 2 +- .../lint-uncommon-codepoints.rs | 4 ++-- .../lint-uncommon-codepoints.stderr | 4 ++-- 7 files changed, 18 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index ac456c69c57db..b4506990d4fca 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -240,7 +240,10 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of lint_identifier_non_ascii_char = identifier contains non-ASCII characters -lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints: {$codepoints} +lint_identifier_uncommon_codepoints = 
identifier contains {$codepoints_len -> + [one] an uncommon Unicode codepoint + *[other] uncommon Unicode codepoints +}: {$codepoints} lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index 7d63fad304447..e19bb1cb62f5a 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -1109,6 +1109,7 @@ pub struct IdentifierNonAsciiChar; #[diag(lint_identifier_uncommon_codepoints)] pub struct IdentifierUncommonCodepoints { pub codepoints: Vec<char>, + pub codepoints_len: usize, } #[derive(LintDiagnostic)] diff --git a/compiler/rustc_lint/src/non_ascii_idents.rs b/compiler/rustc_lint/src/non_ascii_idents.rs index ec11f7a6130de..f78b32ce5e77b 100644 --- a/compiler/rustc_lint/src/non_ascii_idents.rs +++ b/compiler/rustc_lint/src/non_ascii_idents.rs @@ -190,15 +190,16 @@ impl EarlyLintPass for NonAsciiIdents { if check_uncommon_codepoints && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed) { + let codepoints: Vec<_> = symbol_str + .chars() + .filter(|c| !GeneralSecurityProfile::identifier_allowed(*c)) + .collect(); + let codepoints_len = codepoints.len(); + cx.emit_span_lint( UNCOMMON_CODEPOINTS, sp, - IdentifierUncommonCodepoints { - codepoints: symbol_str - .chars() - .filter(|c| !GeneralSecurityProfile::identifier_allowed(*c)) - .collect(), - }, + IdentifierUncommonCodepoints { codepoints, codepoints_len }, ); } } diff --git a/tests/ui/lexer/lex-emoji-identifiers.rs b/tests/ui/lexer/lex-emoji-identifiers.rs index decf2f0058721..bbc088521b7bd 100644 --- a/tests/ui/lexer/lex-emoji-identifiers.rs +++ b/tests/ui/lexer/lex-emoji-identifiers.rs @@ -4,7 +4,7 @@ fn invalid_emoji_usages() { let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji // FIXME let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token - //~^ WARN: identifier contains uncommon Unicode codepoints + //~^ WARN: 
identifier contains an uncommon Unicode codepoint let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr index 568bde254fb07..679b7422bc150 100644 --- a/tests/ui/lexer/lex-emoji-identifiers.stderr +++ b/tests/ui/lexer/lex-emoji-identifiers.stderr @@ -40,7 +40,7 @@ error: identifiers cannot contain emoji: `folded🙏🏿` LL | let folded🙏🏿 = "modifier sequence"; | ^^^^^^^^^^ -warning: identifier contains uncommon Unicode codepoints: '\u{fe0f}' +warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}' --> $DIR/lex-emoji-identifiers.rs:6:9 | LL | let key1️⃣ = "keycap sequence"; diff --git a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs index ed8e7ddddc597..c3459930a94c0 100644 --- a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs +++ b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs @@ -1,9 +1,9 @@ #![deny(uncommon_codepoints)] -const µ: f64 = 0.000001; //~ ERROR identifier contains uncommon Unicode codepoints +const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint //~| WARNING should have an upper case name -fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints +fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint fn main() { let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints diff --git a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr index 4df13014f7c85..bae5ac654d354 100644 --- a/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr +++ 
b/tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr @@ -1,4 +1,4 @@ -error: identifier contains uncommon Unicode codepoints: 'µ' +error: identifier contains an uncommon Unicode codepoint: 'µ' --> $DIR/lint-uncommon-codepoints.rs:3:7 | LL | const µ: f64 = 0.000001; @@ -10,7 +10,7 @@ note: the lint level is defined here LL | #![deny(uncommon_codepoints)] | ^^^^^^^^^^^^^^^^^^^ -error: identifier contains uncommon Unicode codepoints: 'ĳ' +error: identifier contains an uncommon Unicode codepoint: 'ĳ' --> $DIR/lint-uncommon-codepoints.rs:6:4 | LL | fn dĳkstra() {} From da1d0c4a6913dded1deaba602f677674dcbbe21f Mon Sep 17 00:00:00 2001 From: "HTGAzureX1212." <39023054+HTGAzureX1212@users.noreply.github.com> Date: Tue, 23 Jan 2024 21:17:06 +0800 Subject: [PATCH 3/3] tidy --- compiler/rustc_lint/messages.ftl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index b4506990d4fca..5652a34103b09 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -240,7 +240,7 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of lint_identifier_non_ascii_char = identifier contains non-ASCII characters -lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len -> +lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len -> [one] an uncommon Unicode codepoint *[other] uncommon Unicode codepoints }: {$codepoints}