From 19f04a7d6878fc4c258ba3d4374e81c8bbeca2e0 Mon Sep 17 00:00:00 2001
From: Chris Denton
Date: Mon, 8 Apr 2024 11:42:16 +0000
Subject: [PATCH] Add comment on UTF-16 surrogates

---
 library/std/src/sys/pal/windows/api.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/library/std/src/sys/pal/windows/api.rs b/library/std/src/sys/pal/windows/api.rs
index 8613dba42d2dc..555ad581b8568 100644
--- a/library/std/src/sys/pal/windows/api.rs
+++ b/library/std/src/sys/pal/windows/api.rs
@@ -68,6 +68,8 @@ pub const fn utf16_len(s: &str) -> usize {
             n => n as usize,
         };
         i += utf8_len;
+        // Note that UTF-16 surrogates (U+D800 to U+DFFF) are not encodable as UTF-8,
+        // so (unlike with WTF-8) we don't have to worry about how they'll get re-encoded.
         len += if utf8_len < 4 { 1 } else { 2 };
     }
     len