Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows: set main thread name without re-encoding #123534

Merged
merged 3 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions library/std/src/sys/pal/windows/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,102 @@ use core::ptr::addr_of;

use super::c;

/// Creates a null-terminated UTF-16 string from a str.
///
/// The argument must be a string literal. The expansion first checks, in a
/// `const` item, that the literal contains no NUL byte, so an interior null is
/// reported as a compile-time panic rather than silently truncating the string.
/// It then appends a single `'\0'` and delegates the conversion to `utf16!`,
/// so the result is whatever `utf16!` yields for the terminated literal.
pub macro wide_str($str:literal) {{
// Anonymous const: evaluated at compile time purely for its panic.
const _: () = {
if core::slice::memchr::memchr(0, $str.as_bytes()).is_some() {
panic!("null terminated strings cannot contain interior nulls");
}
};
// `concat!` yields a new literal with the terminator appended.
crate::sys::pal::windows::api::utf16!(concat!($str, '\0'))
}}

/// Creates a UTF-16 string from a str without null termination.
///
/// `$str` must be an expression that can initialize a `const` of type `&str`.
/// The conversion is done entirely in `const` items: the required number of
/// UTF-16 code units is computed first and used as the array length, then the
/// array itself is filled in. The macro evaluates to `&UTF16`, a reference to
/// that `[u16; UTF16_LEN]` constant.
pub macro utf16($str:expr) {{
const UTF8: &str = $str;
// The array length has to be a constant, hence the separate length pass.
const UTF16_LEN: usize = crate::sys::pal::windows::api::utf16_len(UTF8);
const UTF16: [u16; UTF16_LEN] = crate::sys::pal::windows::api::to_utf16(UTF8);
&UTF16
}}

#[cfg(test)]
mod tests;

/// Gets the UTF-16 length (in `u16` code units) of a UTF-8 string.
///
/// This is used by the `utf16!` macro (and therefore by `wide_str!`, which
/// expands to it) to size the output array before converting with `to_utf16`.
///
/// Being a `const fn`, it walks the raw bytes instead of using iterators.
/// Because `s` is a `&str` the bytes are valid UTF-8, so every step lands on
/// the first byte of an encoded code point.
pub const fn utf16_len(s: &str) -> usize {
    let s = s.as_bytes();
    let mut i = 0;
    let mut len = 0;
    while i < s.len() {
        // The length of a UTF-8 encoded code point is given by the number of
        // leading ones of its first byte, except in the case of ASCII (zero
        // leading ones, one byte).
        let utf8_len = match s[i].leading_ones() {
            0 => 1,
            n => n as usize,
        };
        i += utf8_len;
        // Code points encoded in 1-3 UTF-8 bytes fit in one UTF-16 unit;
        // 4-byte encodings need a surrogate pair.
        // Note that UTF-16 surrogates (U+D800 to U+DFFF) are not encodable as UTF-8,
        // so (unlike with WTF-8) we don't have to worry about how they'll get re-encoded.
        len += if utf8_len < 4 { 1 } else { 2 };
    }
    len
}

/// Const convert UTF-8 to UTF-16, for use in the `utf16!` macro (and, through
/// it, `wide_str!`).
///
/// `UTF16_LEN` is expected to be the number of UTF-16 code units `s` encodes
/// to, i.e. the value returned by `utf16_len(s)`. If it is smaller, indexing
/// the output goes out of bounds and panics (a compile error when evaluated in
/// a const context); if it is larger, the unused tail keeps its initial zeros.
///
/// Note that this is designed for use in const contexts so is not optimized.
pub const fn to_utf16<const UTF16_LEN: usize>(s: &str) -> [u16; UTF16_LEN] {
    let mut output = [0_u16; UTF16_LEN];
    let mut pos = 0;
    let s = s.as_bytes();
    let mut i = 0;
    while i < s.len() {
        match s[i].leading_ones() {
            // Decode UTF-8 based on its length.
            // See https://en.wikipedia.org/wiki/UTF-8
            0 => {
                // ASCII is the same in both encodings
                output[pos] = s[i] as u16;
                i += 1;
                pos += 1;
            }
            2 => {
                // Bits: 110xxxxx 10xxxxxx
                output[pos] = ((s[i] as u16 & 0b11111) << 6) | (s[i + 1] as u16 & 0b111111);
                i += 2;
                pos += 1;
            }
            3 => {
                // Bits: 1110xxxx 10xxxxxx 10xxxxxx
                output[pos] = ((s[i] as u16 & 0b1111) << 12)
                    | ((s[i + 1] as u16 & 0b111111) << 6)
                    | (s[i + 2] as u16 & 0b111111);
                i += 3;
                pos += 1;
            }
            4 => {
                // Bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                let mut c = ((s[i] as u32 & 0b111) << 18)
                    | ((s[i + 1] as u32 & 0b111111) << 12)
                    | ((s[i + 2] as u32 & 0b111111) << 6)
                    | (s[i + 3] as u32 & 0b111111);
                // re-encode as UTF-16 (see https://en.wikipedia.org/wiki/UTF-16)
                // - Subtract 0x10000 from the code point
                // - For the high surrogate, shift right by 10 then add 0xD800
                // - For the low surrogate, take the low 10 bits then add 0xDC00
                c -= 0x10000;
                output[pos] = ((c >> 10) + 0xD800) as u16;
                output[pos + 1] = ((c & 0b1111111111) + 0xDC00) as u16;
                i += 4;
                pos += 2;
            }
            // valid UTF-8 cannot have any other values: a leading-ones count
            // of 1 is a continuation byte (never first), and 5+ is never valid.
            _ => unreachable!(),
        }
    }
    output
}
Comment on lines +78 to +131
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice work. I feel like at least some of this should be using more public std API instead of a bunch of sorcerous isopsephia, but I looked for equivalents and couldn't find any in the stdlib, so this will do for now.


/// Helper method for getting the size of `T` as a u32.
/// Errors at compile time if the size would overflow.
///
Expand Down
16 changes: 16 additions & 0 deletions library/std/src/sys/pal/windows/api/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use crate::sys::pal::windows::api::{utf16, wide_str};

// Checks both compile-time conversion macros against the standard library's
// runtime encoder for the same literal:
// - `wide_str!` must equal `encode_utf16()` followed by a single 0 terminator;
// - `utf16!` must equal `encode_utf16()` exactly (no terminator).
macro_rules! check_utf16 {
($str:literal) => {{
assert!(wide_str!($str).iter().copied().eq($str.encode_utf16().chain([0])));
assert!(utf16!($str).iter().copied().eq($str.encode_utf16()));
}};
}

/// Exercises `wide_str!` and `utf16!` on literals covering every UTF-8
/// sequence length handled by the converter.
#[test]
fn test_utf16_macros() {
    // ASCII only (1-byte sequences).
    check_utf16!("hello world");
    // Includes a 3-byte sequence (the euro sign).
    check_utf16!("€4.50");
    // Starts with a 4-byte sequence, which needs a UTF-16 surrogate pair.
    check_utf16!("𨉟呐㗂越");
    // Polish pangram: mixes ASCII with 2-byte sequences.
    check_utf16!("Pchnąć w tę łódź jeża lub ośm skrzyń fig");
}
7 changes: 4 additions & 3 deletions library/std/src/sys/pal/windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ use crate::io::ErrorKind;
use crate::mem::MaybeUninit;
use crate::os::windows::ffi::{OsStrExt, OsStringExt};
use crate::path::PathBuf;
use crate::sys::pal::windows::api::wide_str;
use crate::time::Duration;

pub use self::rand::hashmap_random_keys;

#[macro_use]
pub mod compat;

mod api;

pub mod alloc;
pub mod args;
pub mod c;
Expand Down Expand Up @@ -41,8 +44,6 @@ cfg_if::cfg_if! {
}
}

mod api;

/// Map a Result<T, WinError> to io::Result<T>.
trait IoResult<T> {
fn io_result(self) -> crate::io::Result<T>;
Expand All @@ -60,7 +61,7 @@ pub unsafe fn init(_argc: isize, _argv: *const *const u8, _sigpipe: u8) {

// Normally, `thread::spawn` will call `Thread::set_name` but since this thread already
// exists, we have to call it ourselves.
thread::Thread::set_name(&c"main");
thread::Thread::set_name_wide(wide_str!("main"));
}

// SAFETY: must be called only once during runtime cleanup.
Expand Down
10 changes: 7 additions & 3 deletions library/std/src/sys/pal/windows/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,17 @@ impl Thread {
/// Sets the name of the calling thread from a C string.
///
/// This is best effort: if `name` is not valid UTF-8, or `to_u16s` fails to
/// convert it, the function returns without doing anything.
///
/// The actual OS call lives in `set_name_wide`; this function only performs
/// the re-encoding, so the thread description is set exactly once.
pub fn set_name(name: &CStr) {
    if let Ok(utf8) = name.to_str() {
        // NOTE(review): `to_u16s` is presumably producing a null-terminated
        // UTF-16 buffer, which `set_name_wide` relies on — confirm.
        if let Ok(utf16) = to_u16s(utf8) {
            Self::set_name_wide(&utf16)
        };
    };
}

/// Sets the description of the calling thread from UTF-16 code units,
/// without any re-encoding.
///
/// Only the pointer to the first element of `name` is passed on; the slice
/// length is not. The result of the OS call is ignored.
pub fn set_name_wide(name: &[u16]) {
    let wide_ptr = name.as_ptr();
    // SAFETY: NOTE(review) — the callee is given no length, so it presumably
    // reads up to a terminating zero. That makes this sound only if `name`
    // contains one, which this safe signature does not enforce; confirm that
    // every caller passes a null-terminated buffer.
    unsafe {
        c::SetThreadDescription(c::GetCurrentThread(), wide_ptr);
    };
}

pub fn join(self) {
let rc = unsafe { c::WaitForSingleObject(self.handle.as_raw_handle(), c::INFINITE) };
if rc == c::WAIT_FAILED {
Expand Down
Loading