Skip to content

Commit

Permalink
Fix user and kernel times on Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
clemenswasser committed Aug 16, 2022
1 parent 2e8ce30 commit 3242e0a
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 52 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ version = "1.14.0"
edition = "2018"
build = "build.rs"

[features]
# Use the nightly feature windows_process_extensions_main_thread_handle
windows_process_extensions_main_thread_handle = []

[dependencies]
colored = "2.0"
indicatif = "0.16"
Expand All @@ -29,7 +33,8 @@ anyhow = "1.0"
libc = "0.2"

[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["processthreadsapi", "minwindef", "winnt"] }
winapi = { version = "0.3", features = ["processthreadsapi", "minwindef", "winnt", "jobapi2", "tlhelp32"] }
once_cell = "1.13"

[target.'cfg(target_os="linux")'.dependencies]
nix = { version = "0.24.2", features = ["zerocopy"] }
Expand Down
5 changes: 5 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#![cfg_attr(
all(windows, feature = "windows_process_extensions_main_thread_handle"),
feature(windows_process_extensions_main_thread_handle)
)]

use std::env;

use benchmark::scheduler::Scheduler;
Expand Down
19 changes: 15 additions & 4 deletions src/timer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::process::{ChildStdout, Command, ExitStatus};

use anyhow::Result;

#[cfg(not(windows))]
#[derive(Debug, Copy, Clone)]
struct CPUTimes {
/// Total amount of time spent executing in user mode
Expand Down Expand Up @@ -74,15 +75,25 @@ fn discard(output: ChildStdout) {

/// Execute the given command and return a timing summary
pub fn execute_and_measure(mut command: Command) -> Result<TimerResult> {
let wallclock_timer = WallClockTimer::start();

#[cfg(not(windows))]
let cpu_timer = self::unix_timer::CPUTimer::start();

#[cfg(windows)]
{
use std::os::windows::process::CommandExt;

// Create a suspended process
command.creation_flags(4);
}

let wallclock_timer = WallClockTimer::start();
let mut child = command.spawn()?;

#[cfg(windows)]
let cpu_timer = self::windows_timer::CPUTimer::start_for_process(&child);
let cpu_timer = {
// SAFETY: We created a suspended process
unsafe { self::windows_timer::CPUTimer::start_suspended_process(&child) }
};

if let Some(output) = child.stdout.take() {
// Handle CommandOutputPolicy::Pipe
Expand All @@ -91,8 +102,8 @@ pub fn execute_and_measure(mut command: Command) -> Result<TimerResult> {

let status = child.wait()?;

let (time_user, time_system) = cpu_timer.stop();
let time_real = wallclock_timer.stop();
let (time_user, time_system) = cpu_timer.stop();

Ok(TimerResult {
time_real,
Expand Down
137 changes: 92 additions & 45 deletions src/timer/windows_timer.rs
Original file line number Diff line number Diff line change
@@ -1,72 +1,119 @@
#![cfg(windows)]
#![warn(unsafe_op_in_unsafe_fn)]

use std::mem;
use std::os::windows::io::{AsRawHandle, RawHandle};
use std::process::Child;
use std::{mem, os::windows::io::AsRawHandle, process, ptr};

use winapi::um::processthreadsapi::GetProcessTimes;
use winapi::um::winnt::HANDLE;
use clap::__macro_refs::once_cell::sync::Lazy;
use winapi::{
shared::{ntdef::NTSTATUS, ntstatus::STATUS_SUCCESS},
um::{
handleapi::CloseHandle,
jobapi2::{AssignProcessToJobObject, CreateJobObjectW, QueryInformationJobObject},
libloaderapi::{GetModuleHandleA, GetProcAddress},
winnt::{
JobObjectBasicAccountingInformation, HANDLE, JOBOBJECT_BASIC_ACCOUNTING_INFORMATION,
},
},
};

#[cfg(windows_process_extensions_main_thread_handle)]
use winapi::shared::minwindef::DWORD;

use crate::timer::CPUTimes;
use crate::util::units::Second;

// Number of 100-nanosecond ticks in one microsecond.
// NOTE(review): despite the `MS` suffix, dividing a 100 ns tick count by this
// constant yields microseconds; the results are then scaled by 1e-6 to seconds.
const HUNDRED_NS_PER_MS: i64 = 10;

/// Lazily resolved pointer to the `NtResumeProcess` export of `ntdll.dll`.
///
/// The lookup happens once, on first use. It panics if the module handle or
/// the export cannot be obtained.
// The static keeps the spelling of the Windows function it points to.
#[allow(non_upper_case_globals)]
static NtResumeProcess: Lazy<unsafe extern "system" fn(ProcessHandle: HANDLE) -> NTSTATUS> =
    Lazy::new(|| {
        let module_name = b"ntdll.dll\0";
        let symbol_name = b"NtResumeProcess\0";

        // SAFETY: `module_name` is a NUL-terminated byte string that outlives the call
        let module = unsafe { GetModuleHandleA(module_name.as_ptr().cast()) };
        assert!(!module.is_null(), "GetModuleHandleA failed");

        // SAFETY: `module` was checked to be non-null and `symbol_name` is NUL-terminated
        let symbol = unsafe { GetProcAddress(module, symbol_name.as_ptr().cast()) };
        assert!(!symbol.is_null(), "GetProcAddress failed");

        // SAFETY: `symbol` is non-null; the export is assumed to have the signature
        // declared on this static (`NtResumeProcess` is an undocumented API)
        unsafe { mem::transmute(symbol) }
    });

pub struct CPUTimer {
handle: RawHandle,
job_object: HANDLE,
}

impl CPUTimer {
pub fn start_for_process(process: &Child) -> Self {
CPUTimer {
handle: process.as_raw_handle(),
/// Attach a suspended child process to a fresh job object, then resume it.
///
/// A new job object is created, `child` is assigned to it, and the process is
/// resumed. The job object handle is stored in the returned timer.
///
/// With the `windows_process_extensions_main_thread_handle` crate feature the
/// process is resumed through its main thread handle (`ResumeThread`);
/// otherwise the undocumented `NtResumeProcess` is used.
///
/// # Safety
///
/// The caller must guarantee that `child` was created suspended and has not
/// been resumed yet.
///
/// # Panics
///
/// Panics if the job object cannot be created, if the process cannot be
/// assigned to it, or if resuming the process fails.
pub unsafe fn start_suspended_process(child: &process::Child) -> Self {
    // SAFETY: Creating a new job object is safe
    let job_object = unsafe { CreateJobObjectW(ptr::null_mut(), ptr::null_mut()) };
    assert!(!job_object.is_null(), "CreateJobObjectW failed");

    // SAFETY: The job object handle is valid
    let ret = unsafe { AssignProcessToJobObject(job_object, child.as_raw_handle()) };
    assert!(ret != 0, "AssignProcessToJobObject failed");

    // This switch is a Cargo feature, so it has to be tested with
    // `feature = "..."`; a bare cfg flag of the same name is never set.
    #[cfg(feature = "windows_process_extensions_main_thread_handle")]
    {
        use std::os::windows::process::ChildExt;
        use winapi::um::processthreadsapi::ResumeThread;

        // SAFETY: The main thread handle is valid
        let ret = unsafe { ResumeThread(child.main_thread_handle().as_raw_handle()) };
        // `ResumeThread` signals failure by returning `(DWORD)-1`
        assert!(ret != u32::MAX, "ResumeThread failed");
    }

    #[cfg(not(feature = "windows_process_extensions_main_thread_handle"))]
    {
        // Since we can't get the main thread handle on stable rust, we use
        // the undocumented but widely known `NtResumeProcess` function to
        // resume a process by its handle.

        // SAFETY: The process handle is valid
        let ret = unsafe { NtResumeProcess(child.as_raw_handle()) };
        assert!(ret == STATUS_SUCCESS, "NtResumeProcess failed");
    }

    Self { job_object }
}

pub fn stop(&self) -> (Second, Second) {
let times = get_cpu_times(self.handle);
(
times.user_usec as f64 * 1e-6,
times.system_usec as f64 * 1e-6,
)
}
}
let mut job_object_info =
mem::MaybeUninit::<JOBOBJECT_BASIC_ACCOUNTING_INFORMATION>::uninit();

/// Read CPU execution times
fn get_cpu_times(handle: RawHandle) -> CPUTimes {
let (user_usec, system_usec) = {
let mut _ctime = unsafe { mem::zeroed() };
let mut _etime = unsafe { mem::zeroed() };
let mut kernel_time = unsafe { mem::zeroed() };
let mut user_time = unsafe { mem::zeroed() };
// SAFETY: A valid job object got created in `start_suspended_process`
let res = unsafe {
GetProcessTimes(
handle as HANDLE,
&mut _ctime,
&mut _etime,
&mut kernel_time,
&mut user_time,
QueryInformationJobObject(
self.job_object,
JobObjectBasicAccountingInformation,
job_object_info.as_mut_ptr().cast(),
mem::size_of::<JOBOBJECT_BASIC_ACCOUNTING_INFORMATION>() as u32,
ptr::null_mut(),
)
};

// GetProcessTimes returns a non-zero value on success, as per: https://msdn.microsoft.com/en-us/library/windows/desktop/ms683223(v=vs.85).aspx
if res != 0 {
// Extract times as laid out here: https://support.microsoft.com/en-us/help/188768/info-working-with-the-filetime-structure
// Both user_time and kernel_time are spans that the process spent in either.
let user: i64 = (((user_time.dwHighDateTime as i64) << 32)
+ user_time.dwLowDateTime as i64)
/ HUNDRED_NS_PER_MS;
let kernel: i64 = (((kernel_time.dwHighDateTime as i64) << 32)
+ kernel_time.dwLowDateTime as i64)
/ HUNDRED_NS_PER_MS;
(user, kernel)
// SAFETY: The job object info got correctly initialized
let job_object_info = unsafe { job_object_info.assume_init() };

// SAFETY: The `TotalUserTime` is "The total amount of user-mode execution time for
// all active processes associated with the job, as well as all terminated processes no
// longer associated with the job, in 100-nanosecond ticks." and is safe to extract
let user: i64 = unsafe { job_object_info.TotalUserTime.QuadPart() } / HUNDRED_NS_PER_MS;

// SAFETY: The `TotalKernelTime` is "The total amount of kernel-mode execution time
// for all active processes associated with the job, as well as all terminated
// processes no longer associated with the job, in 100-nanosecond ticks." and is safe
// to extract
let kernel: i64 =
unsafe { job_object_info.TotalKernelTime.QuadPart() } / HUNDRED_NS_PER_MS;
(user as f64 * 1e-6, kernel as f64 * 1e-6)
} else {
(0, 0)
(0.0, 0.0)
}
};
}
}

CPUTimes {
user_usec,
system_usec,
/// Closes the job object handle held by the timer when the timer is dropped.
impl Drop for CPUTimer {
    fn drop(&mut self) {
        // The return value of `CloseHandle` is ignored here.
        // SAFETY: A valid job object got created in `start_suspended_process`
        unsafe { CloseHandle(self.job_object) };
    }
}

0 comments on commit 3242e0a

Please sign in to comment.