diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index a79266c9..aaadff55 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -3,12 +3,12 @@ name: Security audit on: schedule: # Runs at 00:00 UTC everyday - - cron: '0 0 * * *' + - cron: "0 0 * * *" push: paths: - - '**/Cargo.toml' - - '**/Cargo.lock' - - '**/audit.toml' + - "**/Cargo.toml" + - "**/Cargo.lock" + - "**/audit.toml" jobs: audit: @@ -16,6 +16,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v2 - - uses: actions-rs/audit-check@v1 + - name: deny audit + uses: EmbarkStudios/cargo-deny-action@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} + command: check advisories diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6c734ba..95f7fb80 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,16 +21,17 @@ jobs: - { os: ubuntu-latest , target: x86_64-unknown-linux-gnu, use-cross: false } - { os: ubuntu-latest, target: x86_64-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: i686-unknown-linux-gnu, use-cross: true } - #- { os: ubuntu-latest, target: i686-unknown-linux-musl, use-cross: true } + #- { os: ubuntu-latest, target: i686-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: aarch64-unknown-linux-gnu, use-cross: true } - { os: ubuntu-latest, target: aarch64-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: aarch64-linux-android, use-cross: true } - { os: ubuntu-latest, target: arm-unknown-linux-gnueabi, use-cross: true } - - { os: ubuntu-latest, target: arm-unknown-linux-musleabi, use-cross: true } + - { os: ubuntu-latest, target: arm-unknown-linux-musleabi, use-cross: true } - { os: ubuntu-latest, target: arm-linux-androideabi, use-cross: true } - { os: ubuntu-latest, target: arm-unknown-linux-gnueabihf, use-cross: true } - - { os: windows-2022, target: x86_64-pc-windows-msvc, use-cross: false } - #- { os: macos-latest, target: 
x86_64-apple-darwin, use-cross: false } + - { os: windows-2022, target: x86_64-pc-windows-msvc, use-cross: false } + - { os: macos-latest, target: x86_64-apple-darwin, use-cross: false } + - { os: macos-latest, target: aarch64-apple-darwin, use-cross: false } steps: - name: Checkout repository uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index 96ef6c0b..6beb0457 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +.test-symbols diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..084c32e0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ + + +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] +### Added +- Initial implementation of support for `x86_64-apple-darwin` and `aarch64-apple-darwin` + +## [0.1.0] - 2022-04-26 +### Added +- Initial release, including basic support for `x86_64-unknown-linux-gnu/musl` and `x86_64-pc-windows-msvc` + +[Unreleased]: https://github.com/rust-minidump/minidump-writer/compare/0.1.0...HEAD +[0.1.0]: https://github.com/rust-minidump/minidump-writer/releases/tag/0.1.0 diff --git a/Cargo.toml b/Cargo.toml index 1b8bd443..feaea30b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,9 @@ license = "MIT" [dependencies] byteorder = "1.3.2" cfg-if = "1.0" -crash-context = "0.1" +crash-context = "0.2" memoffset = "0.6" -minidump-common = "0.10" +minidump-common = "0.11" scroll = "0.11" tempfile = "3.1.0" thiserror = "1.0.21" @@ -41,5 +41,26 @@ features = [ "Win32_System_Threading", ] +[target.'cfg(target_os = "macos")'.dependencies] +# Binds some additional mac specifics not in libc +mach2 = "0.4" + [dev-dependencies] -minidump = "0.10" +# Sigh, minidump-processor is async for some reason so we need an executor :( +futures = { version = "0.3", features = 
["executor"] } +minidump = "0.11" +memmap2 = "0.5" + +[target.'cfg(target_os = "macos")'.dev-dependencies] +# We dump symbols for the `test` executable so that we can validate that minidumps +# created by this crate can be processed by minidump-processor +dump_syms = { version = "0.0.7", default-features = false } +minidump-processor = { version = "0.11", default-features = false, features = [ + "breakpad-syms", +] } +similar-asserts = "1.2" +uuid = "1.0" + +[patch.crates-io] +# PR https://github.com/mozilla/dump_syms/pull/356, merged, but unreleased +dump_syms = { git = "https://github.com/mozilla/dump_syms", rev = "c2743d5" } # branch = master diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000..b8e12925 --- /dev/null +++ b/deny.toml @@ -0,0 +1,9 @@ +[advisories] +ignore = [ + # chrono can segfault due to use of localtime_r, however this is only used + # via the `cab` crate, which is not using local time + "RUSTSEC-2020-0159", + # This is an old version of time that can segfault due to local time, but + # again, this functionality is not being used + "RUSTSEC-2020-0071", +] diff --git a/src/bin/test.rs b/src/bin/test.rs index 8a8f4095..6ef236bf 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -341,6 +341,64 @@ mod windows { } } +#[cfg(target_os = "macos")] +mod mac { + use super::*; + use std::time::Duration; + + #[inline(never)] + pub(super) fn real_main(args: Vec) -> Result<()> { + let port_name = args.get(0).ok_or("mach port name not specified")?; + let exception: u32 = args.get(1).ok_or("exception code not specified")?.parse()?; + + let client = + crash_context::ipc::Client::create(&std::ffi::CString::new(port_name.clone())?)?; + + std::thread::Builder::new() + .name("test-thread".to_owned()) + .spawn(move || { + #[inline(never)] + fn wait_until_killed(client: crash_context::ipc::Client, exception: u32) { + // SAFETY: syscalls + let cc = unsafe { + crash_context::CrashContext { + task: mach2::traps::mach_task_self(), + thread: 
mach2::mach_init::mach_thread_self(), + handler_thread: mach2::port::MACH_PORT_NULL, + exception: Some(crash_context::ExceptionInfo { + kind: exception as i32, + code: 0, + subcode: None, + }), + } + }; + + // Send the crash context to the server and wait for it to + // finish dumping, we should be killed shortly afterwards + client + .send_crash_context( + &cc, + Some(Duration::from_secs(2)), + Some(Duration::from_secs(5)), + ) + .expect("failed to send crash context/receive ack"); + + // Wait until we're killed + loop { + std::thread::park(); + } + } + + wait_until_killed(client, exception) + }) + .unwrap() + .join() + .unwrap(); + + Ok(()) + } +} + fn main() -> Result<()> { let args: Vec<_> = std::env::args().skip(1).collect(); @@ -349,6 +407,8 @@ fn main() -> Result<()> { linux::real_main(args) } else if #[cfg(target_os = "windows")] { windows::real_main(args) + } else if #[cfg(target_os = "macos")] { + mac::real_main(args) } else { unimplemented!(); } diff --git a/src/dir_section.rs b/src/dir_section.rs new file mode 100644 index 00000000..63b2e39c --- /dev/null +++ b/src/dir_section.rs @@ -0,0 +1,103 @@ +use crate::{ + mem_writer::{Buffer, MemoryArrayWriter, MemoryWriterError}, + minidump_format::MDRawDirectory, +}; +use std::io::{Error, Seek, SeekFrom, Write}; + +pub type DumpBuf = Buffer; + +#[derive(Debug, thiserror::Error)] +pub enum FileWriterError { + #[error("IO error")] + IOError(#[from] Error), + #[error("Failed to write to memory")] + MemoryWriterError(#[from] MemoryWriterError), +} + +/// Utility that wraps writing minidump directory entries to an I/O stream, generally +/// a [`std::fs::File`]. 
+#[derive(Debug)] +pub struct DirSection<'a, W> +where + W: Write + Seek, +{ + curr_idx: usize, + section: MemoryArrayWriter, + /// If we have to append to some file, we have to know where we currently are + destination_start_offset: u64, + destination: &'a mut W, + last_position_written_to_file: u64, +} + +impl<'a, W> DirSection<'a, W> +where + W: Write + Seek, +{ + pub fn new( + buffer: &mut DumpBuf, + index_length: u32, + destination: &'a mut W, + ) -> std::result::Result { + let dir_section = + MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; + + Ok(Self { + curr_idx: 0, + section: dir_section, + destination_start_offset: destination.seek(SeekFrom::Current(0))?, + destination, + last_position_written_to_file: 0, + }) + } + + #[inline] + pub fn position(&self) -> u32 { + self.section.position + } + + pub fn dump_dir_entry( + &mut self, + buffer: &mut DumpBuf, + dirent: MDRawDirectory, + ) -> std::result::Result<(), FileWriterError> { + self.section.set_value_at(buffer, dirent, self.curr_idx)?; + + // Now write it to file + + // First get all the positions + let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; + let idx_pos = self.section.location_of_index(self.curr_idx); + self.curr_idx += 1; + + self.destination.seek(std::io::SeekFrom::Start( + self.destination_start_offset + idx_pos.rva as u64, + ))?; + let start = idx_pos.rva as usize; + let end = (idx_pos.rva + idx_pos.data_size) as usize; + self.destination.write_all(&buffer[start..end])?; + + // Reset file-position + self.destination + .seek(std::io::SeekFrom::Start(curr_file_pos))?; + + Ok(()) + } + + /// Writes 2 things to file: + /// 1. The given dirent into the dir section in the header (if any is given) + /// 2. 
Everything in the in-memory buffer that was added since the last call to this function + pub fn write_to_file( + &mut self, + buffer: &mut DumpBuf, + dirent: Option, + ) -> std::result::Result<(), FileWriterError> { + if let Some(dirent) = dirent { + self.dump_dir_entry(buffer, dirent)?; + } + + let start_pos = self.last_position_written_to_file as usize; + self.destination.write_all(&buffer[start_pos..])?; + self.last_position_written_to_file = buffer.position(); + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index ab803757..00ce6074 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,21 @@ cfg_if::cfg_if! { mod windows; pub use windows::*; + } else if #[cfg(target_os = "macos")] { + mod mac; + + pub use mac::*; } } pub mod minidump_cpu; pub mod minidump_format; + +// Non-windows platforms need additional code since they are essentially +// replicating functionality we get for free on Windows +cfg_if::cfg_if! { + if #[cfg(not(target_os = "windows"))] { + pub(crate) mod mem_writer; + pub(crate) mod dir_section; + } +} diff --git a/src/linux/dso_debug.rs b/src/linux/dso_debug.rs index eb3cc396..7aa60adb 100644 --- a/src/linux/dso_debug.rs +++ b/src/linux/dso_debug.rs @@ -1,10 +1,6 @@ use crate::{ - linux::{ - auxv_reader::AuxvType, - errors::SectionDsoDebugError, - ptrace_dumper::PtraceDumper, - sections::{write_string_to_location, Buffer, MemoryArrayWriter, MemoryWriter}, - }, + linux::{auxv_reader::AuxvType, errors::SectionDsoDebugError, ptrace_dumper::PtraceDumper}, + mem_writer::{write_string_to_location, Buffer, MemoryArrayWriter, MemoryWriter}, minidump_format::*, }; use std::collections::HashMap; diff --git a/src/linux/errors.rs b/src/linux/errors.rs index ca9d83f4..e423389f 100644 --- a/src/linux/errors.rs +++ b/src/linux/errors.rs @@ -1,4 +1,6 @@ +use crate::dir_section::FileWriterError; use crate::maps_reader::MappingInfo; +use crate::mem_writer::MemoryWriterError; use crate::thread_info::Pid; use goblin; use thiserror::Error; @@ -120,16 +122,6 @@ 
pub enum DumperError { MapsReaderError(#[from] MapsReaderError), } -#[derive(Debug, Error)] -pub enum MemoryWriterError { - #[error("IO error when writing to DumpBuf")] - IOError(#[from] std::io::Error), - #[error("Failed integer conversion")] - TryFromIntError(#[from] std::num::TryFromIntError), - #[error("Failed to write to buffer")] - Scroll(#[from] scroll::Error), -} - #[derive(Debug, Error)] pub enum SectionAppMemoryError { #[error("Failed to copy memory from process")] @@ -202,14 +194,6 @@ pub enum SectionDsoDebugError { FromUTF8Error(#[from] std::string::FromUtf8Error), } -#[derive(Debug, Error)] -pub enum FileWriterError { - #[error("IO error")] - IOError(#[from] std::io::Error), - #[error("Failed to write to memory")] - MemoryWriterError(#[from] MemoryWriterError), -} - #[derive(Debug, Error)] pub enum WriterError { #[error("Error during init phase")] diff --git a/src/linux/minidump_writer.rs b/src/linux/minidump_writer.rs index 069308a5..245c6fea 100644 --- a/src/linux/minidump_writer.rs +++ b/src/linux/minidump_writer.rs @@ -1,103 +1,19 @@ use crate::{ + dir_section::{DirSection, DumpBuf}, linux::{ app_memory::AppMemoryList, crash_context::CrashContext, dso_debug, - errors::{FileWriterError, InitError, MemoryWriterError, WriterError}, + errors::{InitError, WriterError}, maps_reader::{MappingInfo, MappingList}, ptrace_dumper::PtraceDumper, sections::*, thread_info::Pid, }, + mem_writer::{Buffer, MemoryArrayWriter, MemoryWriter, MemoryWriterError}, minidump_format::*, }; -use std::io::{Seek, SeekFrom, Write}; - -pub type DumpBuf = Buffer; - -#[derive(Debug)] -pub struct DirSection<'a, W> -where - W: Write + Seek, -{ - curr_idx: usize, - section: MemoryArrayWriter, - /// If we have to append to some file, we have to know where we currently are - destination_start_offset: u64, - destination: &'a mut W, - last_position_written_to_file: u64, -} - -impl<'a, W> DirSection<'a, W> -where - W: Write + Seek, -{ - fn new( - buffer: &mut DumpBuf, - index_length: u32, 
- destination: &'a mut W, - ) -> std::result::Result { - let dir_section = - MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; - Ok(DirSection { - curr_idx: 0, - section: dir_section, - destination_start_offset: destination.seek(SeekFrom::Current(0))?, - destination, - last_position_written_to_file: 0, - }) - } - - fn position(&self) -> u32 { - self.section.position - } - - fn dump_dir_entry( - &mut self, - buffer: &mut DumpBuf, - dirent: MDRawDirectory, - ) -> std::result::Result<(), FileWriterError> { - self.section.set_value_at(buffer, dirent, self.curr_idx)?; - - // Now write it to file - - // First get all the positions - let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; - let idx_pos = self.section.location_of_index(self.curr_idx); - self.curr_idx += 1; - - self.destination.seek(std::io::SeekFrom::Start( - self.destination_start_offset + idx_pos.rva as u64, - ))?; - let start = idx_pos.rva as usize; - let end = (idx_pos.rva + idx_pos.data_size) as usize; - self.destination.write_all(&buffer[start..end])?; - - // Reset file-position - self.destination - .seek(std::io::SeekFrom::Start(curr_file_pos))?; - - Ok(()) - } - - /// Writes 2 things to file: - /// 1. The given dirent into the dir section in the header (if any is given) - /// 2. 
Everything in the in-memory buffer that was added since the last call to this function - fn write_to_file( - &mut self, - buffer: &mut DumpBuf, - dirent: Option, - ) -> std::result::Result<(), FileWriterError> { - if let Some(dirent) = dirent { - self.dump_dir_entry(buffer, dirent)?; - } - - let start_pos = self.last_position_written_to_file as usize; - self.destination.write_all(&buffer[start_pos..])?; - self.last_position_written_to_file = buffer.position(); - Ok(()) - } -} +use std::io::{Seek, Write}; pub enum CrashingThreadContext { None, diff --git a/src/linux/sections.rs b/src/linux/sections.rs index b7850ac0..c7c4172c 100644 --- a/src/linux/sections.rs +++ b/src/linux/sections.rs @@ -7,270 +7,12 @@ pub mod thread_list_stream; pub mod thread_names_stream; use crate::{ - errors::{self, MemoryWriterError}, + dir_section::DumpBuf, + errors::{self}, linux::{ - minidump_writer::{self, DumpBuf, MinidumpWriter}, + minidump_writer::{self, MinidumpWriter}, ptrace_dumper::PtraceDumper, }, + mem_writer::*, minidump_format::*, }; -use scroll::ctx::{SizeWith, TryIntoCtx}; - -type WriteResult = std::result::Result; - -macro_rules! 
size { - ($t:ty) => { - <$t>::size_with(&scroll::Endian::Little) - }; -} - -pub struct Buffer { - inner: Vec, -} - -impl Buffer { - pub fn with_capacity(cap: usize) -> Self { - Self { - inner: Vec::with_capacity(cap), - } - } - - #[inline] - pub fn position(&self) -> u64 { - self.inner.len() as u64 - } - - #[inline] - #[must_use] - fn reserve(&mut self, len: usize) -> usize { - let mark = self.inner.len(); - self.inner.resize(self.inner.len() + len, 0); - mark - } - - #[inline] - fn write(&mut self, val: N) -> Result - where - N: TryIntoCtx + SizeWith, - E: From, - { - self.write_at(self.inner.len(), val) - } - - fn write_at(&mut self, offset: usize, val: N) -> Result - where - N: TryIntoCtx + SizeWith, - E: From, - { - let to_write = size!(N); - let remainder = self.inner.len() - offset; - if remainder < to_write { - self.inner - .resize(self.inner.len() + to_write - remainder, 0); - } - - let dst = &mut self.inner[offset..offset + to_write]; - val.try_into_ctx(dst, scroll::Endian::Little) - } - - #[inline] - pub fn write_all(&mut self, buffer: &[u8]) { - self.inner.extend_from_slice(buffer); - } -} - -impl From for Vec { - fn from(b: Buffer) -> Self { - b.inner - } -} - -impl std::ops::Deref for Buffer { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -#[derive(Debug, PartialEq)] -pub struct MemoryWriter { - pub position: MDRVA, - pub size: usize, - phantom: std::marker::PhantomData, -} - -impl MemoryWriter -where - T: TryIntoCtx + SizeWith, -{ - /// Create a slot for a type T in the buffer, we can fill right now with real values. - pub fn alloc_with_val(buffer: &mut Buffer, val: T) -> WriteResult { - // Mark the position as we may overwrite later - let position = buffer.position(); - let size = buffer.write(val)?; - - Ok(Self { - position: position as u32, - size, - phantom: std::marker::PhantomData, - }) - } - - /// Create a slot for a type T in the buffer, we can fill later with real values. 
- pub fn alloc(buffer: &mut Buffer) -> WriteResult { - let size = size!(T); - let position = buffer.reserve(size) as u32; - - Ok(Self { - position: position as u32, - size, - phantom: std::marker::PhantomData, - }) - } - - /// Write actual values in the buffer-slot we got during `alloc()` - #[inline] - pub fn set_value(&mut self, buffer: &mut Buffer, val: T) -> WriteResult<()> { - Ok(buffer.write_at(self.position as usize, val).map(|_sz| ())?) - } - - #[inline] - pub fn location(&self) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: size!(T) as u32, - rva: self.position, - } - } -} - -#[derive(Debug, PartialEq)] -pub struct MemoryArrayWriter { - pub position: MDRVA, - array_size: usize, - phantom: std::marker::PhantomData, -} - -impl MemoryArrayWriter { - #[inline] - pub fn write_bytes(buffer: &mut Buffer, slice: &[u8]) -> Self { - let position = buffer.position(); - buffer.write_all(slice); - - Self { - position: position as u32, - array_size: slice.len(), - phantom: std::marker::PhantomData, - } - } -} - -impl MemoryArrayWriter -where - T: TryIntoCtx + SizeWith + Copy, -{ - pub fn alloc_from_array(buffer: &mut Buffer, array: &[T]) -> WriteResult { - let array_size = array.len(); - let position = buffer.reserve(array_size * size!(T)); - - for (idx, val) in array.iter().enumerate() { - buffer.write_at(position + idx * size!(T), *val)?; - } - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } -} - -impl MemoryArrayWriter -where - T: TryIntoCtx + SizeWith, -{ - /// Create a slot for a type T in the buffer, we can fill in the values in one go. 
- pub fn alloc_from_iter( - buffer: &mut Buffer, - iter: impl IntoIterator, - ) -> WriteResult - where - I: std::iter::ExactSizeIterator, - { - let iter = iter.into_iter(); - let array_size = iter.len(); - let size = size!(T); - let position = buffer.reserve(array_size * size); - - for (idx, val) in iter.enumerate() { - buffer.write_at(position + idx * size, val)?; - } - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } - - /// Create a slot for a type T in the buffer, we can fill later with real values. - /// This function fills it with `Default::default()`, which is less performant than - /// using uninitialized memory, but safe. - pub fn alloc_array(buffer: &mut Buffer, array_size: usize) -> WriteResult { - let position = buffer.reserve(array_size * size!(T)); - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } - - /// Write actual values in the buffer-slot we got during `alloc()` - #[inline] - pub fn set_value_at(&mut self, buffer: &mut Buffer, val: T, index: usize) -> WriteResult<()> { - Ok(buffer - .write_at(self.position as usize + size!(T) * index, val) - .map(|_sz| ())?) 
- } - - #[inline] - pub fn location(&self) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: (self.array_size * size!(T)) as u32, - rva: self.position, - } - } - - #[inline] - pub fn location_of_index(&self, idx: usize) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: size!(T) as u32, - rva: self.position + (size!(T) * idx) as u32, - } - } -} - -pub fn write_string_to_location( - buffer: &mut Buffer, - text: &str, -) -> WriteResult { - let letters: Vec = text.encode_utf16().collect(); - - // First write size of the string (x letters in u16, times the size of u16) - let text_header = MemoryWriter::::alloc_with_val( - buffer, - (letters.len() * std::mem::size_of::()).try_into()?, - )?; - - // Then write utf-16 letters after that - let mut text_section = MemoryArrayWriter::::alloc_array(buffer, letters.len())?; - for (index, letter) in letters.iter().enumerate() { - text_section.set_value_at(buffer, *letter, index)?; - } - - let mut location = text_header.location(); - location.data_size += text_section.location().data_size; - - Ok(location) -} diff --git a/src/mac.rs b/src/mac.rs new file mode 100644 index 00000000..4d405999 --- /dev/null +++ b/src/mac.rs @@ -0,0 +1,10 @@ +#![allow(unsafe_code)] + +#[cfg(target_pointer_width = "32")] +compile_error!("Various MacOS FFI bindings assume we are on a 64-bit architechture"); + +pub mod errors; +pub mod mach; +pub mod minidump_writer; +mod streams; +pub mod task_dumper; diff --git a/src/mac/errors.rs b/src/mac/errors.rs new file mode 100644 index 00000000..ea85162b --- /dev/null +++ b/src/mac/errors.rs @@ -0,0 +1,11 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum WriterError { + #[error(transparent)] + TaskDumpError(#[from] crate::mac::task_dumper::TaskDumpError), + #[error("Failed to write to memory")] + MemoryWriterError(#[from] crate::mem_writer::MemoryWriterError), + #[error("Failed to write to file")] + FileWriterError(#[from] crate::dir_section::FileWriterError), +} diff 
--git a/src/mac/mach.rs b/src/mac/mach.rs new file mode 100644 index 00000000..9212c0ec --- /dev/null +++ b/src/mac/mach.rs @@ -0,0 +1,571 @@ +//! Contains various helpers to improve and expand on the bindings provided +//! by `mach2` + +// Just exports all of the mach functions we use into a flat list +pub use mach2::{ + kern_return::{kern_return_t, KERN_SUCCESS}, + port::mach_port_name_t, + task::{self, task_threads}, + task_info, + thread_act::thread_get_state, + traps::mach_task_self, + vm::{mach_vm_deallocate, mach_vm_read, mach_vm_region_recurse}, + vm_region::vm_region_submap_info_64, +}; + +/// A Mach kernel error. +/// +/// See . +#[derive(thiserror::Error, Debug)] +pub enum KernelError { + #[error("specified address is not currently valid")] + InvalidAddress = 1, + #[error("specified memory is valid, but does not permit the required forms of access")] + ProtectionFailure = 2, + #[error("the address range specified is already in use, or no address range of the size specified could be found")] + NoSpace = 3, + #[error("the function requested was not applicable to this type of argument, or an argument is invalid")] + InvalidArgument = 4, + #[error("the function could not be performed")] + Failure = 5, + #[error("system resource could not be allocated to fulfill this request")] + ResourceShortage = 6, + #[error("the task in question does not hold receive rights for the port argument")] + NotReceiver = 7, + #[error("bogus access restriction")] + NoAccess = 8, + #[error( + "during a page fault, the target address refers to a memory object that has been destroyed" + )] + MemoryFailure = 9, + #[error( + "during a page fault, the memory object indicated that the data could not be returned" + )] + MemoryError = 10, + #[error("the receive right is already a member of the portset")] + AlreadyInSet = 11, + #[error("the receive right is not a member of a port set")] + NotInSet = 12, + #[error("the name already denotes a right in the task")] + NameExists = 13, + 
#[error("the operation was aborted")] + Aborted = 14, + #[error("the name doesn't denote a right in the task")] + InvalidName = 15, + #[error("target task isn't an active task")] + InvalidTask = 16, + #[error("the name denotes a right, but not an appropriate right")] + InvalidRight = 17, + #[error("a blatant range error")] + InvalidValue = 18, + #[error("operation would overflow limit on user-references")] + UserRefsOverflow = 19, + #[error("the supplied port capability is improper")] + InvalidCapability = 20, + #[error("the task already has send or receive rights for the port under another name")] + RightExists = 21, + #[error("target host isn't actually a host")] + InvalidHost = 22, + #[error("an attempt was made to supply 'precious' data for memory that is already present in a memory object")] + MemoryPresent = 23, + // These 2 are errors which should only ever be seen by the kernel itself + //MemoryDataMoved = 24, + //MemoryRestartCopy = 25, + #[error("an argument applied to assert processor set privilege was not a processor set control port")] + InvalidProcessorSet = 26, + #[error("the specified scheduling attributes exceed the thread's limits")] + PolicyLimit = 27, + #[error("the specified scheduling policy is not currently enabled for the processor set")] + InvalidPolicy = 28, + #[error("the external memory manager failed to initialize the memory object")] + InvalidObject = 29, + #[error( + "a thread is attempting to wait for an event for which there is already a waiting thread" + )] + AlreadyWaiting = 30, + #[error("an attempt was made to destroy the default processor set")] + DefaultSet = 31, + #[error("an attempt was made to fetch an exception port that is protected, or to abort a thread while processing a protected exception")] + ExceptionProtected = 32, + #[error("a ledger was required but not supplied")] + InvalidLedger = 33, + #[error("the port was not a memory cache control port")] + InvalidMemoryControl = 34, + #[error("an argument supplied to 
assert security privilege was not a host security port")] + InvalidSecurity = 35, + #[error("thread_depress_abort was called on a thread which was not currently depressed")] + NotDepressed = 36, + #[error("object has been terminated and is no longer available")] + Terminated = 37, + #[error("lock set has been destroyed and is no longer available")] + LockSetDestroyed = 38, + #[error("the thread holding the lock terminated before releasing the lock")] + LockUnstable = 39, + #[error("the lock is already owned by another thread")] + LockOwned = 40, + #[error("the lock is already owned by the calling thread")] + LockOwnedSelf = 41, + #[error("semaphore has been destroyed and is no longer available")] + SemaphoreDestroyed = 42, + #[error("return from RPC indicating the target server was terminated before it successfully replied")] + RpcServerTerminated = 43, + #[error("terminate an orphaned activation")] + RpcTerminateOrphan = 44, + #[error("allow an orphaned activation to continue executing")] + RpcContinueOrphan = 45, + #[error("empty thread activation (No thread linked to it)")] + NotSupported = 46, + #[error("remote node down or inaccessible")] + NodeDown = 47, + #[error("a signalled thread was not actually waiting")] + NotWaiting = 48, + #[error("some thread-oriented operation (semaphore_wait) timed out")] + OperationTimedOut = 49, + #[error("during a page fault, indicates that the page was rejected as a result of a signature check")] + CodesignError = 50, + #[error("the requested property cannot be changed at this time")] + PoicyStatic = 51, + #[error("the provided buffer is of insufficient size for the requested data")] + InsufficientBufferSize = 52, + #[error("denied by security policy")] + Denied = 53, + #[error("the KC on which the function is operating is missing")] + MissingKC = 54, + #[error("the KC on which the function is operating is invalid")] + InvalidKC = 55, + #[error("a search or query operation did not return a result")] + NotFound = 56, +} + +impl 
From<mach2::kern_return::kern_return_t> for KernelError {
+    /// Maps a raw `kern_return_t` code onto the matching [`KernelError`] variant.
+    ///
+    /// Codes that have no dedicated arm end up in the catch-all at the bottom
+    /// of the match, which reports them as [`KernelError::Failure`].
+    fn from(kr: mach2::kern_return::kern_return_t) -> Self {
+        use mach2::kern_return::*;
+
+        match kr {
+            KERN_INVALID_ADDRESS => Self::InvalidAddress,
+            KERN_PROTECTION_FAILURE => Self::ProtectionFailure,
+            KERN_NO_SPACE => Self::NoSpace,
+            KERN_INVALID_ARGUMENT => Self::InvalidArgument,
+            KERN_FAILURE => Self::Failure,
+            KERN_RESOURCE_SHORTAGE => Self::ResourceShortage,
+            KERN_NOT_RECEIVER => Self::NotReceiver,
+            KERN_NO_ACCESS => Self::NoAccess,
+            KERN_MEMORY_FAILURE => Self::MemoryFailure,
+            KERN_MEMORY_ERROR => Self::MemoryError,
+            KERN_ALREADY_IN_SET => Self::AlreadyInSet,
+            // `NotInSet = 12` is declared on the enum, so it needs its own arm
+            // instead of silently degrading to `Failure`
+            KERN_NOT_IN_SET => Self::NotInSet,
+            KERN_NAME_EXISTS => Self::NameExists,
+            // Same for `Aborted = 14`
+            KERN_ABORTED => Self::Aborted,
+            KERN_INVALID_NAME => Self::InvalidName,
+            KERN_INVALID_TASK => Self::InvalidTask,
+            KERN_INVALID_RIGHT => Self::InvalidRight,
+            KERN_INVALID_VALUE => Self::InvalidValue,
+            KERN_UREFS_OVERFLOW => Self::UserRefsOverflow,
+            KERN_INVALID_CAPABILITY => Self::InvalidCapability,
+            KERN_RIGHT_EXISTS => Self::RightExists,
+            KERN_INVALID_HOST => Self::InvalidHost,
+            KERN_MEMORY_PRESENT => Self::MemoryPresent,
+            KERN_INVALID_PROCESSOR_SET => Self::InvalidProcessorSet,
+            KERN_POLICY_LIMIT => Self::PolicyLimit,
+            KERN_INVALID_POLICY => Self::InvalidPolicy,
+            KERN_INVALID_OBJECT => Self::InvalidObject,
+            KERN_ALREADY_WAITING => Self::AlreadyWaiting,
+            KERN_DEFAULT_SET => Self::DefaultSet,
+            KERN_EXCEPTION_PROTECTED => Self::ExceptionProtected,
+            KERN_INVALID_LEDGER => Self::InvalidLedger,
+            KERN_INVALID_MEMORY_CONTROL => Self::InvalidMemoryControl,
+            KERN_INVALID_SECURITY => Self::InvalidSecurity,
+            KERN_NOT_DEPRESSED => Self::NotDepressed,
+            KERN_TERMINATED => Self::Terminated,
+            KERN_LOCK_SET_DESTROYED => Self::LockSetDestroyed,
+            KERN_LOCK_UNSTABLE => Self::LockUnstable,
+            KERN_LOCK_OWNED => Self::LockOwned,
+            KERN_LOCK_OWNED_SELF => Self::LockOwnedSelf,
+            KERN_SEMAPHORE_DESTROYED => Self::SemaphoreDestroyed,
+            KERN_RPC_SERVER_TERMINATED => Self::RpcServerTerminated,
+            KERN_RPC_TERMINATE_ORPHAN => Self::RpcTerminateOrphan,
+            
KERN_RPC_CONTINUE_ORPHAN => Self::RpcContinueOrphan, + KERN_NOT_SUPPORTED => Self::NotSupported, + KERN_NODE_DOWN => Self::NodeDown, + KERN_NOT_WAITING => Self::NotWaiting, + KERN_OPERATION_TIMED_OUT => Self::OperationTimedOut, + KERN_CODESIGN_ERROR => Self::CodesignError, + KERN_POLICY_STATIC => Self::PoicyStatic, + 52 => Self::InsufficientBufferSize, + 53 => Self::Denied, + 54 => Self::MissingKC, + 55 => Self::InvalidKC, + 56 => Self::NotFound, + // This should never happen given a result from a mach call, but + // in that case we just use `Failure` as the mach header itself + // describes it as a catch all + _ => Self::Failure, + } + } +} + +// From /usr/include/mach/machine/thread_state.h +pub const THREAD_STATE_MAX: usize = 1296; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 4; + + pub type ArchThreadState = mach2::structs::x86_thread_state64_t; + } else if #[cfg(target_arch = "aarch64")] { + /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 6; + + // Missing from mach2 atm + // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h + #[repr(C)] + pub struct Arm64ThreadState { + pub x: [u64; 29], + pub fp: u64, + pub lr: u64, + pub sp: u64, + pub pc: u64, + pub cpsr: u32, + __pad: u32, + } + + pub type ArchThreadState = Arm64ThreadState; + } else { + compile_error!("unsupported target arch"); + } +} + +pub struct ThreadState { + pub state: [u32; THREAD_STATE_MAX], + pub state_size: u32, +} + +impl Default for ThreadState { + fn default() -> Self { + Self { + state: [0u32; THREAD_STATE_MAX], + state_size: (THREAD_STATE_MAX * std::mem::size_of::()) as u32, + } + } +} + +impl ThreadState { + /// Gets the program counter + #[inline] + pub fn pc(&self) -> u64 { + cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + self.arch_state().__rip + } else if #[cfg(target_arch = "aarch64")] { + self.arch_state().pc + } + } + } + + /// Gets the stack pointer + #[inline] + pub fn sp(&self) -> u64 { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + self.arch_state().__rsp + } else if #[cfg(target_arch = "aarch64")] { + self.arch_state().sp + } + } + } + + /// Converts the raw binary blob into the architecture specific state + #[inline] + pub fn arch_state(&self) -> &ArchThreadState { + // SAFETY: hoping the kernel isn't lying + unsafe { &*(self.state.as_ptr().cast()) } + } +} + +/// Minimal trait that just pairs a structure that can be filled out by +/// [`mach2::task::task_info`] with the "flavor" that tells it the info we +/// actually want to retrieve +pub trait TaskInfo { + /// One of the `MACH_*_TASK` integers. I assume it's very bad if you implement + /// this trait and provide the wrong flavor for the struct + const FLAVOR: u32; +} + +/// , the file type for the main executable image +pub const MH_EXECUTE: u32 = 0x2; +// usr/include/mach-o/loader.h, magic number for MachHeader +pub const MH_MAGIC_64: u32 = 0xfeedfacf; +// usr/include/mach-o/loader.h, command to map a segment +pub const LC_SEGMENT_64: u32 = 0x19; +// usr/include/mach-o/loader.h, dynamically linked shared lib ident +pub const LC_ID_DYLIB: u32 = 0xd; +// usr/include/mach-o/loader.h, the uuid +pub const LC_UUID: u32 = 0x1b; + +/// The header at the beginning of every (valid) Mach image +/// +/// +#[repr(C)] +#[derive(Clone)] +pub struct MachHeader { + /// Mach magic number identifier, this is used to validate the header is valid + pub magic: u32, + /// `cpu_type_t` cpu specifier + pub cpu_type: i32, + /// `cpu_subtype_t` machine specifier + pub cpu_sub_type: i32, + /// Type of file, eg. 
[`MH_EXECUTE`] for the main executable + pub file_type: u32, + /// Number of load commands for the image + pub num_commands: u32, + /// Size in bytes of all of the load commands + pub size_commands: u32, + pub flags: u32, + __reserved: u32, +} + +/// Every load command is a variable sized struct depending on its type, but +/// they all include the fields in this struct at the beginning +/// +/// +#[repr(C)] +pub struct LoadCommandBase { + /// Type of load command `LC_*` + pub cmd: u32, + /// Total size of the command in bytes + pub cmd_size: u32, +} + +/// The 64-bit segment load command indicates that a part of this file is to be +/// mapped into a 64-bit task's address space. If the 64-bit segment has +/// sections then section_64 structures directly follow the 64-bit segment +/// command and their size is reflected in `cmdsize`. +#[repr(C)] +pub struct SegmentCommand64 { + cmd: u32, + pub cmd_size: u32, + /// String name of the section + pub segment_name: [u8; 16], + /// Memory address the segment is mapped to + pub vm_addr: u64, + /// Total size of the segment + pub vm_size: u64, + /// File offset of the segment + pub file_off: u64, + /// Amount mapped from the file + pub file_size: u64, + /// Maximum VM protection + pub max_prot: i32, + /// Initial VM protection + pub init_prot: i32, + /// Number of sections in the segment + pub num_sections: u32, + pub flags: u32, +} + +/// Dynamically linked shared libraries are identified by two things. The +/// pathname (the name of the library as found for execution), and the +/// compatibility version number. The pathname must match and the compatibility +/// number in the user of the library must be greater than or equal to the +/// library being used. The time stamp is used to record the time a library was +/// built and copied into user so it can be use to determined if the library used +/// at runtime is exactly the same as used to built the program. 
+#[repr(C)] +#[derive(Debug)] +pub struct Dylib { + /// Offset from the load command start to the pathname + pub name: u32, + /// Library's build time stamp + pub timestamp: u32, + /// Library's current version number + pub current_version: u32, + /// Library's compatibility version number + pub compatibility_version: u32, +} + +/// A dynamically linked shared library (filetype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. +#[repr(C)] +pub struct DylibCommand { + cmd: u32, + /// Total size of the command in bytes, including pathname string + pub cmd_size: u32, + /// Library identification + pub dylib: Dylib, +} + +/// The uuid load command contains a single 128-bit unique random number that +/// identifies an object produced by the static link editor. +#[repr(C)] +pub struct UuidCommand { + cmd: u32, + pub cmd_size: u32, + /// The UUID. 
The components are in big-endian regardless of the host architecture + pub uuid: [u8; 16], +} + +/// A block of load commands for a particular image +pub struct LoadCommands { + /// The block of memory containing all of the load commands + pub buffer: Vec<u8>, + /// The number of actual load commands that _should_ be in the buffer + pub count: u32, +} + +impl LoadCommands { + /// Retrieves an iterator over the load commands in the contained buffer + #[inline] + pub fn iter(&self) -> LoadCommandsIter<'_> { + LoadCommandsIter { + buffer: &self.buffer, + count: self.count, + } + } +} + +/// A single load command +pub enum LoadCommand<'buf> { + Segment(&'buf SegmentCommand64), + Dylib(&'buf DylibCommand), + Uuid(&'buf UuidCommand), +} + +/// Iterator over the recognized load commands in a [`LoadCommands`] buffer, +/// all other load commands are skipped +pub struct LoadCommandsIter<'buf> { + buffer: &'buf [u8], + count: u32, +} + +impl<'buf> Iterator for LoadCommandsIter<'buf> { + type Item = LoadCommand<'buf>; + + fn next(&mut self) -> Option<Self::Item> { + // SAFETY: we're interpreting raw bytes as C structs, so a reference is + // only created once both the remaining buffer and the command's + // self-reported size are known to cover the struct being read + // NOTE(review): alignment of the byte buffer is not checked here, + // confirm the buffer is suitably aligned for the 64-bit fields + unsafe { + loop { + if self.count == 0 || self.buffer.len() < std::mem::size_of::<LoadCommandBase>() { + return None; + } + + let header = &*(self.buffer.as_ptr().cast::<LoadCommandBase>()); + let cmd_size = header.cmd_size as usize; + + // This would mean we've been lied to by the MachHeader and either + // the size_commands field was too small, or the num_command was + // too large. A size smaller than the base header is just as bogus + // and would stop us from ever advancing through the buffer + if cmd_size > self.buffer.len() || cmd_size < std::mem::size_of::<LoadCommandBase>() { + return None; + } + + let cmd = match header.cmd { + LC_SEGMENT_64 if cmd_size >= std::mem::size_of::<SegmentCommand64>() => Some( + LoadCommand::Segment(&*(self.buffer.as_ptr().cast::<SegmentCommand64>())), + ), + LC_ID_DYLIB if cmd_size >= std::mem::size_of::<DylibCommand>() => Some( + LoadCommand::Dylib(&*(self.buffer.as_ptr().cast::<DylibCommand>())), + ), + LC_UUID if cmd_size >= std::mem::size_of::<UuidCommand>() => Some( + LoadCommand::Uuid(&*(self.buffer.as_ptr().cast::<UuidCommand>())), + ), + // Just ignore any other load commands, as well as known ones + // that are too small to hold the struct we would read + _ => None, + }; + + self.count -= 1; + self.buffer = &self.buffer[cmd_size..]; + + if let Some(cmd) = cmd { + return Some(cmd); + } + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // Unrecognized commands are skipped and malformed data ends iteration + // early, so `count` is only an upper bound on what will be yielded + (0, Some(self.count as usize)) + } +} + +///
Retrieves an integer sysctl by name. Returns the default value if retrieval +/// fails. +pub fn sysctl_by_name(name: &[u8]) -> T { + let mut out = T::default(); + let mut len = std::mem::size_of_val(&out); + + // SAFETY: syscall + unsafe { + if libc::sysctlbyname( + name.as_ptr().cast(), + (&mut out as *mut T).cast(), + &mut len, + std::ptr::null_mut(), + 0, + ) != 0 + { + // log? + T::default() + } else { + out + } + } +} + +/// Retrieves an `i32` sysctl by name and casts it to the specified integer type. +/// Returns the default value if retrieval fails or the value is out of bounds of +/// the specified integer type. +pub fn int_sysctl_by_name + Default>(name: &[u8]) -> T { + let val = sysctl_by_name::(name); + T::try_from(val).unwrap_or_default() +} + +/// Retrieves a string sysctl by name. Returns an empty string if the retrieval +/// fails or the string can't be converted to utf-8. +pub fn sysctl_string(name: &[u8]) -> String { + let mut buf_len = 0; + + // SAFETY: syscalls + let string_buf = unsafe { + // Retrieve the size of the string (including null terminator) + if libc::sysctlbyname( + name.as_ptr().cast(), + std::ptr::null_mut(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + || buf_len <= 1 + { + return String::new(); + } + + let mut buff = Vec::new(); + buff.resize(buf_len, 0); + + if libc::sysctlbyname( + name.as_ptr().cast(), + buff.as_mut_ptr().cast(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + { + return String::new(); + } + + buff.pop(); // remove null terminator + buff + }; + + String::from_utf8(string_buf).unwrap_or_default() +} + +extern "C" { + /// From , this retrieves the normal PID for + /// the specified task as the syscalls from BSD use PIDs, not mach ports. + /// + /// This seems to be marked as "obsolete" in the header, but of course being + /// Apple, there is no mention of a replacement function or when/if it might + /// eventually disappear. 
+ pub fn pid_for_task(task: mach_port_name_t, pid: *mut i32) -> kern_return_t; +} diff --git a/src/mac/minidump_writer.rs b/src/mac/minidump_writer.rs new file mode 100644 index 00000000..6f68a3f2 --- /dev/null +++ b/src/mac/minidump_writer.rs @@ -0,0 +1,88 @@ +use crate::{ + dir_section::{DirSection, DumpBuf}, + mac::{errors::WriterError, task_dumper::TaskDumper}, + mem_writer::*, + minidump_format::{self, MDMemoryDescriptor, MDRawDirectory, MDRawHeader}, +}; +use std::io::{Seek, Write}; + +type Result = std::result::Result; + +pub struct MinidumpWriter { + /// The crash context as captured by an exception handler + pub(crate) crash_context: crash_context::CrashContext, + /// List of raw blocks of memory we've written into the stream. These are + /// referenced by other streams (eg thread list) + pub(crate) memory_blocks: Vec, +} + +impl MinidumpWriter { + /// Creates a minidump writer + pub fn new(crash_context: crash_context::CrashContext) -> Self { + Self { + crash_context, + memory_blocks: Vec::new(), + } + } + + pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result> { + let writers = { + #[allow(clippy::type_complexity)] + let mut writers: Vec< + Box Result>, + > = vec![ + Box::new(|mw, buffer, dumper| mw.write_thread_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_memory_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_system_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_module_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_misc_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_breakpad_info(buffer, dumper)), + ]; + + // Exception stream needs to be the last entry in this array as it may + // be omitted in the case where the minidump is written without an + // exception. 
+ if self.crash_context.exception.is_some() { + writers.push(Box::new(|mw, buffer, dumper| { + mw.write_exception(buffer, dumper) + })); + } + + writers + }; + + let num_writers = writers.len() as u32; + let mut buffer = Buffer::with_capacity(0); + + let mut header_section = MemoryWriter::::alloc(&mut buffer)?; + let mut dir_section = DirSection::new(&mut buffer, num_writers, destination)?; + + let header = MDRawHeader { + signature: minidump_format::MD_HEADER_SIGNATURE, + version: minidump_format::MD_HEADER_VERSION, + stream_count: num_writers, + stream_directory_rva: dir_section.position(), + checksum: 0, /* Can be 0. In fact, that's all that's + * been found in minidump files. */ + time_date_stamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() // a clock set before the epoch records 0 instead of panicking mid-dump + .as_secs() as u32, // TODO: This is not Y2038 safe, but that's how it's currently defined + flags: 0, + }; + header_section.set_value(&mut buffer, header)?; + + // Ensure the header gets flushed. If we crash somewhere below, + // we should have a mostly-intact dump + dir_section.write_to_file(&mut buffer, None)?; + + let dumper = super::task_dumper::TaskDumper::new(self.crash_context.task); + + for mut writer in writers { + let dirent = writer(self, &mut buffer, &dumper)?; + dir_section.write_to_file(&mut buffer, Some(dirent))?; + } + + Ok(buffer.into()) + } +} diff --git a/src/mac/streams.rs b/src/mac/streams.rs new file mode 100644 index 00000000..e1e6d6a4 --- /dev/null +++ b/src/mac/streams.rs @@ -0,0 +1,15 @@ +mod breakpad_info; +mod exception; +mod memory_list; +mod misc_info; +mod module_list; +mod system_info; +mod thread_list; + +use super::{ + errors::WriterError, + mach, + minidump_writer::MinidumpWriter, + task_dumper::{ImageInfo, TaskDumpError, TaskDumper}, +}; +use crate::{dir_section::DumpBuf, mem_writer::*, minidump_format::*}; diff --git a/src/mac/streams/breakpad_info.rs b/src/mac/streams/breakpad_info.rs new file mode 100644 index 00000000..d8f1e587 --- /dev/null +++
b/src/mac/streams/breakpad_info.rs @@ -0,0 +1,34 @@ +use super::*; +use format::{BreakpadInfoValid, MINIDUMP_BREAKPAD_INFO as BreakpadInfo}; + +impl MinidumpWriter { + /// Writes the [`BreakpadInfo`] stream. + /// + /// For MacOS the primary use of this stream is to differentiate between + /// the thread that actually raised an exception, and the thread on which + /// the exception port was listening, so that the exception port (handler) + /// thread can be deprioritized/ignored when analyzing the minidump. + pub(crate) fn write_breakpad_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result { + let bp_section = MemoryWriter::::alloc_with_val( + buffer, + BreakpadInfo { + validity: BreakpadInfoValid::DumpThreadId.bits() + | BreakpadInfoValid::RequestingThreadId.bits(), + // The thread where the exception port handled the exception, might + // be useful to ignore/deprioritize when processing the minidump + dump_thread_id: self.crash_context.handler_thread, + // The actual thread where the exception was thrown + requesting_thread_id: self.crash_context.thread, + }, + )?; + + Ok(MDRawDirectory { + stream_type: MDStreamType::BreakpadInfoStream as u32, + location: bp_section.location(), + }) + } +} diff --git a/src/mac/streams/exception.rs b/src/mac/streams/exception.rs new file mode 100644 index 00000000..69aabc1a --- /dev/null +++ b/src/mac/streams/exception.rs @@ -0,0 +1,62 @@ +use super::*; + +impl MinidumpWriter { + /// Writes the [`minidump_common::format::MINIDUMP_EXCEPTION_STREAM`] stream. + /// + /// This stream is optional on MacOS as a user requested minidump could + /// choose not to specify the exception information. 
+ pub(crate) fn write_exception( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + let thread_state = dumper.read_thread_state(self.crash_context.thread).ok(); + + let thread_context = if let Some(ts) = &thread_state { + let mut cpu = Default::default(); + Self::fill_cpu_context(ts, &mut cpu); + MemoryWriter::alloc_with_val(buffer, cpu) + .map(|mw| mw.location()) + .ok() + } else { + None + }; + + let exception_record = self + .crash_context + .exception + .as_ref() + .map(|exc| { + let exception_address = if let Some(subcode) = exc.subcode { + subcode as u64 + } else if let Some(ts) = thread_state { + ts.pc() + } else { + 0 + }; + + // The naming is confusing here, but it is how it is + MDException { + exception_code: exc.kind as u32, + exception_flags: exc.code as u32, + exception_address, + ..Default::default() + } + }) + .unwrap_or_default(); + + let stream = MDRawExceptionStream { + thread_id: self.crash_context.thread, + exception_record, + thread_context: thread_context.unwrap_or_default(), + __align: 0, + }; + + let exc_section = MemoryWriter::::alloc_with_val(buffer, stream)?; + + Ok(MDRawDirectory { + stream_type: MDStreamType::ExceptionStream as u32, + location: exc_section.location(), + }) + } +} diff --git a/src/mac/streams/memory_list.rs b/src/mac/streams/memory_list.rs new file mode 100644 index 00000000..9ead2027 --- /dev/null +++ b/src/mac/streams/memory_list.rs @@ -0,0 +1,69 @@ +use super::*; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::MemoryListStream`]. 
The memory blocks that are + /// written into this stream are the raw thread contexts that were retrieved + /// and added by [`Self::write_thread_list`] + pub(crate) fn write_memory_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + // Include some memory around the instruction pointer if the crash was + // due to an exception + if self.crash_context.exception.is_some() { + const IP_MEM_SIZE: u64 = 256; + + let get_ip_block = |tid| -> Option> { + let thread_state = dumper.read_thread_state(tid).ok()?; + + let ip = thread_state.pc(); + + // Bound it to the upper and lower bounds of the region + // it's contained within. If it's not in a known memory region, + // don't bother trying to write it. + let region = dumper.get_vm_region(ip).ok()?; + + if ip < region.range.start || ip > region.range.end { + return None; + } + + // Try to get IP_MEM_SIZE / 2 bytes before and after the IP, but + // settle for whatever's available. Saturating math keeps an IP near either end of the address space from wrapping. + let start = std::cmp::max(region.range.start, ip.saturating_sub(IP_MEM_SIZE / 2)); + let end = std::cmp::min(ip.saturating_add(IP_MEM_SIZE / 2), region.range.end); + + Some(start..end) + }; + + if let Some(ip_range) = get_ip_block(self.crash_context.thread) { + let size = ip_range.end - ip_range.start; + let stack_buffer = dumper.read_task_memory(ip_range.start as _, size as usize)?; + let ip_location = MDLocationDescriptor { + data_size: size as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer); + + self.memory_blocks.push(MDMemoryDescriptor { + start_of_memory_range: ip_range.start, + memory: ip_location, + }); + } + } + + let list_header = + MemoryWriter::::alloc_with_val(buffer, self.memory_blocks.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::MemoryListStream as u32, + location: list_header.location(), + }; + + let block_list = + MemoryArrayWriter::::alloc_from_array(buffer, &self.memory_blocks)?; + + dirent.location.data_size += block_list.location().data_size; + Ok(dirent)
+ } +} diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs new file mode 100644 index 00000000..629b94ce --- /dev/null +++ b/src/mac/streams/misc_info.rs @@ -0,0 +1,179 @@ +use super::*; +use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; +use std::time::Duration; + +/// From <mach/time_value.h> +#[repr(C)] +#[derive(Copy, Clone)] +struct TimeValue { + seconds: i32, + microseconds: i32, +} + +impl From<TimeValue> for Duration { + fn from(tv: TimeValue) -> Self { + // Negative values are meaningless for run times, clamp them so the + // `as` casts below can't wrap them into enormous durations + let seconds = tv.seconds.max(0) as u64; + let microseconds = tv.microseconds.max(0) as u64; + + // This _probably_ will never happen, but `from_micros` carries any + // microseconds >= 1_000_000 over into whole seconds, so out of range + // values can neither overflow the nanosecond math nor panic, and + // in-range values convert exactly as before + Duration::from_secs(seconds) + Duration::from_micros(microseconds) + } +} + +/// From <mach/task_info.h>, this includes basic information about +/// a task. +#[repr(C, packed(4))] +struct MachTaskBasicInfo { + /// Virtual memory size in bytes + virtual_size: u64, + /// Resident memory size in bytes + resident_size: u64, + /// Maximum resident memory size in bytes + resident_size_max: u64, + /// Total user run time for terminated threads + user_time: TimeValue, + /// Total system run time for terminated threads + system_time: TimeValue, + /// Default policy for new threads + policy: i32, + /// Suspend count for task + suspend_count: i32, +} + +impl mach::TaskInfo for MachTaskBasicInfo { + const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; +} + +/// From <mach/task_info.h>, this includes times for currently +/// live threads in the task. +#[repr(C, packed(4))] +struct TaskThreadsTimeInfo { + /// Total user run time for live threads + user_time: TimeValue, + /// total system run time for live threads + system_time: TimeValue, +} + +impl mach::TaskInfo for TaskThreadsTimeInfo { + const FLAVOR: u32 = mach::task_info::TASK_THREAD_TIMES_INFO; +} + +impl MinidumpWriter { + /// Writes the [`MDStreamType::MiscInfoStream`] stream.
+ /// + /// On MacOS, we write a [`minidump_common::format::MINIDUMP_MISC_INFO_2`] + /// to this stream, which includes the start time of the process at second + /// granularity, and the (approximate) amount of time spent in user and + /// system (kernel) time for the lifetime of the task. We attempt to also + /// retrieve power ie CPU usage statistics, though this information is only + /// currently available on x86_64, not aarch64 at the moment. + pub(crate) fn write_misc_info( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + let mut info_section = MemoryWriter::::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::MiscInfoStream as u32, + location: info_section.location(), + }; + + let pid = dumper.pid_for_task()?; + + let mut misc_info = MDRawMiscInfo { + size_of_info: std::mem::size_of::() as u32, + flags1: MiscInfoFlags::MINIDUMP_MISC1_PROCESS_ID.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESS_TIMES.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESSOR_POWER_INFO.bits(), + process_id: pid as u32, + process_create_time: 0, + process_user_time: 0, + process_kernel_time: 0, + processor_max_mhz: 0, + processor_current_mhz: 0, + processor_mhz_limit: 0, + processor_max_idle_state: 0, + processor_current_idle_state: 0, + }; + + // Note that both Breakpad and Crashpad use `sysctl CTL_KERN, KERN_PROC, KERN_PROC_PID` + // to retrieve the process start time, but none of the structures that + // are filled in by that call are in libc at the moment, and `proc_pidinfo` + // seems to work just fine, so using that instead. 
+ // + // SAFETY: syscall + misc_info.process_create_time = unsafe { + // Breakpad was using an old method to retrieve this, let's try the + // BSD method instead which is already implemented in libc + let mut proc_info = std::mem::MaybeUninit::::uninit(); + let size = std::mem::size_of::() as i32; + if libc::proc_pidinfo( + pid, + libc::PROC_PIDTBSDINFO, + 0, + proc_info.as_mut_ptr().cast(), + size, + ) == size + { + let proc_info = proc_info.assume_init(); + + proc_info.pbi_start_tvsec as u32 + } else { + 0 + } + }; + + // Note that Breakpad is using `getrusage` to retrieve this information, + // however that is wrong, as it can only retrieve the process usage information + // for the current or children processes, not an external process, so + // we use the Crashpad method, which is itself based off of the XNU + // method of retrieving the process times + // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/kern/kern_resource.c#L1215 + + // The basic task info keeps the timings for all of the terminated threads + let basic_info = dumper.task_info::().ok(); + + // The thread times info keeps the timings for all of the living threads + let thread_times_info = dumper.task_info::().ok(); + + let user_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.user_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.user_time)) + .unwrap_or_default(); + let system_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.system_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.system_time)) + .unwrap_or_default(); + + misc_info.process_user_time = user_time.as_secs() as u32; + misc_info.process_kernel_time = system_time.as_secs() as u32; + + // Note that neither of these two keys is present on aarch64, at least atm + let max: u64 = mach::sysctl_by_name(b"hw.cpufrequency_max\0"); + let freq: u64 = mach::sysctl_by_name(b"hw.cpufrequency\0"); + +
let max = (max / 1000 * 1000) as u32; + let current = (freq / 1000 * 1000) as u32; + + misc_info.processor_max_mhz = max; + misc_info.processor_mhz_limit = max; + misc_info.processor_current_mhz = current; + + info_section.set_value(buffer, misc_info)?; + + Ok(dirent) + } +} diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs new file mode 100644 index 00000000..44213a4f --- /dev/null +++ b/src/mac/streams/module_list.rs @@ -0,0 +1,197 @@ +use super::*; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::ModuleListStream`] to the minidump, which is + /// the last of all loaded modules (images) in the process. + /// + /// Notably, this includes the UUID of the image which is needed to look up + /// debug symbols for the module, as well as the address range covered by + /// the module to know which debug symbols are used to resolve which instruction + /// addresses + pub(crate) fn write_module_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + // The list of modules is pretty critical information, but there could + // still be useful information in the minidump without them if we can't + // retrieve them for some reason + let modules = self.read_loaded_modules(buffer, dumper).unwrap_or_default(); + + let list_header = MemoryWriter::::alloc_with_val(buffer, modules.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ModuleListStream as u32, + location: list_header.location(), + }; + + if !modules.is_empty() { + let mapping_list = MemoryArrayWriter::::alloc_from_iter(buffer, modules)?; + dirent.location.data_size += mapping_list.location().data_size; + } + + Ok(dirent) + } + + fn read_loaded_modules( + &self, + buf: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result, WriterError> { + let mut images = dumper.read_images()?; + + // Apparently MacOS will happily list the same image multiple times + // for some reason, so sort the images by load address and remove all + // of the 
duplicates + images.sort(); + images.dedup(); + + let mut modules = Vec::with_capacity(images.len()); + let mut has_main_executable = false; + + for image in images { + if let Ok((module, is_main_executable)) = self.read_module(image, buf, dumper) { + // We want to keep the modules sorted by their load address except + // in the case of the main executable image which we want to put + // first as it is most likely the culprit, or at least generally + // the most interesting module for human and machine inspectors + if is_main_executable { + modules.insert(0, module); + has_main_executable = true; + } else { + modules.push(module) + }; + } + } + + if !has_main_executable { + Err(TaskDumpError::NoExecutableImage.into()) + } else { + Ok(modules) + } + } + + fn read_module( + &self, + image: ImageInfo, + buf: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<(MDRawModule, bool), WriterError> { + struct ImageSizes { + vm_addr: u64, + vm_size: u64, + slide: isize, + } + + let mut sizes = None; + let mut version = None; + let mut uuid = None; + + { + let load_commands = dumper.read_load_commands(&image)?; + + for lc in load_commands.iter() { + match lc { + mach::LoadCommand::Segment(seg) if sizes.is_none() => { + if &seg.segment_name[..7] == b"__TEXT\0" { + let slide = if seg.file_off == 0 && seg.file_size != 0 { + image.load_address as isize - seg.vm_addr as isize + } else { + 0 + }; + + sizes = Some(ImageSizes { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + mach::LoadCommand::Dylib(dylib) if version.is_none() => { + version = Some(dylib.dylib.current_version); + } + mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { + uuid = Some(img_id.uuid); + } + _ => {} + } + + if sizes.is_some() && version.is_some() && uuid.is_some() { + break; + } + } + } + + let sizes = sizes.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_SEGMENT_64", + id: mach::LC_SEGMENT_64, + })?; + let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { + name: 
"LC_UUID", + id: mach::LC_UUID, + })?; + + let file_path = if image.file_path != 0 { + dumper.read_string(image.file_path)?.unwrap_or_default() + } else { + String::new() + }; + + let module_name = write_string_to_location(buf, &file_path)?; + + let mut raw_module = MDRawModule { + base_of_image: (sizes.vm_addr as isize + sizes.slide) as u64, + size_of_image: sizes.vm_size as u32, + module_name_rva: module_name.rva, + ..Default::default() + }; + + // Version info is not available for the main executable image since + // it doesn't issue a LC_ID_DYLIB load command + if let Some(version) = &version { + raw_module.version_info.signature = format::VS_FFI_SIGNATURE; + raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; + + // Convert MAC dylib version format, which is a 32 bit number, to the + // format used by minidump. + raw_module.version_info.file_version_hi = version >> 16; + raw_module.version_info.file_version_lo = ((version & 0xff00) << 8) | (version & 0xff); + } + + let module_name = if let Some(sep_index) = file_path.rfind('/') { + &file_path[sep_index + 1..] 
+ } else if file_path.is_empty() { + "" + } else { + &file_path + }; + + #[derive(scroll::Pwrite, scroll::SizeWith)] + struct CvInfoPdb { + cv_signature: u32, + signature: format::GUID, + age: u32, + } + + let cv = MemoryWriter::alloc_with_val( + buf, + CvInfoPdb { + cv_signature: format::CvSignature::Pdb70 as u32, + age: 0, + signature: uuid.into(), + }, + )?; + + // Note that we don't use write_string_to_location here as the module + // name is a simple 8-bit string, not 16-bit like most other strings + // in the minidump, and is directly part of the record itself, not an rva + buf.write_all(module_name.as_bytes()); + buf.write_all(&[0]); // null terminator + + let mut cv_location = cv.location(); + cv_location.data_size += module_name.len() as u32 + 1; + raw_module.cv_record = cv_location; + + Ok((raw_module, version.is_none())) + } +} diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs new file mode 100644 index 00000000..aac2de57 --- /dev/null +++ b/src/mac/streams/system_info.rs @@ -0,0 +1,200 @@ +use super::*; +use crate::minidump_format::*; + +/// Retrieve the OS version information. +/// +/// Note that this only works on 10.13.4+, but that release is over 4 years old +/// and 1 version behind the latest unsupported release at the time of this writing +/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionProductVersionKey +fn os_version() -> (u32, u32, u32) { + let vers = mach::sysctl_string(b"kern.osproductversion\0"); + + let inner = || { + let mut it = vers.split('.'); + + let major: u32 = it.next()?.parse().ok()?; + let minor: u32 = it.next()?.parse().ok()?; + let patch: u32 = it.next().and_then(|p| p.parse().ok()).unwrap_or_default(); + + Some((major, minor, patch)) + }; + + inner().unwrap_or_default() +} + +/// Retrieves the OS build version. 
+/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionBuildVersionKey. I have +/// no idea how long this has been the case, but the same information can be +/// retrieved via `sysctlbyname` via the `kern.osversion` key as seen by comparing +/// its value versus the output of the `sw_vers -buildVersion` command +#[inline] +fn build_version() -> String { + mach::sysctl_string(b"kern.osversion\0") +} + +/// Retrieves more detailed information on the cpu. +/// +/// Note that this function is only implemented on `x86_64` as Apple doesn't +/// expose similar info on `aarch64` (or at least, not via the same mechanisms) +fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { + if !cfg!(target_arch = "x86_64") { + return; + } + + let mut md_feats: u64 = 1 << 2 /*PF_COMPARE_EXCHANGE_DOUBLE*/; + let features: u64 = mach::sysctl_by_name(b"machdep.cpu.feature_bits\0"); + + // Map the cpuid feature to its equivalent minidump cpu feature. + // See https://en.wikipedia.org/wiki/CPUID for where the values for the + // various cpuid bits come from, and + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // for where the bits for the the minidump come from + macro_rules! 
map_feature { + ($set:expr, $cpuid_bit:expr, $md_bit:expr) => { + if $set & (1 << $cpuid_bit) != 0 { + md_feats |= 1 << $md_bit; + } + }; + } + + map_feature!( + features, 4, /*TSC*/ + 8 /* PF_RDTSC_INSTRUCTION_AVAILABLE */ + ); + map_feature!(features, 6 /*PAE*/, 9 /* PF_PAE_ENABLED */); + map_feature!( + features, 23, /*MMX*/ + 3 /* PF_MMX_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 25, /*SSE*/ + 6 /* PF_XMMI_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 26, /*SSE2*/ + 10 /* PF_XMMI64_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 32, /*SSE3*/ + 13 /* PF_SSE3_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 45, /*CX16*/ + 14 /* PF_COMPARE_EXCHANGE128 */ + ); + map_feature!(features, 58 /*XSAVE*/, 17 /* PF_XSAVE_ENABLED */); + map_feature!( + features, 62, /*RDRAND*/ + 28 /* PF_RDRAND_INSTRUCTION_AVAILABLE */ + ); + + let ext_features: u64 = mach::sysctl_by_name(b"machdep.cpu.extfeature_bits\0"); + + map_feature!( + ext_features, + 27, /* RDTSCP */ + 32 /* PF_RDTSCP_INSTRUCTION_AVAILABLE */ + ); + map_feature!( + ext_features, + 31, /* 3DNOW */ + 7 /* PF_3DNOW_INSTRUCTIONS_AVAILABLE */ + ); + + let leaf_features: u32 = mach::sysctl_by_name(b"machdep.cpu.leaf7_feature_bits\0"); + map_feature!( + leaf_features, + 0, /* F7_FSGSBASE */ + 22 /* PF_RDWRFSGSBASE_AVAILABLE */ + ); + + // In newer production kernels, NX is always enabled. + // See 10.15.0 xnu-6153.11.26/osfmk/x86_64/pmap.c nx_enabled. + md_feats |= 1 << 12 /* PF_NX_ENABLED */; + + // All CPUs that Apple is known to have shipped should support DAZ. + md_feats |= 1 << 11 /* PF_SSE_DAZ_MODE_AVAILABLE */; + + // minidump_common::format::OtherCpuInfo is just 2 adjacent u64's, we only + // set the first, so just do a direct write to the bytes + cpu.data[..std::mem::size_of::()].copy_from_slice(&md_feats.to_ne_bytes()); +} + +impl MinidumpWriter { + /// Writes the [`MDStreamType::SystemInfoStream`] stream. 
+ /// + /// On MacOS we include basic CPU information, though some of it is not + /// available on `aarch64` at the time of this writing, as well as kernel + /// version information. + pub(crate) fn write_system_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result { + let mut info_section = MemoryWriter::::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::SystemInfoStream as u32, + location: info_section.location(), + }; + + let number_of_processors: u8 = mach::int_sysctl_by_name(b"hw.ncpu\0"); + // SAFETY: POD buffer + let mut cpu: format::CPU_INFORMATION = unsafe { std::mem::zeroed() }; + read_cpu_info(&mut cpu); + + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_AMD64; + + // machdep.cpu.family and machdep.cpu.model already take the extended family + // and model IDs into account. See 10.9.2 xnu-2422.90.20/osfmk/i386/cpuid.c + // cpuid_set_generic_info(). 
+ let processor_level: u16 = mach::int_sysctl_by_name(b"machdep.cpu.family\0"); + let model: u8 = mach::int_sysctl_by_name(b"machdep.cpu.model\0"); + let stepping: u8 = mach::int_sysctl_by_name(b"machdep.cpu.stepping\0"); + + let processor_revision = ((model as u16) << 8) | stepping as u16; + } else if #[cfg(target_arch = "aarch64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64_OLD; + + let family: u32 = mach::sysctl_by_name(b"hw.cpufamily\0"); + + let processor_level = ((family & 0xffff0000) >> 16) as u16; + let processor_revision = (family & 0x0000ffff) as u16; + } else { + compile_error!("unsupported target architecture"); + } + } + + let (major_version, minor_version, build_number) = os_version(); + let os_version_loc = write_string_to_location(buffer, &build_version())?; + + let info = MDRawSystemInfo { + // CPU + processor_architecture: processor_architecture as u16, + processor_level, + processor_revision, + number_of_processors, + cpu, + + // OS + platform_id: PlatformId::MacOs as u32, + product_type: 1, // VER_NT_WORKSTATION, could also be VER_NT_SERVER but...seriously? 
+ major_version, + minor_version, + build_number, + csd_version_rva: os_version_loc.rva, + + suite_mask: 0, + reserved2: 0, + }; + + info_section.set_value(buffer, info)?; + + Ok(dirent) + } +} diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs new file mode 100644 index 00000000..fdc40cc3 --- /dev/null +++ b/src/mac/streams/thread_list.rs @@ -0,0 +1,231 @@ +use super::*; +use crate::minidump_cpu::RawContextCPU; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::ThreadListStream`] which is an array of + /// [`minidump_common::format::MINIDUMP_THREAD`] + pub(crate) fn write_thread_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + let threads = dumper.read_threads()?; + + // Ignore the thread that handled the exception. The count comes from the + // same filter used when writing threads below, so the two always agree + let thread_count = threads + .iter() + .filter(|tid| **tid != self.crash_context.handler_thread) + .count(); + + let list_header = MemoryWriter::::alloc_with_val(buffer, thread_count as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ThreadListStream as u32, + location: list_header.location(), + }; + + let mut thread_list = MemoryArrayWriter::::alloc_array(buffer, thread_count)?; + dirent.location.data_size += thread_list.location().data_size; + + let handler_thread = self.crash_context.handler_thread; + for (i, tid) in threads + .iter() + .filter(|tid| **tid != handler_thread) + .enumerate() + { + let thread = self.write_thread(*tid, buffer, dumper)?; + thread_list.set_value_at(buffer, thread, i)?; + } + + Ok(dirent) + } + + fn write_thread( + &mut self, + tid: u32, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + let mut thread = MDRawThread { + thread_id: tid, + suspend_count: 0, + priority_class: 0, + priority: 0, + teb: 0, + stack: MDMemoryDescriptor::default(), + thread_context: MDLocationDescriptor::default(), + }; + + let thread_state = dumper.read_thread_state(tid)?; + + 
self.write_stack_from_start_address(thread_state.sp(), &mut thread, buffer, dumper)?; + + let mut cpu: RawContextCPU = Default::default(); + Self::fill_cpu_context(&thread_state, &mut cpu); + let cpu_section = MemoryWriter::alloc_with_val(buffer, cpu)?; + thread.thread_context = cpu_section.location(); + Ok(thread) + } + + fn write_stack_from_start_address( + &mut self, + start: u64, + thread: &mut MDRawThread, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<(), WriterError> { + thread.stack.start_of_memory_range = start; + thread.stack.memory.data_size = 0; + thread.stack.memory.rva = buffer.position() as u32; + + let stack_size = self.calculate_stack_size(start, dumper); + + // In some situations the stack address for the thread can come back 0. + // In these cases we skip over the threads in question and stuff the + // stack with a clearly borked value. + // + // In other cases, notably a stack overflow, we might fail to read the + // stack eg. InvalidAddress in which case we use a different borked + // value to indicate the different failure + let stack_location = if stack_size != 0 { + dumper + .read_task_memory(start, stack_size) + .map(|stack_buffer| { + let stack_location = MDLocationDescriptor { + data_size: stack_buffer.len() as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer); + stack_location + }) + .ok() + } else { + None + }; + + thread.stack.memory = stack_location.unwrap_or_else(|| { + let borked = if stack_size == 0 { + 0xdeadbeef + } else { + 0xdeaddead + }; + + thread.stack.start_of_memory_range = borked; + + let stack_location = MDLocationDescriptor { + data_size: 16, + rva: buffer.position() as u32, + }; + buffer.write_all(&borked.to_ne_bytes()); + buffer.write_all(&borked.to_ne_bytes()); + stack_location + }); + + // Add the stack memory as a raw block of memory, this is written to + // the minidump as part of the memory list stream + self.memory_blocks.push(thread.stack); + Ok(()) + } + + fn 
calculate_stack_size(&self, start_address: u64, dumper: &TaskDumper) -> usize { + if start_address == 0 { + return 0; + } + + let mut region = if let Ok(region) = dumper.get_vm_region(start_address) { + region + } else { + return 0; + }; + + // Failure or stack corruption, since mach_vm_region had to go + // higher in the process address space to find a valid region. + if start_address < region.range.start { + return 0; + } + + let root_range_start = region.range.start; + let mut stack_size = region.range.end - region.range.start; + + // If the user tag is VM_MEMORY_STACK, look for more readable regions with + // the same tag placed immediately above the computed stack region. Under + // some circumstances, the stack for thread 0 winds up broken up into + // multiple distinct abutting regions. This can happen for several reasons, + // including user code that calls setrlimit(RLIMIT_STACK, ...) or changes + // the access on stack pages by calling mprotect. + if region.info.user_tag == mach2::vm_statistics::VM_MEMORY_STACK { + loop { + let proposed_next_region_base = region.range.end; + + region = if let Ok(reg) = dumper.get_vm_region(region.range.end) { + reg + } else { + break; + }; + + if region.range.start != proposed_next_region_base + || region.info.user_tag != mach2::vm_statistics::VM_MEMORY_STACK + || (region.info.protection & mach2::vm_prot::VM_PROT_READ) == 0 + { + break; + } + + stack_size += region.range.end - region.range.start; + } + } + + (root_range_start + stack_size - start_address) as usize + } + + pub(crate) fn fill_cpu_context( + thread_state: &crate::mac::mach::ThreadState, + out: &mut RawContextCPU, + ) { + let ts = thread_state.arch_state(); + + cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + out.context_flags = format::ContextFlagsCpu::CONTEXT_AMD64.bits(); + + out.rax = ts.__rax; + out.rbx = ts.__rbx; + out.rcx = ts.__rcx; + out.rdx = ts.__rdx; + out.rdi = ts.__rdi; + out.rsi = ts.__rsi; + out.rbp = ts.__rbp; + out.rsp = ts.__rsp; + out.r8 = ts.__r8; + out.r9 = ts.__r9; + out.r10 = ts.__r10; + out.r11 = ts.__r11; + out.r12 = ts.__r12; + out.r13 = ts.__r13; + out.r14 = ts.__r14; + out.r15 = ts.__r15; + out.rip = ts.__rip; + // according to AMD's software developer guide, bits above 18 are + // not used in the flags register. Since the minidump format + // specifies 32 bits for the flags register, we can truncate safely + // with no loss. + out.eflags = ts.__rflags as _; + out.cs = ts.__cs as u16; + out.fs = ts.__fs as u16; + out.gs = ts.__gs as u16; + } else if #[cfg(target_arch = "aarch64")] { + // This is kind of a lie as we don't actually include the full float state..? + out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; + + out.cpsr = ts.cpsr; + out.iregs[..29].copy_from_slice(&ts.x[..29]); + out.iregs[29] = ts.fp; + out.iregs[30] = ts.lr; + out.sp = ts.sp; + out.pc = ts.pc; + } else { + compile_error!("unsupported target arch"); + } + } + } +} diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs new file mode 100644 index 00000000..417aaf4d --- /dev/null +++ b/src/mac/task_dumper.rs @@ -0,0 +1,416 @@ +use crate::mac::mach; +use mach2::mach_types as mt; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum TaskDumpError { + #[error("kernel error {syscall} {error})")] + Kernel { + syscall: &'static str, + error: mach::KernelError, + }, + #[error("detected an invalid mach image header")] + InvalidMachHeader, + #[error(transparent)] + NonUtf8String(#[from] std::string::FromUtf8Error), + #[error("unable to find the main executable image for the process")] + NoExecutableImage, + #[error("expected load command {name}({id}) was not found for an image")] + 
MissingLoadCommand { name: &'static str, id: u32 }, +} + +/// Wraps a mach call in a Result +macro_rules! mach_call { + ($call:expr) => {{ + // SAFETY: syscall + let kr = unsafe { $call }; + if kr == mach::KERN_SUCCESS { + Ok(()) + } else { + // This is ugly, improvements to the macro welcome! + let mut syscall = stringify!($call); + if let Some(i) = syscall.find('(') { + syscall = &syscall[..i]; + } + Err(TaskDumpError::Kernel { + syscall, + error: kr.into(), + }) + } + }}; +} + +/// `dyld_all_image_infos` from +/// +/// This struct is truncated as we only need a couple of fields at the beginning +/// of the struct +#[repr(C)] +struct AllImagesInfo { + version: u32, // == 1 in Mac OS X 10.4 + /// The number of [`ImageInfo`] structs at that following address + info_array_count: u32, + /// The address in the process where the array of [`ImageInfo`] structs is + info_array_addr: u64, +} + +/// `dyld_image_info` from +#[repr(C)] +#[derive(Clone)] +pub struct ImageInfo { + /// The address in the process where the image is loaded + pub load_address: u64, + /// The address in the process where the image's file path can be read + pub file_path: u64, + /// Timestamp for when the image's file was last modified + pub file_mod_date: u64, +} + +impl PartialEq for ImageInfo { + fn eq(&self, o: &Self) -> bool { + self.load_address == o.load_address + } +} + +impl Eq for ImageInfo {} + +impl Ord for ImageInfo { + fn cmp(&self, o: &Self) -> std::cmp::Ordering { + self.load_address.cmp(&o.load_address) + } +} + +impl PartialOrd for ImageInfo { + fn partial_cmp(&self, o: &Self) -> Option { + Some(self.cmp(o)) + } +} + +/// Describes a region of virtual memory +pub struct VMRegionInfo { + pub info: mach::vm_region_submap_info_64, + pub range: std::ops::Range, +} + +/// Similarly to PtraceDumper for Linux, this provides access to information +/// for a task (MacOS process) +pub struct TaskDumper { + task: mt::task_t, + page_size: i64, +} + +impl TaskDumper { + /// Constructs a 
[`TaskDumper`] for the specified task + pub fn new(task: mt::task_t) -> Self { + Self { + task, + // SAFETY: syscall + page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as i64, + } + } + + /// Reads a block of memory from the task + /// + /// # Errors + /// + /// The syscall to read the task's memory fails for some reason, eg bad address. + pub fn read_task_memory(&self, address: u64, count: usize) -> Result, TaskDumpError> + where + T: Sized + Clone, + { + let length = (count * std::mem::size_of::()) as u64; + + // use the negative of the page size for the mask to find the page address + let page_address = address & (-self.page_size as u64); + let last_page_address = + (address + length + (self.page_size - 1) as u64) & (-self.page_size as u64); + + let page_size = last_page_address - page_address; + let mut local_start = 0; + let mut local_length = 0; + + mach_call!(mach::mach_vm_read( + self.task, + page_address, + page_size, + &mut local_start, + &mut local_length + ))?; + + let mut buffer = Vec::with_capacity(count); + + // SAFETY: this is safe as long as the kernel has not lied to us + let task_buffer = unsafe { + std::slice::from_raw_parts( + (local_start as *const u8) + .offset((address - page_address) as isize) + .cast(), + count, + ) + }; + buffer.extend_from_slice(task_buffer); + + // Don't worry about the return here, if something goes wrong there's probably + // not much we can do about it, and we have what we want anyways + let _res = mach_call!(mach::mach_vm_deallocate( + mach::mach_task_self(), + local_start as u64, // vm_read returns a pointer, but vm_deallocate takes a integer address :-/ + local_length as u64, // vm_read and vm_deallocate use different sizes :-/ + )); + + Ok(buffer) + } + + /// Reads a null terminated string starting at the specified address. This + /// is a specialization of [`read_task_memory`] since strings can span VM + /// regions. 
+ /// + /// This string is capped at 8k which should never be close to being hit as + /// it is only used for file paths for loaded modules, but then again, this + /// is MacOS, so who knows what insanity goes on. + /// + /// # Errors + /// + /// Fails if the address cannot be read for some reason, or the string is + /// not utf-8. + pub fn read_string(&self, addr: u64) -> Result, TaskDumpError> { + // The problem is we don't know how much to read until we know how long + // the string is. And we don't know how long the string is, until we've read + // the memory! So, we'll try to read kMaxStringLength bytes + // (or as many bytes as we can until we reach the end of the vm region). + let get_region_size = || -> Result { + let region = self.get_vm_region(addr)?; + + let mut size_to_end = region.range.end - addr; + + // If the remaining is less than 4k, check if the next region is + // contiguous, and extend the memory that could contain the string + // to include it + if size_to_end < 4 * 1024 { + let maybe_adjacent = self.get_vm_region(region.range.end)?; + + if maybe_adjacent.range.start == region.range.end { + size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; + } + } + + Ok(size_to_end) + }; + + if let Ok(size_to_end) = get_region_size() { + let mut bytes = self.read_task_memory(addr, size_to_end.min(8 * 1024) as usize)?; + + // Find the null terminator and truncate our string + if let Some(null_pos) = bytes.iter().position(|c| *c == 0) { + bytes.resize(null_pos, 0); + } + + Ok(String::from_utf8(bytes).map(Some)?) + } else { + Ok(None) + } + } + + /// Retrieves information on the virtual memory region the specified address + /// is located within. + /// + /// # Errors + /// + /// The syscall to retrieve the VM region information fails for some reason, + /// eg. a bad address. 
+ pub fn get_vm_region(&self, addr: u64) -> Result { + let mut region_base = addr; + let mut region_size = 0; + let mut nesting_level = 0; + let mut submap_info = std::mem::MaybeUninit::::uninit(); + + // + const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = + (std::mem::size_of::() / std::mem::size_of::()) + as u32; + + let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; + + mach_call!(mach::mach_vm_region_recurse( + self.task, + &mut region_base, + &mut region_size, + &mut nesting_level, + submap_info.as_mut_ptr().cast(), + &mut info_count, + ))?; + + Ok(VMRegionInfo { + // SAFETY: this will be valid if the syscall succeeded + info: unsafe { submap_info.assume_init() }, + range: region_base..region_base + region_size, + }) + } + + /// Retrieves the state of the specified thread. The state is an architecture + /// specific block of CPU context ie register state. + /// + /// # Errors + /// + /// The specified thread id is invalid, or the thread is in a task that is + /// compiled for a different architecture than this local task. + pub fn read_thread_state(&self, tid: u32) -> Result { + let mut thread_state = mach::ThreadState::default(); + + mach_call!(mach::thread_get_state( + tid, + mach::THREAD_STATE_FLAVOR as i32, + thread_state.state.as_mut_ptr(), + &mut thread_state.state_size, + ))?; + + Ok(thread_state) + } + + /// Reads the specified task information. + /// + /// # Errors + /// + /// The syscall to receive the task information failed for some reason, eg. + /// the specified type and the flavor are mismatched and considered invalid. + pub fn task_info(&self) -> Result { + let mut info = std::mem::MaybeUninit::::uninit(); + let mut count = (std::mem::size_of::() / std::mem::size_of::()) as u32; + + mach_call!(mach::task::task_info( + self.task, + T::FLAVOR, + info.as_mut_ptr().cast(), + &mut count + ))?; + + // SAFETY: this will be initialized if the call succeeded + unsafe { Ok(info.assume_init()) } + } + + /// Retrieves all of the images loaded in the task. 
+ /// + /// Note that there may be multiple images with the same load address. + /// + /// # Errors + /// + /// The syscall to retrieve the location of the loaded images fails, or + /// the syscall to read the loaded images from the process memory fails + pub fn read_images(&self) -> Result, TaskDumpError> { + impl mach::TaskInfo for mach::task_info::task_dyld_info { + const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; + } + + // Retrieve the address at which the list of loaded images is located + // within the task + let all_images_addr = { + let dyld_info = self.task_info::()?; + dyld_info.all_image_info_addr + }; + + // Here we make the assumption that dyld loaded at the same address in + // the crashed process vs. this one. This is an assumption made in + // "dyld_debug.c" and is said to be nearly always valid. + let dyld_all_info_buf = + self.read_task_memory::(all_images_addr, std::mem::size_of::())?; + // SAFETY: this is fine as long as the kernel isn't lying to us + let all_dyld_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) }; + + self.read_task_memory::( + all_dyld_info.info_array_addr, + all_dyld_info.info_array_count as usize, + ) + } + + /// Retrieves the main executable image for the task. 
+ /// + /// Note that this method is currently only used for tests due to deficiencies + /// in `otool` + /// + /// # Errors + /// + /// Any of the errors that apply to [`Self::read_images`] apply here, in + /// addition to not being able to find the main executable image + pub fn read_executable_image(&self) -> Result { + let images = self.read_images()?; + + for img in images { + let mach_header = self.read_task_memory::(img.load_address, 1)?; + + let header = &mach_header[0]; + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + if header.file_type == mach::MH_EXECUTE { + return Ok(img); + } + } + + Err(TaskDumpError::NoExecutableImage) + } + + /// Retrieves the load commands for the specified image + /// + /// # Errors + /// + /// We fail to read the image header for the specified image, the header we + /// read is determined to be invalid, or we fail to read the block of memory + /// containing the load commands themselves. + pub fn read_load_commands(&self, img: &ImageInfo) -> Result { + let mach_header = self.read_task_memory::(img.load_address, 1)?; + + let header = &mach_header[0]; + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + // Read the load commands which immediately follow the image header from + // the task memory. 
Note that load commands vary in size so we need to + // retrieve the memory as a raw byte buffer that we can then iterate + // through and step according to the size of each load command + let load_commands_buf = self.read_task_memory::( + img.load_address + std::mem::size_of::() as u64, + header.size_commands as usize, + )?; + + Ok(mach::LoadCommands { + buffer: load_commands_buf, + count: header.num_commands, + }) + } + + /// Gets a list of all of the thread ids in the task + /// + /// # Errors + /// + /// The syscall to retrieve the list of threads fails + pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> { + let mut threads = std::ptr::null_mut(); + let mut thread_count = 0; + + mach_call!(mach::task_threads( + self.task, + &mut threads, + &mut thread_count + ))?; + + Ok( + // SAFETY: This should be valid if the call succeeded + unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }, + ) + } + + /// Retrieves the PID for the task + /// + /// # Errors + /// + /// Presumably the only way this would fail would be if the task we are + /// dumping disappears. + pub fn pid_for_task(&self) -> Result { + let mut pid = 0; + mach_call!(mach::pid_for_task(self.task, &mut pid))?; + Ok(pid) + } +} diff --git a/src/mem_writer.rs b/src/mem_writer.rs new file mode 100644 index 00000000..a723c2aa --- /dev/null +++ b/src/mem_writer.rs @@ -0,0 +1,272 @@ +use crate::minidump_format::{MDLocationDescriptor, MDRVA}; +use scroll::ctx::{SizeWith, TryIntoCtx}; + +#[derive(Debug, thiserror::Error)] +pub enum MemoryWriterError { + #[error("IO error when writing to DumpBuf")] + IOError(#[from] std::io::Error), + #[error("Failed integer conversion")] + TryFromIntError(#[from] std::num::TryFromIntError), + #[error("Failed to write to buffer")] + Scroll(#[from] scroll::Error), +} + +type WriteResult = std::result::Result; + +macro_rules! 
size { + ($t:ty) => { + <$t>::size_with(&scroll::Endian::Little) + }; +} + +pub struct Buffer { + inner: Vec, +} + +impl Buffer { + pub fn with_capacity(cap: usize) -> Self { + Self { + inner: Vec::with_capacity(cap), + } + } + + #[inline] + pub fn position(&self) -> u64 { + self.inner.len() as u64 + } + + #[inline] + #[must_use] + fn reserve(&mut self, len: usize) -> usize { + let mark = self.inner.len(); + self.inner.resize(self.inner.len() + len, 0); + mark + } + + #[inline] + fn write(&mut self, val: N) -> Result + where + N: TryIntoCtx + SizeWith, + E: From, + { + self.write_at(self.inner.len(), val) + } + + fn write_at(&mut self, offset: usize, val: N) -> Result + where + N: TryIntoCtx + SizeWith, + E: From, + { + let to_write = size!(N); + let remainder = self.inner.len() - offset; + if remainder < to_write { + self.inner + .resize(self.inner.len() + to_write - remainder, 0); + } + + let dst = &mut self.inner[offset..offset + to_write]; + val.try_into_ctx(dst, scroll::Endian::Little) + } + + #[inline] + pub fn write_all(&mut self, buffer: &[u8]) { + self.inner.extend_from_slice(buffer); + } +} + +impl From for Vec { + fn from(b: Buffer) -> Self { + b.inner + } +} + +impl std::ops::Deref for Buffer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +#[derive(Debug, PartialEq)] +pub struct MemoryWriter { + pub position: MDRVA, + pub size: usize, + phantom: std::marker::PhantomData, +} + +impl MemoryWriter +where + T: TryIntoCtx + SizeWith, +{ + /// Create a slot for a type T in the buffer, we can fill right now with real values. + pub fn alloc_with_val(buffer: &mut Buffer, val: T) -> WriteResult { + // Mark the position as we may overwrite later + let position = buffer.position(); + let size = buffer.write(val)?; + + Ok(Self { + position: position as u32, + size, + phantom: std::marker::PhantomData, + }) + } + + /// Create a slot for a type T in the buffer, we can fill later with real values. 
+ pub fn alloc(buffer: &mut Buffer) -> WriteResult { + let size = size!(T); + let position = buffer.reserve(size) as u32; + + Ok(Self { + position: position as u32, + size, + phantom: std::marker::PhantomData, + }) + } + + /// Write actual values in the buffer-slot we got during `alloc()` + #[inline] + pub fn set_value(&mut self, buffer: &mut Buffer, val: T) -> WriteResult<()> { + Ok(buffer.write_at(self.position as usize, val).map(|_sz| ())?) + } + + #[inline] + pub fn location(&self) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: size!(T) as u32, + rva: self.position, + } + } +} + +#[derive(Debug, PartialEq)] +pub struct MemoryArrayWriter { + pub position: MDRVA, + array_size: usize, + phantom: std::marker::PhantomData, +} + +#[cfg(any(target_os = "linux", target_os = "android"))] +impl MemoryArrayWriter { + #[inline] + pub fn write_bytes(buffer: &mut Buffer, slice: &[u8]) -> Self { + let position = buffer.position(); + buffer.write_all(slice); + + Self { + position: position as u32, + array_size: slice.len(), + phantom: std::marker::PhantomData, + } + } +} + +impl MemoryArrayWriter +where + T: TryIntoCtx + SizeWith + Copy, +{ + pub fn alloc_from_array(buffer: &mut Buffer, array: &[T]) -> WriteResult { + let array_size = array.len(); + let position = buffer.reserve(array_size * size!(T)); + + for (idx, val) in array.iter().enumerate() { + buffer.write_at(position + idx * size!(T), *val)?; + } + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } +} + +impl MemoryArrayWriter +where + T: TryIntoCtx + SizeWith, +{ + /// Create a slot for a type T in the buffer, we can fill in the values in one go. 
+ pub fn alloc_from_iter( + buffer: &mut Buffer, + iter: impl IntoIterator, + ) -> WriteResult + where + I: std::iter::ExactSizeIterator, + { + let iter = iter.into_iter(); + let array_size = iter.len(); + let size = size!(T); + let position = buffer.reserve(array_size * size); + + for (idx, val) in iter.enumerate() { + buffer.write_at(position + idx * size, val)?; + } + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } + + /// Create a slot for a type T in the buffer, we can fill later with real values. + /// This function fills it with `Default::default()`, which is less performant than + /// using uninitialized memory, but safe. + pub fn alloc_array(buffer: &mut Buffer, array_size: usize) -> WriteResult { + let position = buffer.reserve(array_size * size!(T)); + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } + + /// Write actual values in the buffer-slot we got during `alloc()` + #[inline] + pub fn set_value_at(&mut self, buffer: &mut Buffer, val: T, index: usize) -> WriteResult<()> { + Ok(buffer + .write_at(self.position as usize + size!(T) * index, val) + .map(|_sz| ())?) 
+ } + + #[inline] + pub fn location(&self) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: (self.array_size * size!(T)) as u32, + rva: self.position, + } + } + + #[inline] + pub fn location_of_index(&self, idx: usize) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: size!(T) as u32, + rva: self.position + (size!(T) * idx) as u32, + } + } +} + +pub fn write_string_to_location( + buffer: &mut Buffer, + text: &str, +) -> WriteResult { + let letters: Vec = text.encode_utf16().collect(); + + // First write size of the string (x letters in u16, times the size of u16) + let text_header = MemoryWriter::::alloc_with_val( + buffer, + (letters.len() * std::mem::size_of::()).try_into()?, + )?; + + // Then write utf-16 letters after that + let mut text_section = MemoryArrayWriter::::alloc_array(buffer, letters.len())?; + for (index, letter) in letters.iter().enumerate() { + text_section.set_value_at(buffer, *letter, index)?; + } + + let mut location = text_header.location(); + location.data_size += text_section.location().data_size; + + Ok(location) +} diff --git a/src/minidump_cpu.rs b/src/minidump_cpu.rs index 4130939e..6afc9402 100644 --- a/src/minidump_cpu.rs +++ b/src/minidump_cpu.rs @@ -11,8 +11,10 @@ cfg_if::cfg_if! 
{ } else if #[cfg(target_arch = "aarch64")] { /// This is the number of general purpose registers _not_ counting /// the stack pointer + #[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) const GP_REG_COUNT: usize = 31; /// The number of floating point registers in the floating point save area + #[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) const FP_REG_COUNT: usize = 32; pub type RawContextCPU = minidump_common::format::CONTEXT_ARM64_OLD; diff --git a/src/windows/minidump_writer.rs b/src/windows/minidump_writer.rs index ce6bc2f8..6a1bd2f1 100644 --- a/src/windows/minidump_writer.rs +++ b/src/windows/minidump_writer.rs @@ -13,8 +13,6 @@ pub struct MinidumpWriter { crash_context: crash_context::CrashContext, /// Handle to the crashing process, which could be ourselves crashing_process: HANDLE, - /// The pid of the crashing process. - crashing_pid: u32, /// The `EXCEPTION_POINTERS` contained in crash context is a pointer into the /// memory of the process that crashed, as it contains an `EXCEPTION_RECORD` /// record which is an internally linked list, so in the case that we are @@ -26,53 +24,41 @@ pub struct MinidumpWriter { } impl MinidumpWriter { - /// Creates a minidump writer for a crash that occurred in an external process. + /// Creates a minidump writer capable of dumping the process specified by + /// the [`crash_context::CrashContext`]. /// - /// # Errors + /// Note that it is inherently unreliable to dump the currently running + /// processes, it is recommended to dump from an external process if possible. 
/// - /// Fails if we are unable to open the external process for some reason - pub fn external_process( - crash_context: crash_context::CrashContext, - pid: u32, - ) -> Result { - // SAFETY: syscall - let crashing_process = unsafe { - threading::OpenProcess( - threading::PROCESS_ALL_ACCESS, // desired access - 0, // inherit handles - pid, // pid - ) - }; - - if crashing_process == 0 { - Err(std::io::Error::last_os_error().into()) - } else { - Ok(Self { - crash_context, - crashing_process, - crashing_pid: pid, - is_external_process: true, - }) - } - } - - /// Creates a minidump writer for a crash that occurred in the current process. + /// # Errors /// - /// Note that in-process dumping is inherently unreliable, it is recommended - /// to use the [`Self::external_process`] in a different process than the - /// one that crashed when possible. - pub fn current_process(crash_context: crash_context::CrashContext) -> Self { - let crashing_pid = std::process::id(); + /// Fails if the process specified in the context is not the local process + /// and we are unable to open it due to eg. security reasons. 
+ pub fn new(crash_context: crash_context::CrashContext) -> Result { + // SAFETY: syscalls + let (crashing_process, is_external_process) = unsafe { + if crash_context.process_id != std::process::id() { + let proc = threading::OpenProcess( + threading::PROCESS_ALL_ACCESS, // desired access + 0, // inherit handles + crash_context.process_id, // pid + ); + + if proc == 0 { + return Err(std::io::Error::last_os_error().into()); + } - // SAFETY: syscall - let crashing_process = unsafe { threading::GetCurrentProcess() }; + (proc, true) + } else { + (threading::GetCurrentProcess(), false) + } + }; - Self { + Ok(Self { crash_context, crashing_process, - crashing_pid, - is_external_process: false, - } + is_external_process: true, + }) } /// Writes a minidump to the specified file @@ -132,7 +118,7 @@ impl MinidumpWriter { let ret = unsafe { md::MiniDumpWriteDump( self.crashing_process, // HANDLE to the process with the crash we want to capture - self.crashing_pid, // process id + self.crash_context.process_id, // process id destination.as_raw_handle() as HANDLE, // file to write the minidump to md::MiniDumpNormal, // MINIDUMP_TYPE - we _might_ want to make this configurable exc_info @@ -278,6 +264,11 @@ impl MinidumpWriter { impl Drop for MinidumpWriter { fn drop(&mut self) { + // Note we close the handle regardless of whether it is the local handle + // or an external one, as noted in the docs + // + // > The pseudo handle need not be closed when it is no longer needed. + // > Calling the CloseHandle function with a pseudo handle has no effect. 
// SAFETY: syscall unsafe { CloseHandle(self.crashing_process) }; } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index bb262601..2c1ded5f 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -78,7 +78,7 @@ pub fn wait_for_threads(child: &mut Child, num: usize) { } #[allow(unused)] -pub fn start_child_and_return(command: &str) -> Child { +pub fn start_child_and_return(args: &[&str]) -> Child { let mut child = Command::new("cargo") .env("RUST_BACKTRACE", "1") .arg("run") @@ -86,7 +86,7 @@ pub fn start_child_and_return(command: &str) -> Child { .arg("--bin") .arg("test") .arg("--") - .arg(command) + .args(args) .stdout(Stdio::piped()) .spawn() .expect("failed to execute child"); diff --git a/tests/linux_minidump_writer.rs b/tests/linux_minidump_writer.rs index 3901f08e..6c8c0045 100644 --- a/tests/linux_minidump_writer.rs +++ b/tests/linux_minidump_writer.rs @@ -48,6 +48,7 @@ fn get_crash_context(tid: Pid) -> CrashContext { CrashContext { inner: crash_context::CrashContext { siginfo, + pid: std::process::id() as _, tid, context, float_state, @@ -98,7 +99,7 @@ fn test_write_dump_with_context() { } fn test_write_and_read_dump_from_parent_helper(context: Context) { - let mut child = start_child_and_return("spawn_mmap_wait"); + let mut child = start_child_and_return(&["spawn_mmap_wait"]); let pid = child.id() as i32; let mut tmpfile = tempfile::Builder::new() @@ -214,7 +215,7 @@ fn test_write_and_read_dump_from_parent_with_context() { } fn test_write_with_additional_memory_helper(context: Context) { - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = child.id() as i32; let mut tmpfile = tempfile::Builder::new() @@ -623,7 +624,7 @@ fn test_sanitized_stacks_with_context() { } fn test_write_early_abort_helper(context: Context) { - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = 
child.id() as i32; let mut tmpfile = tempfile::Builder::new() diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs new file mode 100644 index 00000000..3b6f6c8a --- /dev/null +++ b/tests/mac_minidump_writer.rs @@ -0,0 +1,208 @@ +#![cfg(target_os = "macos")] + +mod common; +use common::start_child_and_return; + +use minidump::{ + CrashReason, Minidump, MinidumpBreakpadInfo, MinidumpMemoryList, MinidumpMiscInfo, + MinidumpModuleList, MinidumpSystemInfo, MinidumpThreadList, +}; +use minidump_writer::minidump_writer::MinidumpWriter; + +fn get_crash_reason<'a, T: std::ops::Deref + 'a>( + md: &Minidump<'a, T>, +) -> CrashReason { + let exc: minidump::MinidumpException<'_> = + md.get_stream().expect("unable to find exception stream"); + + exc.get_crash_reason( + minidump::system_info::Os::MacOs, + if cfg!(target_arch = "x86_64") { + minidump::system_info::Cpu::X86_64 + } else if cfg!(target_arch = "aarch64") { + minidump::system_info::Cpu::Arm64 + } else { + unimplemented!() + }, + ) +} + +struct Captured<'md> { + #[allow(dead_code)] + task: u32, + thread: u32, + minidump: Minidump<'md, memmap2::Mmap>, +} + +fn capture_minidump(name: &str, exception_kind: u32) -> Captured<'_> { + // Create a mach port server to retrieve the crash details from the child + let mut server = crash_context::ipc::Server::create(&std::ffi::CString::new(name).unwrap()) + .expect("failed to create mach port service"); + + let mut child = start_child_and_return(&[name, &exception_kind.to_string()]); + + // Wait for the child to spinup and report a crash context to us + let mut rcc = server + .try_recv_crash_context(Some(std::time::Duration::from_secs(5))) + .expect("failed to receive context") + .expect("receive timed out"); + + let mut tmpfile = tempfile::Builder::new().prefix(name).tempfile().unwrap(); + + let task = rcc.crash_context.task; + let thread = rcc.crash_context.thread; + + let mut dumper = MinidumpWriter::new(rcc.crash_context); + + dumper + 
.dump(tmpfile.as_file_mut()) + .expect("failed to write minidump"); + + // Signal the child that we've received and processed the crash context + rcc.acker + .send_ack(1, Some(std::time::Duration::from_secs(2))) + .expect("failed to send ack"); + + child.kill().expect("failed to kill child"); + + let minidump = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); + + Captured { + task, + thread, + minidump, + } +} + +#[test] +fn dump_external_process() { + let approximate_proc_start_time = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + + let md = capture_minidump( + "dump_external_process", + mach2::exception_types::EXC_BREAKPOINT, + ) + .minidump; + + let crash_reason = get_crash_reason(&md); + + assert!(matches!( + crash_reason, + CrashReason::MacGeneral( + minidump_common::errors::ExceptionCodeMac::EXC_BREAKPOINT, + 100 + ) + )); + + let _: MinidumpModuleList = md.get_stream().expect("Couldn't find MinidumpModuleList"); + let _: MinidumpThreadList = md.get_stream().expect("Couldn't find MinidumpThreadList"); + let _: MinidumpMemoryList = md.get_stream().expect("Couldn't find MinidumpMemoryList"); + let _: MinidumpSystemInfo = md.get_stream().expect("Couldn't find MinidumpSystemInfo"); + let _: MinidumpBreakpadInfo = md.get_stream().expect("Couldn't find MinidumpBreakpadInfo"); + + let misc_info: MinidumpMiscInfo = md.get_stream().expect("Couldn't find MinidumpMiscInfo"); + + if let minidump::RawMiscInfo::MiscInfo2(mi) = &misc_info.raw { + // Unfortunately the minidump format only has 32-bit precision for the + // process start time + let process_create_time = mi.process_create_time as u64; + + assert!( + process_create_time >= approximate_proc_start_time + && process_create_time <= approximate_proc_start_time + 2 + ); + + // I've tried busy looping to spend CPU time to get this up, but + // MACH_TASK_BASIC_INFO which should give terminated thread times only ever + // reports 0, and 
TASK_THREAD_TIMES_INFO which should show active thread + // times I've only been able to get up to a few thousand microseconds + // even when busy looping for well over a second, and those get truncated + // to whole seconds. And it seems that crashpad doesn't have tests around + // this, though that's hard to say given how tedious it is finding stuff + // in that bloated codebase + // assert!(mi.process_user_time > 0); + // assert!(mi.process_kernel_time > 0); + + // These aren't currently available on aarch64, or if they are, they + // are not via the same sysctlbyname mechanism. Would be nice if Apple + // documented...anything + if cfg!(target_arch = "x86_64") { + assert!(mi.processor_max_mhz > 0); + assert!(mi.processor_current_mhz > 0); + } + } else { + panic!("unexpected misc info type {:?}", misc_info); + } +} + +/// Validates we can actually walk the stack for each thread in the minidump, +/// this is using minidump-processor, which (currently) depends on breakpad +/// symbols. The symbols are generated in the test itself with the dump_syms +/// library from the `target/debug/test` binary and stored in `.test-symbols`, +/// which is then handed to the symbolizer used to process the minidump +#[test] +fn stackwalks() { + println!("generating minidump..."); + let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); + + // Generate the breakpad symbols + println!("generating symbols..."); + dump_syms::dumper::single_file( + &dump_syms::dumper::Config { + output: dump_syms::dumper::Output::Store(".test-symbols".into()), + symbol_server: None, + debug_id: None, + code_id: None, + arch: "", + file_type: dump_syms::common::FileType::Macho, + num_jobs: 2, // default this + check_cfi: false, + mapping_var: None, + mapping_src: None, + mapping_dest: None, + mapping_file: None, + }, + "target/debug/test", + ) + .expect("failed to dump symbols"); + + let provider = + minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ + 
".test-symbols".into(), + ])); + + let state = futures::executor::block_on(async { + minidump_processor::process_minidump(&md.minidump, &provider).await + }) + .unwrap(); + + //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); + + // We expect at least 2 threads, one of which is the fake crashing thread + let fake_crash_thread = state + .threads + .iter() + .find(|cs| cs.thread_id == md.thread) + .expect("failed to find crash thread"); + + // The thread is named, however we don't currently retrieve that information; + // indeed, it appears that you need to retrieve the pthread that + // corresponds to the mach port for a thread, however that API seems to be + // task specific... + // assert_eq!( + // fake_crash_thread.thread_name.as_deref(), + // Some("test-thread") + // ); + + assert!( + fake_crash_thread.frames.iter().any(|sf| { + sf.function_name + .as_ref() + .map_or(false, |fname| fname.ends_with("wait_until_killed")) + }), + "unable to locate expected function" + ); +} diff --git a/tests/ptrace_dumper.rs b/tests/ptrace_dumper.rs index 520a3eaa..35d89461 100644 --- a/tests/ptrace_dumper.rs +++ b/tests/ptrace_dumper.rs @@ -186,7 +186,7 @@ fn test_copy_from_process_self() { #[test] fn test_sanitize_stack_copy() { let num_of_threads = 1; - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = child.id() as i32; let mut f = BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); diff --git a/tests/task_dumper.rs b/tests/task_dumper.rs new file mode 100644 index 00000000..6f04c396 --- /dev/null +++ b/tests/task_dumper.rs @@ -0,0 +1,147 @@ +//! 
All of these tests are specific to the MacOS task dumper +#![cfg(target_os = "macos")] + +use minidump_writer::{mach::LoadCommand, task_dumper::TaskDumper}; +use std::fmt::Write; + +fn call_otool(args: &[&str]) -> String { + let mut cmd = std::process::Command::new("otool"); + cmd.args(args); + + let exe_path = std::env::current_exe().expect("unable to retrieve test executable path"); + cmd.arg(exe_path); + + let output = cmd.output().expect("failed to spawn otool"); + + assert!(output.status.success()); + + String::from_utf8(output.stdout).expect("stdout was invalid utf-8") +} + +/// Validates we can iterate the load commands for all of the images in the task +#[test] +fn iterates_load_commands() { + let lc_str = call_otool(&["-l"]); + + let mut expected = String::new(); + let mut lc_index = 0; + + expected.push('\n'); + + while let Some(nlc) = lc_str[lc_index..].find("Load command ") { + lc_index += nlc; + + let block = match lc_str[lc_index + 13..].find("Load command ") { + Some(ind) => &lc_str[lc_index + 13..lc_index + 13 + ind], + None => &lc_str[lc_index..], + }; + + // otool prints the load command index for each command, but we only + // handle the small subset of the available load commands we care about + // so just ignore that + let block = &block[block.find('\n').unwrap() + 1..]; + + // otool also prints all the sections for LC_SEGMENT_* commands, but + // we don't care about those, so ignore them + let block = match block.find("Section") { + Some(ind) => &block[..ind], + None => block, + }; + + lc_index += 13; + + let cmd = block + .find("cmd ") + .expect("load commnd didn't specify cmd kind"); + let cmd_end = block[cmd..] 
+ .find('\n') + .expect("load cmd didn't end with newline"); + if matches!( + &block[cmd + 4..cmd + cmd_end], + "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" + ) { + expected.push_str(block); + } + } + + let task_dumper = TaskDumper::new( + // SAFETY: syscall + unsafe { mach2::traps::mach_task_self() }, + ); + + let mut actual = String::new(); + + // Unfortunately, Apple decided to move dynamic libs into a shared cache, + // removing them from the file system completely, and unless I'm missing it + // there is no way to get the load commands for the dylibs since otool + // only understands file paths? So we just get the load commands for the main + // executable instead, this means that we miss the `LC_ID_DYLIB` commands + // since they only apply to dylibs, but this test is more that we can + // correctly iterate through the load commands themselves, so this _should_ + // be fine... + let exe_img = task_dumper + .read_executable_image() + .expect("failed to read executable image"); + + { + let lcmds = task_dumper + .read_load_commands(&exe_img) + .expect("failed to read load commands"); + + for lc in lcmds.iter() { + match lc { + LoadCommand::Segment(seg) => { + let segname = std::str::from_utf8(&seg.segment_name).unwrap(); + let segname = &segname[..segname.find('\0').unwrap()]; + write!( + &mut actual, + " + cmd LC_SEGMENT_64 + cmdsize {} + segname {} + vmaddr 0x{:016x} + vmsize 0x{:016x} + fileoff {} + filesize {} + maxprot 0x{:08x} + initprot 0x{:08x} + nsects {} + flags 0x{:x}", + seg.cmd_size, + segname, + seg.vm_addr, + seg.vm_size, + seg.file_off, + seg.file_size, + seg.max_prot, + seg.init_prot, + seg.num_sections, + seg.flags, + ) + .unwrap(); + } + LoadCommand::Dylib(_dylib) => { + unreachable!(); + } + LoadCommand::Uuid(uuid) => { + let id = uuid::Uuid::from_bytes(uuid.uuid); + let mut uuid_buf = [0u8; uuid::fmt::Hyphenated::LENGTH]; + let uuid_str = id.hyphenated().encode_upper(&mut uuid_buf); + + write!( + &mut actual, + " + cmd LC_UUID + cmdsize {} + 
uuid {uuid_str} +", + uuid.cmd_size, + ) + .unwrap(); + } + } + } + } + + similar_asserts::assert_str_eq!(expected, actual); +} diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index 7415e370..038f8599 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -51,11 +51,12 @@ fn dump_current_process() { let crash_context = crash_context::CrashContext { exception_pointers: (&exception_ptrs as *const EXCEPTION_POINTERS).cast(), + process_id: std::process::id(), thread_id: GetCurrentThreadId(), exception_code: STATUS_INVALID_PARAMETER, }; - let dumper = MinidumpWriter::current_process(crash_context); + let dumper = MinidumpWriter::new(crash_context).expect("failed to create MinidumpWriter"); dumper .dump(tmpfile.as_file_mut()) @@ -85,7 +86,7 @@ fn dump_current_process() { fn dump_external_process() { use std::io::BufRead; - let mut child = start_child_and_return(&format!("{:x}", EXCEPTION_ILLEGAL_INSTRUCTION)); + let mut child = start_child_and_return(&[&format!("{:x}", EXCEPTION_ILLEGAL_INSTRUCTION)]); let (process_id, exception_pointers, thread_id, exception_code) = { let mut f = std::io::BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); @@ -107,6 +108,7 @@ fn dump_external_process() { let crash_context = crash_context::CrashContext { exception_pointers: exception_pointers as _, + process_id, thread_id, exception_code, }; @@ -116,8 +118,7 @@ fn dump_external_process() { .tempfile() .unwrap(); - let dumper = MinidumpWriter::external_process(crash_context, process_id) - .expect("failed to create MinidumpWriter"); + let dumper = MinidumpWriter::new(crash_context).expect("failed to create MinidumpWriter"); dumper .dump(tmpfile.as_file_mut())