symbolic-common-12.8.0/Cargo.toml0000644000000027040000000000100122340ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "symbolic-common" version = "12.8.0" authors = [ "Armin Ronacher ", "Jan Michael Auer ", ] description = """ Common types and utilities for symbolic, a library to symbolicate and process stack traces from native applications, minidumps or minified JavaScript. """ homepage = "https://github.com/getsentry/symbolic" documentation = "https://docs.rs/symbolic-common" readme = "README.md" license = "MIT" repository = "https://github.com/getsentry/symbolic" [package.metadata.docs.rs] all-features = true [dependencies.debugid] version = "0.8.0" [dependencies.memmap2] version = "0.9.0" [dependencies.serde] version = "1.0.154" features = ["derive"] optional = true [dependencies.stable_deref_trait] version = "1.2.0" [dependencies.uuid] version = "1.3.0" [dev-dependencies.similar-asserts] version = "1.4.2" [dev-dependencies.tempfile] version = "3.4.0" [features] serde = [ "dep:serde", "debugid/serde", ] symbolic-common-12.8.0/Cargo.toml.orig000064400000000000000000000016421046102023000157150ustar 00000000000000[package] name = "symbolic-common" version = "12.8.0" license = "MIT" authors = [ "Armin Ronacher ", "Jan Michael Auer ", ] documentation = "https://docs.rs/symbolic-common" homepage = "https://github.com/getsentry/symbolic" repository = "https://github.com/getsentry/symbolic" readme = "README.md" description = """ Common types and utilities for symbolic, a library to symbolicate and process stack traces from native applications, minidumps or minified JavaScript. """ edition = "2021" [package.metadata.docs.rs] all-features = true [features] serde = ["dep:serde", "debugid/serde"] [dependencies] debugid = "0.8.0" memmap2 = "0.9.0" stable_deref_trait = "1.2.0" serde = { version = "1.0.154", optional = true, features = ["derive"] } uuid = "1.3.0" [dev-dependencies] symbolic-testutils = { path = "../symbolic-testutils" } tempfile = "3.4.0" similar-asserts = "1.4.2" symbolic-common-12.8.0/README.md000064400000000000000000000021641046102023000143050ustar 00000000000000[![Build Status](https://travis-ci.org/getsentry/symbolic.svg?branch=master)](https://travis-ci.org/getsentry/symbolic) # symbolic-common Common functionality for `symbolic`. This crate exposes a set of key types: - [`ByteView`]: Gives access to binary data in-memory or on the file system. - [`SelfCell`]: Allows to create self-referential types. - [`Name`]: A symbol name that can be demangled with the `demangle` feature. - [`InstructionInfo`]: A utility type for instruction pointer heuristics. - Functions and utilities to deal with paths from different platforms. ## Features - `serde` (optional): Implements `serde::Deserialize` and `serde::Serialize` for all data types. In the `symbolic` crate, this feature is exposed via `common-serde`. This module is part of the `symbolic` crate. [`Name`]: https://docs.rs/symbolic/7/symbolic/common/struct.Name.html [`ByteView`]: https://docs.rs/symbolic/7/symbolic/common/struct.ByteView.html [`InstructionInfo`]: https://docs.rs/symbolic/7/symbolic/common/struct.InstructionInfo.html [`SelfCell`]: https://docs.rs/symbolic/7/symbolic/common/struct.SelfCell.html License: MIT symbolic-common-12.8.0/src/byteview.rs000064400000000000000000000205231046102023000160200ustar 00000000000000//! A wrapper type providing direct memory access to binary data. //! //! See the [`ByteView`] struct for more documentation. //! //! [`ByteView`]: struct.ByteView.html use std::borrow::Cow; use std::fs::File; use std::io; use std::ops::Deref; use std::path::Path; use std::sync::Arc; use memmap2::Mmap; use crate::cell::StableDeref; /// The owner of data behind a ByteView. /// /// This can either be an mmapped file, an owned buffer or a borrowed binary slice. #[derive(Debug)] enum ByteViewBacking<'a> { Buf(Cow<'a, [u8]>), Mmap(Mmap), } impl Deref for ByteViewBacking<'_> { type Target = [u8]; fn deref(&self) -> &Self::Target { match *self { ByteViewBacking::Buf(ref buf) => buf, ByteViewBacking::Mmap(ref mmap) => mmap, } } } /// A smart pointer for byte data. /// /// This type can be used to uniformly access bytes that were created either from mmapping in a /// path, a vector or a borrowed slice. A `ByteView` dereferences into a `&[u8]` and guarantees /// random access to the underlying buffer or file. /// /// A `ByteView` can be constructed from borrowed slices, vectors or memory mapped from the file /// system directly. /// /// # Example /// /// The most common way to use `ByteView` is to construct it from a file handle. This will own the /// underlying file handle until the `ByteView` is dropped: /// /// ``` /// use std::io::Write; /// use symbolic_common::ByteView; /// /// fn main() -> Result<(), std::io::Error> { /// let mut file = tempfile::tempfile()?; /// file.write_all(b"1234"); /// /// let view = ByteView::map_file(file)?; /// assert_eq!(view.as_slice(), b"1234"); /// Ok(()) /// } /// ``` #[derive(Clone, Debug)] pub struct ByteView<'a> { backing: Arc>, } impl<'a> ByteView<'a> { fn with_backing(backing: ByteViewBacking<'a>) -> Self { ByteView { backing: Arc::new(backing), } } /// Constructs a `ByteView` from a `Cow`. /// /// # Example /// /// ``` /// use std::borrow::Cow; /// use symbolic_common::ByteView; /// /// let cow = Cow::Borrowed(&b"1234"[..]); /// let view = ByteView::from_cow(cow); /// ``` pub fn from_cow(cow: Cow<'a, [u8]>) -> Self { ByteView::with_backing(ByteViewBacking::Buf(cow)) } /// Constructs a `ByteView` from a byte slice. /// /// # Example /// /// ``` /// use symbolic_common::ByteView; /// /// let view = ByteView::from_slice(b"1234"); /// ``` pub fn from_slice(buffer: &'a [u8]) -> Self { ByteView::from_cow(Cow::Borrowed(buffer)) } /// Constructs a `ByteView` from a vector of bytes. /// /// # Example /// /// ``` /// use symbolic_common::ByteView; /// /// let vec = b"1234".to_vec(); /// let view = ByteView::from_vec(vec); /// ``` pub fn from_vec(buffer: Vec) -> Self { ByteView::from_cow(Cow::Owned(buffer)) } /// Constructs a `ByteView` from an open file handle by memory mapping the file. /// /// See [`ByteView::map_file_ref`] for a non-consuming version of this constructor. /// /// # Example /// /// ``` /// use std::io::Write; /// use symbolic_common::ByteView; /// /// fn main() -> Result<(), std::io::Error> { /// let mut file = tempfile::tempfile()?; /// let view = ByteView::map_file(file)?; /// Ok(()) /// } /// ``` pub fn map_file(file: File) -> Result { Self::map_file_ref(&file) } /// Constructs a `ByteView` from an open file handle by memory mapping the file. /// /// The main difference with [`ByteView::map_file`] is that this takes the [`File`] by /// reference rather than consuming it. /// /// # Example /// /// ``` /// use std::io::Write; /// use symbolic_common::ByteView; /// /// fn main() -> Result<(), std::io::Error> { /// let mut file = tempfile::tempfile()?; /// let view = ByteView::map_file_ref(&file)?; /// Ok(()) /// } /// ``` pub fn map_file_ref(file: &File) -> Result { let backing = match unsafe { Mmap::map(file) } { Ok(mmap) => ByteViewBacking::Mmap(mmap), Err(err) => { // this is raised on empty mmaps which we want to ignore. The 1006 Windows error // looks like "The volume for a file has been externally altered so that the opened // file is no longer valid." if err.kind() == io::ErrorKind::InvalidInput || (cfg!(windows) && err.raw_os_error() == Some(1006)) { ByteViewBacking::Buf(Cow::Borrowed(b"")) } else { return Err(err); } } }; Ok(ByteView::with_backing(backing)) } /// Constructs a `ByteView` from any `std::io::Reader`. /// /// **Note**: This currently consumes the entire reader and stores its data in an internal /// buffer. Prefer [`open`] when reading from the file system or [`from_slice`] / [`from_vec`] /// for in-memory operations. This behavior might change in the future. /// /// # Example /// /// ``` /// use std::io::Cursor; /// use symbolic_common::ByteView; /// /// fn main() -> Result<(), std::io::Error> { /// let reader = Cursor::new(b"1234"); /// let view = ByteView::read(reader)?; /// Ok(()) /// } /// ``` /// /// [`open`]: struct.ByteView.html#method.open /// [`from_slice`]: struct.ByteView.html#method.from_slice /// [`from_vec`]: struct.ByteView.html#method.from_vec pub fn read(mut reader: R) -> Result { let mut buffer = vec![]; reader.read_to_end(&mut buffer)?; Ok(ByteView::from_vec(buffer)) } /// Constructs a `ByteView` from a file path by memory mapping the file. /// /// # Example /// /// ```no_run /// use symbolic_common::ByteView; /// /// fn main() -> Result<(), std::io::Error> { /// let view = ByteView::open("test.txt")?; /// Ok(()) /// } /// ``` pub fn open>(path: P) -> Result { let file = File::open(path)?; Self::map_file(file) } /// Returns a slice of the underlying data. /// /// /// # Example /// /// ``` /// use symbolic_common::ByteView; /// /// let view = ByteView::from_slice(b"1234"); /// let data = view.as_slice(); /// ``` #[inline(always)] pub fn as_slice(&self) -> &[u8] { self.backing.deref() } } impl AsRef<[u8]> for ByteView<'_> { #[inline(always)] fn as_ref(&self) -> &[u8] { self.as_slice() } } impl Deref for ByteView<'_> { type Target = [u8]; #[inline(always)] fn deref(&self) -> &Self::Target { self.as_slice() } } unsafe impl StableDeref for ByteView<'_> {} #[cfg(test)] mod tests { use super::*; use std::io::{Read, Seek, Write}; use similar_asserts::assert_eq; use tempfile::NamedTempFile; #[test] fn test_open_empty_file() -> Result<(), std::io::Error> { let tmp = NamedTempFile::new()?; let view = ByteView::open(tmp.path())?; assert_eq!(&*view, b""); Ok(()) } #[test] fn test_open_file() -> Result<(), std::io::Error> { let mut tmp = NamedTempFile::new()?; tmp.write_all(b"1234")?; let view = ByteView::open(tmp.path())?; assert_eq!(&*view, b"1234"); Ok(()) } #[test] fn test_mmap_fd_reuse() -> Result<(), std::io::Error> { let mut tmp = NamedTempFile::new()?; tmp.write_all(b"1234")?; let view = ByteView::map_file_ref(tmp.as_file())?; // This deletes the file on disk. let _path = tmp.path().to_path_buf(); let mut file = tmp.into_file(); #[cfg(not(windows))] { assert!(!_path.exists()); } // Ensure we can still read from the the file after mmapping and deleting it on disk. let mut buf = Vec::new(); file.rewind()?; file.read_to_end(&mut buf)?; assert_eq!(buf, b"1234"); drop(file); // Ensure the byteview can still read the file as well. assert_eq!(&*view, b"1234"); Ok(()) } } symbolic-common-12.8.0/src/cell.rs000064400000000000000000000264431046102023000151100ustar 00000000000000//! Primitives for dealing with self-referential data. //! //! The types and traits in this module aim to work around the lack of self-referencial types in //! Rust. This can happen when a _dependent_ type -- one that needs to borrow data without holding //! on to the owning reference -- needs to be stored alongside its owner. This is inherently not //! possible in Rust, since it would require the owner to have a stable memory address, but it is //! moved along with the reference. //! //! This module solves this by introducing the `AsSelf` trait, which can be used to coerce the //! lifetime of a dependent object to the lifetime of its owners at the time of the borrow. //! //! See [`SelfCell`] and [`AsSelf`] for more information. //! //! [`SelfCell`]: struct.SelfCell.html //! [`AsSelf`]: trait.AsSelf.html // FIXME(swatinem): clippy 1.67 complains about `# Safety` docs for safe fns // #![allow(clippy::unnecessary_safety_doc)] use std::ops::Deref; pub use stable_deref_trait::StableDeref; /// Safe downcasting of dependent lifetime bounds on structs. /// /// This trait is similar to `AsRef`, except that it allows to capture the lifetime of the own /// instance at the time of the borrow. This allows to force it onto the type's lifetime bounds. /// This is particularly useful when the type's lifetime is somehow tied to it's own existence, such /// as in self-referential structs. See [`SelfCell`] for an implementation that makes use of this. /// /// # Implementation /// /// While this trait may be implemented for any type, it is only useful for types that specify a /// lifetime bound, such as `Cow` or [`ByteView`]. To implement, define `Ref` as the type with all /// dependent lifetimes set to `'slf`. Then, simply return `self` in `as_self`. /// /// ```rust /// use symbolic_common::AsSelf; /// /// struct Foo<'a>(&'a str); /// /// impl<'slf> AsSelf<'slf> for Foo<'_> { /// type Ref = Foo<'slf>; /// /// fn as_self(&'slf self) -> &Self::Ref { /// self /// } /// } /// ``` /// /// # Interior Mutability /// /// **Note** that if your type uses interior mutability (essentially any type from `std::sync`, but /// specifically everything built on top of `UnsafeCell`), this implicit coercion will not work. The /// compiler imposes this limitation by declaring any lifetime on such types as invariant, to avoid /// interior mutations to write back data with the lowered lifetime. /// /// If you are sure that your type will not borrow and store data of the lower lifetime, then /// implement the coercion with an unsafe transmute: /// /// ```rust /// use std::cell::UnsafeCell; /// use symbolic_common::AsSelf; /// /// struct Foo<'a>(UnsafeCell<&'a str>); /// /// impl<'slf> AsSelf<'slf> for Foo<'_> { /// type Ref = Foo<'slf>; /// /// fn as_self(&'slf self) -> &Self::Ref { /// unsafe { std::mem::transmute(self) } /// } /// } /// ``` /// /// [`SelfCell`]: struct.SelfCell.html /// [`ByteView`]: struct.ByteView.html pub trait AsSelf<'slf> { /// The `Self` type with `'slf` lifetimes, returned by `as_self`. type Ref: ?Sized; /// Returns a reference to `self` with downcasted lifetime. fn as_self(&'slf self) -> &Self::Ref; } impl AsSelf<'_> for u8 { type Ref = u8; fn as_self(&self) -> &Self::Ref { self } } impl AsSelf<'_> for str { type Ref = str; fn as_self(&self) -> &Self::Ref { self } } impl<'slf, T> AsSelf<'slf> for [T] where T: AsSelf<'slf>, T::Ref: Sized, { type Ref = [T::Ref]; fn as_self(&'slf self) -> &Self::Ref { unsafe { &*(self as *const [T] as *const [T::Ref]) } } } impl<'slf, T> AsSelf<'slf> for &'slf T where T: AsSelf<'slf> + ?Sized, { type Ref = T::Ref; fn as_self(&'slf self) -> &Self::Ref { (*self).as_self() } } impl<'slf, T> AsSelf<'slf> for &'slf mut T where T: AsSelf<'slf> + ?Sized, { type Ref = T::Ref; fn as_self(&'slf self) -> &Self::Ref { (**self).as_self() } } impl<'slf, T> AsSelf<'slf> for Vec where T: AsSelf<'slf>, T::Ref: Sized, { type Ref = [T::Ref]; fn as_self(&'slf self) -> &Self::Ref { (**self).as_self() } } impl<'slf, T> AsSelf<'slf> for std::rc::Rc where T: AsSelf<'slf>, { type Ref = T::Ref; fn as_self(&'slf self) -> &Self::Ref { (**self).as_self() } } impl<'slf, T> AsSelf<'slf> for std::sync::Arc where T: AsSelf<'slf>, { type Ref = T::Ref; fn as_self(&'slf self) -> &Self::Ref { (**self).as_self() } } /// A container carrying a derived object alongside its owner. /// /// **Warning**: This is an inherently unsafe type that builds on top of [`StableDeref`] and /// [`AsSelf`] to establish somewhat safe memory semantics. Always try to avoid self-references by /// storing data in an outer scope or avoiding the need alltogether, first. /// /// `SelfCell` stores an owner object that must implement [`StableDeref`]. This guarantees that the /// reference pointed to by the dependent object never moves over the lifetime of this object. This /// is already implemented for most heap-allocating types, like `Box`, `Rc`, `Arc` or `ByteView`. /// /// Additionally, the dependent object must implement [`AsSelf`]. This guarantees that the borrow's /// lifetime and its lifetime bounds never exceed the lifetime of the owner. As such, an object /// `Foo<'a>` that borrows data from the owner, will be coerced down to `Foo<'self>` when borrowing. /// There are two constructor functions, `new` and `try_new`, each of which are passed a pointer to /// the owned data. Dereferencing this pointer is intentionally unsafe, and beware that a borrow of /// that pointer **must not** leave the callback. /// /// While it is possible to store derived *references* in a `SelfCell`, too, there are simpler /// alternatives, such as `owning_ref::OwningRef`. Consider using such types before using /// `SelfCell`. /// /// ## Example /// /// ```rust /// use symbolic_common::{AsSelf, SelfCell}; /// /// struct Foo<'a>(&'a str); /// /// impl<'slf> AsSelf<'slf> for Foo<'_> { /// type Ref = Foo<'slf>; /// /// fn as_self(&'slf self) -> &Self::Ref { /// self /// } /// } /// /// let owner = String::from("hello world"); /// let cell = SelfCell::new(owner, |s| Foo(unsafe { &*s })); /// assert_eq!(cell.get().0, "hello world"); /// ``` /// /// [`StableDeref`]: trait.StableDeref.html /// [`AsSelf`]: trait.AsSelf.html #[derive(Clone, Debug)] pub struct SelfCell where O: StableDeref, { owner: O, derived: D, } impl<'slf, O, T> SelfCell where O: StableDeref + 'slf, T: AsSelf<'slf>, { /// Creates a new `SelfCell`. /// /// # Safety /// /// The callback receives a pointer to the owned data. Dereferencing the pointer is unsafe. Note /// that a borrow to that data can only safely be used to derive the object and **must not** /// leave the callback. /// /// # Example /// /// ``` /// use symbolic_common::SelfCell; /// /// let owner = String::from("hello world"); /// let cell = SelfCell::new(owner, |s| unsafe { &*s }); /// ``` #[inline] pub fn new(owner: O, derive: F) -> Self where F: FnOnce(*const ::Target) -> T, { let derived = derive(owner.deref() as *const _); SelfCell { owner, derived } } /// Creates a new `SelfCell` which may fail to construct. /// /// # Safety /// /// The callback receives a pointer to the owned data. Dereferencing the pointer is unsafe. Note /// that a borrow to that data can only safely be used to derive the object and **must not** /// leave the callback. /// /// # Example /// /// ``` /// use symbolic_common::SelfCell; /// /// fn main() -> Result<(), std::str::Utf8Error> { /// let owner = Vec::from("hello world"); /// let cell = SelfCell::try_new(owner, |s| unsafe { std::str::from_utf8(&*s) })?; /// Ok(()) /// } /// ``` #[inline] pub fn try_new(owner: O, derive: F) -> Result where F: FnOnce(*const ::Target) -> Result, { let derived = derive(owner.deref() as *const _)?; Ok(SelfCell { owner, derived }) } /// Unsafely creates a new `SelfCell` from a derived object by moving the owner. /// /// # Safety /// /// This is an inherently unsafe process. The caller must guarantee that the derived object only /// borrows from the owner that is moved into this container and the borrowed reference has a /// stable address. This is useful, when cloning the owner by deriving a sub-object. /// /// # Example /// /// ```rust /// use std::sync::Arc; /// use symbolic_common::{AsSelf, SelfCell}; /// /// struct Foo<'a>(&'a str); /// /// impl<'slf> AsSelf<'slf> for Foo<'_> { /// type Ref = Foo<'slf>; /// /// fn as_self(&'slf self) -> &Self::Ref { /// self /// } /// } /// /// // Create a clonable owner and move it into cell /// let owner = Arc::::from(" hello "); /// let cell = SelfCell::new(owner, |s| Foo(unsafe { &*s })); /// /// // Create a second derived object and clone the owner /// let trimmed = Foo(cell.get().0.trim()); /// let cell2 = unsafe { SelfCell::from_raw(cell.owner().clone(), trimmed) }; /// /// // Now, drop the original cell and continue using the clone /// assert_eq!(cell2.get().0, "hello"); /// ``` #[inline] pub unsafe fn from_raw(owner: O, derived: T) -> Self { SelfCell { owner, derived } } /// Returns a reference to the owner of this cell. /// /// # Example /// /// ``` /// use symbolic_common::SelfCell; /// /// let owner = String::from(" hello "); /// let cell = SelfCell::new(owner, |s| unsafe { (*s).trim() }); /// assert_eq!(cell.owner(), " hello "); /// ``` #[inline(always)] pub fn owner(&self) -> &O { &self.owner } /// Returns a safe reference to the derived object in this cell. /// /// # Example /// /// ``` /// use symbolic_common::SelfCell; /// /// let owner = String::from(" hello "); /// let cell = SelfCell::new(owner, |s| unsafe { (*s).trim() }); /// assert_eq!(cell.get(), "hello"); /// ``` #[inline(always)] pub fn get(&'slf self) -> &'slf >::Ref { self.derived.as_self() } } #[cfg(test)] mod tests { use super::*; use similar_asserts::assert_eq; #[derive(Debug, PartialEq)] struct Foo<'a>(&'a str); impl<'a> Foo<'a> { fn parse(s: &'a str) -> Result { s.parse::()?; Ok(Foo(s)) } } impl<'slf> AsSelf<'slf> for Foo<'_> { type Ref = Foo<'slf>; fn as_self(&'slf self) -> &Self::Ref { self } } #[test] fn test_new() { let fooref = SelfCell::new(String::from("hello world"), |s| Foo(unsafe { &*s })); assert_eq!(fooref.get().0, "hello world"); } #[test] fn test_try_new() { let result = SelfCell::try_new(String::from("42"), |s| Foo::parse(unsafe { &*s })); result.expect("parsing should not fail"); let result = SelfCell::try_new(String::from("hello world"), |s| Foo::parse(unsafe { &*s })); result.expect_err("parsing should fail"); } } symbolic-common-12.8.0/src/heuristics.rs000064400000000000000000000360431046102023000163500ustar 00000000000000//! Heuristics for correcting instruction pointers based on the CPU architecture. use crate::types::{Arch, CpuFamily}; const SIGILL: u32 = 4; const SIGBUS: u32 = 10; const SIGSEGV: u32 = 11; /// Helper to work with instruction addresses. /// /// Directly symbolicated stack traces may show the wrong calling symbols, as the stack frame's /// return addresses point a few bytes past the original call site, which may place the address /// within a different symbol entirely. /// /// The most useful function is [`caller_address`], which applies some heuristics to determine the /// call site of a function call based on the return address. /// /// # Examples /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// const SIGSEGV: u32 = 11; /// /// let caller_address = InstructionInfo::new(Arch::Arm64, 0x1337) /// .is_crashing_frame(false) /// .signal(Some(SIGSEGV)) /// .ip_register_value(Some(0x4242)) /// .caller_address(); /// /// assert_eq!(caller_address, 0x1330); /// ``` /// /// # Background /// /// When *calling* a function, it is necessary for the *called* function to know where it should /// return to upon completion. To support this, a *return address* is supplied as part of the /// standard function call semantics. This return address specifies the instruction that the called /// function should jump to upon completion of its execution. /// /// When a crash reporter generates a backtrace, it first collects the thread state of all active /// threads, including the **actual** current execution address. The reporter then iterates over /// those threads, walking backwards to find calling frames – what it's actually finding during this /// process are the **return addresses**. The actual address of the call instruction is not recorded /// anywhere. The only address available is the address at which execution should resume after /// function return. /// /// To make things more complicated, there is no guarantee that a return address be set to exactly /// one instruction after the call. It's entirely proper for a function to remove itself from the /// call stack by setting a different return address entirely. This is why you never see /// `objc_msgSend` in your backtrace unless you actually crash inside of `objc_msgSend`. When /// `objc_msgSend` jumps to a method's implementation, it leaves its caller's return address in /// place, and `objc_msgSend` itself disappears from the stack trace. In the case of `objc_msgSend`, /// the loss of that information is of no great importance, but it's hardly the only function that /// elides its own code from the return address. /// /// # Heuristics /// /// To resolve this particular issue, it is necessary for the symbolication implementor to apply a /// per-architecture heuristics to the return addresses, and thus derive the **likely** address of /// the actual calling instruction. There is a high probability of correctness, but absolutely no /// guarantee. /// /// This derived address **should** be used as the symbolication address, but **should not** replace /// the return address in the crash report. This derived address is a best guess, and if you replace /// the return address in the report, the end-user will have lost access to the original canonical /// data from which they could have made their own assessment. /// /// These heuristics must not be applied to frame #0 on any thread. The first frame of all threads /// contains the actual register state of that thread at the time that it crashed (if it's the /// crashing thread), or at the time it was suspended (if it is a non-crashing thread). These /// heuristics should only be applied to frames *after* frame #0 – that is, starting with frame #1. /// /// Additionally, these heuristics assume that your symbolication implementation correctly handles /// addresses that occur within an instruction, rather than directly at the start of a valid /// instruction. This should be the case for any reasonable implementation, but is something to be /// aware of when deploying these changes. /// /// ## x86 and x86-64 /// /// x86 uses variable-width instruction encodings; subtract one byte from the return address to /// derive an address that should be within the calling instruction. This will provide an address /// within a calling instruction found directly prior to the return address. /// /// ## ARMv6 and ARMv7 /// /// - **Step 1:** Strip the low order thumb bit from the return address. ARM uses the low bit to /// inform the processor that it should enter thumb mode when jumping to the return address. Since /// all instructions are at least 2 byte aligned, an actual instruction address will never have /// the low bit set. /// /// - **Step 2:** Subtract 2 Bytes. 32-bit ARM instructions are either 2 or 4 bytes long, depending /// on the use of thumb. This will place the symbolication address within the likely calling /// instruction. All ARM64 instructions are 4 bytes long; subtract 4 bytes from the return address /// to derive the likely address of the calling instruction. /// /// # More Information /// /// The above information was taken and slightly updated from the now-gone *PLCrashReporter Wiki*. /// An old copy can still be found in the [internet archive]. /// /// [internet archive]: https://web.archive.org/web/20161012225323/https://opensource.plausible.coop/wiki/display/PLCR/Automated+Crash+Report+Analysis /// [`caller_address`]: struct.InstructionInfo.html#method.caller_address #[derive(Clone, Debug)] pub struct InstructionInfo { addr: u64, arch: Arch, crashing_frame: bool, signal: Option, ip_reg: Option, } impl InstructionInfo { /// Creates a new instruction info instance. /// /// By default, the frame is not marked as *crashing frame*. The signal and instruction pointer /// register value are empty. /// /// # Examples /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let caller_address = InstructionInfo::new(Arch::X86, 0x1337) /// .caller_address(); /// ``` pub fn new(arch: Arch, instruction_address: u64) -> Self { Self { arch, addr: instruction_address, crashing_frame: false, signal: None, ip_reg: None, } } /// Marks this as the crashing frame. /// /// The crashing frame is the first frame yielded by the stack walker. In such a frame, the /// instruction address is the location of the direct crash. This is used by /// [`should_adjust_caller`] to determine which frames need caller address adjustment. /// /// Defaults to `false`. /// /// [`should_adjust_caller`]: struct.InstructionInfo.html#method.should_adjust_caller pub fn is_crashing_frame(&mut self, flag: bool) -> &mut Self { self.crashing_frame = flag; self } /// Sets a POSIX signal number. /// /// The signal number is used by [`should_adjust_caller`] to determine which frames need caller /// address adjustment. /// /// [`should_adjust_caller`]: struct.InstructionInfo.html#method.should_adjust_caller pub fn signal(&mut self, signal: Option) -> &mut Self { self.signal = signal; self } /// Sets the value of the instruction pointer register. /// /// This should be the original register value at the time of the crash, and not a restored /// register value. This is used by [`should_adjust_caller`] to determine which frames need /// caller address adjustment. /// /// [`should_adjust_caller`]: struct.InstructionInfo.html#method.should_adjust_caller pub fn ip_register_value(&mut self, value: Option) -> &mut Self { self.ip_reg = value; self } /// Tries to resolve the start address of the current instruction. /// /// For architectures without fixed alignment (such as Intel with variable instruction lengths), /// this will return the same address. Otherwise, the address is aligned to the architecture's /// instruction alignment. /// /// # Examples /// /// For example, on 64-bit ARM, addresses are aligned at 4 byte boundaries. This applies to all /// 64-bit ARM variants, even unknown ones: /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let info = InstructionInfo::new(Arch::Arm64, 0x1337); /// assert_eq!(info.aligned_address(), 0x1334); /// ``` pub fn aligned_address(&self) -> u64 { if let Some(alignment) = self.arch.cpu_family().instruction_alignment() { self.addr - (self.addr % alignment) } else { self.addr } } /// Returns the instruction preceding the current one. /// /// For known architectures, this will return the start address of the instruction immediately /// before the current one in the machine code. This is likely the instruction that was just /// executed or that called a function returning at the current address. /// /// For unknown architectures or those using variable instruction size, the exact start address /// cannot be determined. Instead, an address *within* the preceding instruction will be /// returned. For this reason, the return value of this function should be considered an upper /// bound. /// /// # Examples /// /// On 64-bit ARM, instructions have 4 bytes in size. The previous address is therefore 4 bytes /// before the start of the current instruction (returned by [`aligned_address`]): /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let info = InstructionInfo::new(Arch::Arm64, 0x1337); /// assert_eq!(info.previous_address(), 0x1330); /// ``` /// /// On the contrary, Intel uses variable-length instruction encoding. In such a case, the best /// effort is to subtract 1 byte and hope that it points into the previous instruction: /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let info = InstructionInfo::new(Arch::X86, 0x1337); /// assert_eq!(info.previous_address(), 0x1336); /// ``` /// /// [`aligned_address`]: struct.InstructionInfo.html#method.aligned_address pub fn previous_address(&self) -> u64 { let instruction_size = self.arch.cpu_family().instruction_alignment().unwrap_or(1); // In MIPS, the return address apparently often points two instructions after the the // previous program counter. On other architectures, just subtract one instruction. let pc_offset = match self.arch.cpu_family() { CpuFamily::Mips32 | CpuFamily::Mips64 => 2 * instruction_size, _ => instruction_size, }; self.aligned_address() - pc_offset } /// Returns whether the application attempted to jump to an invalid, privileged or misaligned /// address. /// /// This indicates that certain adjustments should be made on the caller instruction address. /// /// # Example /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// const SIGSEGV: u32 = 11; /// /// let is_crash = InstructionInfo::new(Arch::X86, 0x1337) /// .signal(Some(SIGSEGV)) /// .is_crash_signal(); /// /// assert!(is_crash); /// ``` pub fn is_crash_signal(&self) -> bool { matches!(self.signal, Some(SIGILL) | Some(SIGBUS) | Some(SIGSEGV)) } /// Determines whether the given address should be adjusted to resolve the call site of a stack /// frame. /// /// This generally applies to all frames except the crashing / suspended frame. However, if the /// process crashed with an illegal instruction, even the top-most frame needs to be adjusted to /// account for the signal handler. /// /// # Examples /// /// By default, all frames need to be adjusted. There are only few exceptions to this rule: The /// crashing frame is the first frame yielded in the stack trace and specifies the actual /// instruction pointer address. Therefore, it does not need to be adjusted: /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let should_adjust = InstructionInfo::new(Arch::X86, 0x1337) /// .is_crashing_frame(true) /// .should_adjust_caller(); /// /// assert!(!should_adjust); /// ``` pub fn should_adjust_caller(&self) -> bool { // All frames other than the crashing frame (or suspended frame for // other threads) report the return address. This address (generally) // points to the instruction after the function call. Therefore, we // need to adjust the caller address for these frames. if !self.crashing_frame { return true; } // KSCrash applies a heuristic to remove the signal handler frame from // the top of the stack trace, if the crash was caused by certain // signals. However, that means that the top-most frame contains a // return address just like any other and needs to be adjusted. if let Some(ip) = self.ip_reg { if ip != self.addr && self.is_crash_signal() { return true; } } // The crashing frame usually contains the actual register contents, // which points to the exact instruction that crashed and must not be // adjusted. false } /// Determines the address of the call site based on a return address. /// /// In the top most frame (often referred to as context frame), this is the value of the /// instruction pointer register. In all other frames, the return address is generally one /// instruction after the jump / call. /// /// This function actually resolves an address _within_ the call instruction rather than its /// beginning. Also, in some cases the top most frame has been modified by certain signal /// handlers or return optimizations. A set of heuristics tries to recover this for well-known /// cases. /// /// # Examples /// /// Returns the aligned address for crashing frames: /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let caller_address = InstructionInfo::new(Arch::Arm64, 0x1337) /// .is_crashing_frame(true) /// .caller_address(); /// /// assert_eq!(caller_address, 0x1334); /// ``` /// /// For all other frames, it returns the previous address: /// /// ``` /// use symbolic_common::{Arch, InstructionInfo}; /// /// let caller_address = InstructionInfo::new(Arch::Arm64, 0x1337) /// .is_crashing_frame(false) /// .caller_address(); /// /// assert_eq!(caller_address, 0x1330); /// ``` pub fn caller_address(&self) -> u64 { if self.should_adjust_caller() { self.previous_address() } else { self.aligned_address() } // NOTE: Currently, we only provide stack traces from KSCrash and // Breakpad. Both already apply a set of heuristics while stackwalking // in order to fix return addresses. It seems that no further heuristics // are necessary at the moment. } } symbolic-common-12.8.0/src/lib.rs000064400000000000000000000022561046102023000147330ustar 00000000000000//! Common functionality for `symbolic`. //! //! This crate exposes a set of key types: //! //! - [`ByteView`]: Gives access to binary data in-memory or on the file system. //! - [`SelfCell`]: Allows to create self-referential types. //! - [`Name`]: A symbol name that can be demangled with the `demangle` feature. //! - [`InstructionInfo`]: A utility type for instruction pointer heuristics. //! - Functions and utilities to deal with paths from different platforms. //! //! # Features //! //! - `serde` (optional): Implements `serde::Deserialize` and `serde::Serialize` for all data types. //! In the `symbolic` crate, this feature is exposed via `common-serde`. //! //! This module is part of the `symbolic` crate. //! //! [`Name`]: struct.Name.html //! [`ByteView`]: struct.ByteView.html //! [`InstructionInfo`]: struct.InstructionInfo.html //! [`SelfCell`]: struct.SelfCell.html #![warn(missing_docs)] mod byteview; mod cell; mod heuristics; mod path; mod sourcelinks; mod types; pub use crate::byteview::*; pub use crate::cell::*; pub use crate::heuristics::*; pub use crate::path::*; pub use crate::sourcelinks::*; pub use crate::types::*; pub use debugid::*; pub use uuid::Uuid; symbolic-common-12.8.0/src/path.rs000064400000000000000000000610241046102023000151170ustar 00000000000000use std::borrow::Cow; use std::ffi::OsStr; use std::path::{Path, PathBuf}; trait IntoChar { fn into_char(self) -> char; } impl IntoChar for char { fn into_char(self) -> char { self } } impl IntoChar for u8 { fn into_char(self) -> char { char::from(self) } } impl IntoChar for &'_ T { fn into_char(self) -> char { (*self).into_char() } } /// Returns `true` if the given character is any valid directory separator. #[inline] fn is_path_separator(c: C) -> bool { matches!(c.into_char(), '\\' | '/') } /// Returns `true` if the given character is a valid Windows directory separator. #[inline] fn is_windows_separator(c: C) -> bool { is_path_separator(c) } /// Returns `true` if the given character is a valid UNIX directory separator. #[inline] fn is_unix_separator(c: C) -> bool { c.into_char() == '/' } /// Returns `true` if this is a Windows Universal Naming Convention path (UNC). fn is_windows_unc>(path: P) -> bool { let path = path.as_ref(); path.starts_with(b"\\\\") || path.starts_with(b"//") } /// Returns `true` if this is an absolute Windows path starting with a drive letter. fn is_windows_driveletter>(path: P) -> bool { let path = path.as_ref(); if let (Some(drive_letter), Some(b':')) = (path.first(), path.get(1)) { if drive_letter.is_ascii_alphabetic() { return path.get(2).map_or(true, is_windows_separator); } } false } /// Returns `true` if this is an absolute Windows path. fn is_absolute_windows_path>(path: P) -> bool { let path = path.as_ref(); is_windows_unc(path) || is_windows_driveletter(path) } /// Returns `true` fn is_semi_absolute_windows_path>(path: P) -> bool { path.as_ref().first().map_or(false, is_windows_separator) } fn is_absolute_unix_path>(path: P) -> bool { path.as_ref().first().map_or(false, is_unix_separator) } fn is_windows_path>(path: P) -> bool { let path = path.as_ref(); is_absolute_windows_path(path) || path.contains(&b'\\') } /// Joins paths of various platforms. /// /// This attempts to detect Windows or Unix paths and joins with the correct directory separator. /// Also, trailing directory separators are detected in the base string and empty paths are handled /// correctly. /// /// # Examples /// /// Join a relative UNIX path: /// /// ``` /// assert_eq!(symbolic_common::join_path("/a/b", "c/d"), "/a/b/c/d"); /// ``` /// /// Join a Windows drive letter path path: /// /// ``` /// assert_eq!(symbolic_common::join_path("C:\\a", "b\\c"), "C:\\a\\b\\c"); /// ``` /// /// If the right-hand side is an absolute path, it replaces the left-hand side: /// /// ``` /// assert_eq!(symbolic_common::join_path("/a/b", "/c/d"), "/c/d"); /// ``` pub fn join_path(base: &str, other: &str) -> String { // special case for things like or others. if other.starts_with('<') && other.ends_with('>') { return other.into(); } // absolute paths if base.is_empty() || is_absolute_windows_path(other) || is_absolute_unix_path(other) { return other.into(); } // other weird cases if other.is_empty() { return base.into(); } // C:\test + \bar -> C:\bar if is_semi_absolute_windows_path(other) { if is_absolute_windows_path(base) { return format!("{}{}", &base[..2], other); } else { return other.into(); } } // Always trim by both separators, since as soon as the path is Windows, slashes also count as // valid path separators. However, use the main separator for joining. let is_windows = is_windows_path(base) || is_windows_path(other); format!( "{}{}{}", base.trim_end_matches(is_path_separator), if is_windows { '\\' } else { '/' }, other.trim_start_matches(is_path_separator) ) } fn pop_path(path: &mut String) -> bool { if let Some(idx) = path.rfind(is_path_separator) { path.truncate(idx); true } else if !path.is_empty() { path.truncate(0); true } else { false } } /// Simplifies paths by stripping redundant components. /// /// This removes redundant `../` or `./` path components. However, this function does not operate on /// the file system. Since it does not resolve symlinks, this is a potentially lossy operation. /// /// # Examples /// /// Remove `./` components: /// /// ``` /// assert_eq!(symbolic_common::clean_path("/a/./b"), "/a/b"); /// ``` /// /// Remove path components followed by `../`: /// /// ``` /// assert_eq!(symbolic_common::clean_path("/a/b/../c"), "/a/c"); /// ``` /// /// Note that when the path is relative, the parent dir components may exceed the top-level: /// /// ``` /// assert_eq!(symbolic_common::clean_path("/foo/../../b"), "../b"); /// ``` pub fn clean_path(path: &str) -> Cow<'_, str> { // TODO: This function has a number of problems (see broken tests): // - It does not collapse consequtive directory separators // - Parent-directory directives may leave an absolute path // - A path is converted to relative when the parent directory hits top-level let mut rv = String::with_capacity(path.len()); let main_separator = if is_windows_path(path) { '\\' } else { '/' }; let mut needs_separator = false; let mut is_past_root = false; for segment in path.split_terminator(is_path_separator) { if segment == "." { continue; } else if segment == ".." { if !is_past_root && pop_path(&mut rv) { if rv.is_empty() { needs_separator = false; } } else { if !is_past_root { needs_separator = false; is_past_root = true; } if needs_separator { rv.push(main_separator); } rv.push_str(".."); needs_separator = true; } continue; } if needs_separator { rv.push(main_separator); } else { needs_separator = true; } rv.push_str(segment); } // For now, always return an owned string. // This can be optimized later. Cow::Owned(rv) } /// Splits off the last component of a path given as bytes. /// /// The path should be a path to a file, and not a directory with a trailing directory separator. If /// this path is a directory or the root path, the result is undefined. /// /// This attempts to detect Windows or Unix paths and split off the last component of the path /// accordingly. Note that for paths with mixed slash and backslash separators this might not lead /// to the desired results. /// /// **Note**: This is the same as [`split_path`], except that it operates on byte slices. /// /// # Examples /// /// Split the last component of a UNIX path: /// /// ``` /// assert_eq!( /// symbolic_common::split_path_bytes(b"/a/b/c"), /// (Some("/a/b".as_bytes()), "c".as_bytes()) /// ); /// ``` /// /// Split the last component of a Windows path: /// /// ``` /// assert_eq!( /// symbolic_common::split_path_bytes(b"C:\\a\\b"), /// (Some("C:\\a".as_bytes()), "b".as_bytes()) /// ); /// ``` /// /// [`split_path`]: fn.split_path.html pub fn split_path_bytes(path: &[u8]) -> (Option<&[u8]>, &[u8]) { // Trim directory separators at the end, if any. let path = match path.iter().rposition(|c| !is_path_separator(c)) { Some(cutoff) => &path[..=cutoff], None => path, }; // Split by all path separators. On Windows, both are valid and a path is considered a // Windows path as soon as it has a backslash inside. match path.iter().rposition(is_path_separator) { Some(0) => (Some(&path[..1]), &path[1..]), Some(pos) => (Some(&path[..pos]), &path[pos + 1..]), None => (None, path), } } /// Splits off the last component of a path. /// /// The path should be a path to a file, and not a directory. If this path is a directory or the /// root path, the result is undefined. /// /// This attempts to detect Windows or Unix paths and split off the last component of the path /// accordingly. Note that for paths with mixed slash and backslash separators this might not lead /// to the desired results. /// /// **Note**: For a version that operates on byte slices, see [`split_path_bytes`]. /// /// # Examples /// /// Split the last component of a UNIX path: /// /// ``` /// assert_eq!(symbolic_common::split_path("/a/b/c"), (Some("/a/b"), "c")); /// ``` /// /// Split the last component of a Windows path: /// /// ``` /// assert_eq!(symbolic_common::split_path("C:\\a\\b"), (Some("C:\\a"), "b")); /// ``` /// /// [`split_path_bytes`]: fn.split_path_bytes.html pub fn split_path(path: &str) -> (Option<&str>, &str) { let (dir, name) = split_path_bytes(path.as_bytes()); unsafe { ( dir.map(|b| std::str::from_utf8_unchecked(b)), std::str::from_utf8_unchecked(name), ) } } /// Truncates the given string at character boundaries. fn truncate(path: &str, mut length: usize) -> &str { // Backtrack to the last code point. There is a unicode point at least at the beginning of the // string before the first character, which is why this cannot underflow. while !path.is_char_boundary(length) { length -= 1; } path.get(..length).unwrap_or_default() } /// Trims a path to a given length. /// /// This attempts to not completely destroy the path in the process by trimming off the middle path /// segments. In the process, this tries to determine whether the path is a Windows or Unix path and /// handle directory separators accordingly. /// /// # Examples /// /// ``` /// assert_eq!( /// symbolic_common::shorten_path("/foo/bar/baz/blah/blafasel", 21), /// "/foo/.../blafasel" /// ); /// ``` pub fn shorten_path(path: &str, length: usize) -> Cow<'_, str> { // trivial cases if path.len() <= length { return Cow::Borrowed(path); } else if length <= 3 { return Cow::Borrowed(truncate(path, length)); } else if length <= 10 { return Cow::Owned(format!("{}...", truncate(path, length - 3))); } let mut rv = String::new(); let mut last_idx = 0; let mut piece_iter = path.match_indices(is_path_separator); let mut final_sep = "/"; let max_len = length - 4; // make sure we get two segments at the start. for (idx, sep) in &mut piece_iter { let slice = &path[last_idx..idx + sep.len()]; rv.push_str(slice); let done = last_idx > 0; last_idx = idx + sep.len(); final_sep = sep; if done { break; } } // collect the rest of the segments into a temporary we can then reverse. let mut final_length = rv.len() as i64; let mut rest = vec![]; let mut next_idx = path.len(); while let Some((idx, _)) = piece_iter.next_back() { if idx <= last_idx { break; } let slice = &path[idx + 1..next_idx]; if final_length + (slice.len() as i64) > max_len as i64 { break; } rest.push(slice); next_idx = idx + 1; final_length += slice.len() as i64; } // if at this point already we're too long we just take the last element // of the path and strip it. if rv.len() > max_len || rest.is_empty() { let basename = path.rsplit(is_path_separator).next().unwrap(); if basename.len() > max_len { return Cow::Owned(format!("...{}", &basename[basename.len() - max_len + 1..])); } else { return Cow::Owned(format!("...{final_sep}{basename}")); } } rest.reverse(); rv.push_str("..."); rv.push_str(final_sep); for item in rest { rv.push_str(item); } Cow::Owned(rv) } /// Extensions to `Path` for handling `dSYM` directories. /// /// # dSYM Files /// /// `dSYM` files are actually folder structures that store debugging information on Apple platforms. /// They are also referred to as debug companion. At the core of this structure is a `MachO` file /// containing the actual debug information. /// /// A full `dSYM` folder structure looks like this: /// /// ```text /// MyApp.dSYM /// └── Contents /// ├── Info.plist /// └── Resources /// └── DWARF /// └── MyApp /// ``` pub trait DSymPathExt { /// Returns `true` if this path points to an existing directory with a `.dSYM` extension. /// /// Note that this does not check if a full `dSYM` structure is contained within this folder. /// /// # Examples /// /// ```no_run /// use std::path::Path; /// use symbolic_common::DSymPathExt; /// /// assert!(Path::new("Foo.dSYM").is_dsym_dir()); /// assert!(!Path::new("Foo").is_dsym_dir()); /// ``` fn is_dsym_dir(&self) -> bool; /// Resolves the path of the debug file in a `dSYM` directory structure. /// /// Returns `Some(path)` if this path is a dSYM directory according to [`is_dsym_dir`], and a /// file of the same name is located at `Contents/Resources/DWARF/`. /// /// # Examples /// /// ```no_run /// use std::path::Path; /// use symbolic_common::DSymPathExt; /// /// let path = Path::new("Foo.dSYM"); /// let dsym_path = path.resolve_dsym().unwrap(); /// assert_eq!(dsym_path, Path::new("Foo.dSYM/Contents/Resources/DWARF/Foo")); /// ``` /// /// [`is_dsym_dir`]: trait.DSymPathExt.html#tymethod.is_dsym_dir fn resolve_dsym(&self) -> Option; /// Resolves the `dSYM` parent directory if this file is a dSYM. /// /// If this path points to the MachO file in a `dSYM` directory structure, this function returns /// the path to the dSYM directory. Returns `None` if the parent does not exist or the file name /// does not match. /// /// # Examples /// /// ```no_run /// use std::path::Path; /// use symbolic_common::DSymPathExt; /// /// let path = Path::new("Foo.dSYM/Contents/Resources/DWARF/Foo"); /// let parent = path.dsym_parent().unwrap(); /// assert_eq!(parent, Path::new("Foo.dSYM")); /// /// let path = Path::new("Foo.dSYM/Contents/Resources/DWARF/Bar"); /// assert_eq!(path.dsym_parent(), None); /// ``` fn dsym_parent(&self) -> Option<&Path>; } impl DSymPathExt for Path { fn is_dsym_dir(&self) -> bool { self.extension() == Some("dSYM".as_ref()) && self.is_dir() } fn resolve_dsym(&self) -> Option { if !self.is_dsym_dir() || !self.is_dir() { return None; } let framework = self.file_stem()?; let mut full_path = self.to_path_buf(); full_path.push("Contents/Resources/DWARF"); full_path.push(framework); // XCode produces [appName].app.dSYM files where the debug file's name is just [appName], // so strip .app if it's present. if matches!(full_path.extension(), Some(extension) if extension == "app") { full_path = full_path.with_extension("") } if full_path.is_file() { Some(full_path) } else { None } } fn dsym_parent(&self) -> Option<&Path> { let framework = self.file_name()?; let mut parent = self.parent()?; if !parent.ends_with("Contents/Resources/DWARF") { return None; } for _ in 0..3 { parent = parent.parent()?; } // Accept both Filename.dSYM and Filename.framework.dSYM as // the bundle directory name. let stem_matches = parent .file_name() .and_then(|name| Path::new(name).file_stem()) .map(|stem| { if stem == framework { return true; } let alt = Path::new(stem); alt.file_stem() == Some(framework) && alt.extension() == Some(OsStr::new("framework")) }) .unwrap_or(false); if parent.is_dsym_dir() && stem_matches { Some(parent) } else { None } } } #[cfg(test)] mod tests { use super::*; use similar_asserts::assert_eq; use symbolic_testutils::fixture; #[test] fn test_join_path() { assert_eq!(join_path("foo", "C:"), "C:"); assert_eq!(join_path("foo", "C:bar"), "foo/C:bar"); assert_eq!(join_path("C:\\a", "b"), "C:\\a\\b"); assert_eq!(join_path("C:/a", "b"), "C:/a\\b"); assert_eq!(join_path("C:\\a", "b\\c"), "C:\\a\\b\\c"); assert_eq!(join_path("C:/a", "C:\\b"), "C:\\b"); assert_eq!(join_path("a\\b\\c", "d\\e"), "a\\b\\c\\d\\e"); assert_eq!(join_path("\\\\UNC\\", "a"), "\\\\UNC\\a"); assert_eq!(join_path("C:\\foo/bar", "\\baz"), "C:\\baz"); assert_eq!(join_path("\\foo/bar", "\\baz"), "\\baz"); assert_eq!(join_path("/a/b", "\\c"), "\\c"); assert_eq!(join_path("/a/b", "c"), "/a/b/c"); assert_eq!(join_path("/a/b", "c/d"), "/a/b/c/d"); assert_eq!(join_path("/a/b", "/c/d/e"), "/c/d/e"); assert_eq!(join_path("a/b/", "c"), "a/b/c"); assert_eq!(join_path("a/b/", ""), ""); assert_eq!( join_path("C:\\test", "<::core::macros::assert_eq macros>"), "<::core::macros::assert_eq macros>" ); assert_eq!( join_path("foo", "아이쿱 조합원 앱카드"), "foo/아이쿱 조합원 앱카드" ); } #[test] fn test_clean_path() { assert_eq!(clean_path("/foo/bar/baz/./blah"), "/foo/bar/baz/blah"); assert_eq!(clean_path("/foo/bar/baz/./blah/"), "/foo/bar/baz/blah"); assert_eq!(clean_path("foo/bar/baz/./blah/"), "foo/bar/baz/blah"); assert_eq!(clean_path("foo/bar/baz/../blah/"), "foo/bar/blah"); assert_eq!(clean_path("../../blah/"), "../../blah"); assert_eq!(clean_path("..\\../blah/"), "..\\..\\blah"); assert_eq!(clean_path("foo\\bar\\baz/../blah/"), "foo\\bar\\blah"); assert_eq!(clean_path("foo\\bar\\baz/../../../../blah/"), "..\\blah"); assert_eq!(clean_path("foo/bar/baz/../../../../blah/"), "../blah"); assert_eq!(clean_path("..\\foo"), "..\\foo"); assert_eq!(clean_path("foo"), "foo"); assert_eq!(clean_path("foo\\bar\\baz/../../../blah/"), "blah"); assert_eq!(clean_path("foo/bar/baz/../../../blah/"), "blah"); assert_eq!(clean_path("\\\\foo\\..\\bar"), "\\\\bar"); assert_eq!( clean_path("foo/bar/../아이쿱 조합원 앱카드"), "foo/아이쿱 조합원 앱카드" ); // XXX currently known broken tests: // assert_eq!(clean_path("/foo/../bar"), "/bar"); // assert_eq!(clean_path("\\\\foo\\..\\..\\bar"), "\\\\bar"); // assert_eq!(clean_path("/../../blah/"), "/blah"); // assert_eq!(clean_path("c:\\..\\foo"), "c:\\foo"); } #[test] fn test_shorten_path() { assert_eq!(shorten_path("/foo/bar/baz/blah/blafasel", 6), "/fo..."); assert_eq!(shorten_path("/foo/bar/baz/blah/blafasel", 2), "/f"); assert_eq!( shorten_path("/foo/bar/baz/blah/blafasel", 21), "/foo/.../blafasel" ); assert_eq!( shorten_path("/foo/bar/baz/blah/blafasel", 22), "/foo/.../blah/blafasel" ); assert_eq!( shorten_path("C:\\bar\\baz\\blah\\blafasel", 20), "C:\\bar\\...\\blafasel" ); assert_eq!( shorten_path("/foo/blar/baz/blah/blafasel", 27), "/foo/blar/baz/blah/blafasel" ); assert_eq!( shorten_path("/foo/blar/baz/blah/blafasel", 26), "/foo/.../baz/blah/blafasel" ); assert_eq!( shorten_path("/foo/b/baz/blah/blafasel", 23), "/foo/.../blah/blafasel" ); assert_eq!(shorten_path("/foobarbaz/blahblah", 16), ".../blahblah"); assert_eq!(shorten_path("/foobarbazblahblah", 12), "...lahblah"); assert_eq!(shorten_path("", 0), ""); assert_eq!(shorten_path("아이쿱 조합원 앱카드", 9), "아..."); assert_eq!(shorten_path("아이쿱 조합원 앱카드", 20), "...ᆸ카드"); } #[test] fn test_split_path() { assert_eq!(split_path("C:\\a\\b"), (Some("C:\\a"), "b")); assert_eq!(split_path("C:/a\\b"), (Some("C:/a"), "b")); assert_eq!(split_path("C:\\a\\b\\c"), (Some("C:\\a\\b"), "c")); assert_eq!(split_path("a\\b\\c\\d\\e"), (Some("a\\b\\c\\d"), "e")); assert_eq!(split_path("\\\\UNC\\a"), (Some("\\\\UNC"), "a")); assert_eq!(split_path("/a/b/c"), (Some("/a/b"), "c")); assert_eq!(split_path("/a/b/c/d"), (Some("/a/b/c"), "d")); assert_eq!(split_path("a/b/c"), (Some("a/b"), "c")); assert_eq!(split_path("a"), (None, "a")); assert_eq!(split_path("a/"), (None, "a")); assert_eq!(split_path("/a"), (Some("/"), "a")); assert_eq!(split_path(""), (None, "")); assert_eq!( split_path("foo/아이쿱 조합원 앱카드"), (Some("foo"), "아이쿱 조합원 앱카드") ); } #[test] fn test_split_path_bytes() { assert_eq!( split_path_bytes(&b"C:\\a\\b"[..]), (Some(&b"C:\\a"[..]), &b"b"[..]) ); assert_eq!( split_path_bytes(&b"C:/a\\b"[..]), (Some(&b"C:/a"[..]), &b"b"[..]) ); assert_eq!( split_path_bytes(&b"C:\\a\\b\\c"[..]), (Some(&b"C:\\a\\b"[..]), &b"c"[..]) ); assert_eq!( split_path_bytes(&b"a\\b\\c\\d\\e"[..]), (Some(&b"a\\b\\c\\d"[..]), &b"e"[..]) ); assert_eq!( split_path_bytes(&b"\\\\UNC\\a"[..]), (Some(&b"\\\\UNC"[..]), &b"a"[..]) ); assert_eq!( split_path_bytes(&b"/a/b/c"[..]), (Some(&b"/a/b"[..]), &b"c"[..]) ); assert_eq!( split_path_bytes(&b"/a/b/c/d"[..]), (Some(&b"/a/b/c"[..]), &b"d"[..]) ); assert_eq!( split_path_bytes(&b"a/b/c"[..]), (Some(&b"a/b"[..]), &b"c"[..]) ); assert_eq!(split_path_bytes(&b"a"[..]), (None, &b"a"[..])); assert_eq!(split_path_bytes(&b"a/"[..]), (None, &b"a"[..])); assert_eq!(split_path_bytes(&b"/a"[..]), (Some(&b"/"[..]), &b"a"[..])); assert_eq!(split_path_bytes(&b""[..]), (None, &b""[..])); } #[test] fn test_is_dsym_dir() { assert!(fixture("macos/crash.dSYM").is_dsym_dir()); assert!(!fixture("macos/crash").is_dsym_dir()); } #[test] fn test_resolve_dsym() { let crash_path = fixture("macos/crash.dSYM"); let resolved = crash_path.resolve_dsym().unwrap(); assert!(resolved.exists()); assert!(resolved.ends_with("macos/crash.dSYM/Contents/Resources/DWARF/crash")); let other_path = fixture("macos/other.dSYM"); assert_eq!(other_path.resolve_dsym(), None); } // XCode and other tools (e.g. dwarfdump) produce a dSYM that includes the .app // suffix, which needs to be stripped. #[test] fn test_resolve_dsym_double_extension() { let crash_path = fixture("macos/crash.app.dSYM"); let resolved = crash_path.resolve_dsym().unwrap(); assert!(resolved.exists()); assert!(resolved.ends_with("macos/crash.app.dSYM/Contents/Resources/DWARF/crash")); let other_path = fixture("macos/other.dmp.dSYM"); assert_eq!(other_path.resolve_dsym(), None); } #[test] fn test_dsym_parent() { let crash_path = fixture("macos/crash.dSYM/Contents/Resources/DWARF/crash"); let dsym_path = crash_path.dsym_parent().unwrap(); assert!(dsym_path.exists()); assert!(dsym_path.ends_with("macos/crash.dSYM")); let other_path = fixture("macos/crash.dSYM/Contents/Resources/DWARF/invalid"); assert_eq!(other_path.dsym_parent(), None); } #[test] fn test_dsym_parent_framework() { let dwarf_path = fixture("macos/Example.framework.dSYM/Contents/Resources/DWARF/Example"); let dsym_path = dwarf_path.dsym_parent().unwrap(); assert!(dsym_path.exists()); assert!(dsym_path.ends_with("macos/Example.framework.dSYM")); } } symbolic-common-12.8.0/src/sourcelinks.rs000064400000000000000000000143301046102023000165220ustar 00000000000000use std::cmp::Ordering; use std::collections::BTreeMap; /// A pattern for matching source paths. /// /// A pattern either matches a string exactly (`Exact`) /// or it matches any string starting with a certain prefix (`Prefix`). /// /// Patterns are ordered as follows: /// 1. Exact patterns come before prefixes /// 2. Exact patterns are ordered lexicographically /// 3. Prefix patterns are ordered inversely by length, i.e., /// longer before shorter, and lexicographically among equally long strings. #[derive(Debug, Clone, PartialEq, Eq)] enum Pattern { Exact(String), Prefix(String), } impl Pattern { fn parse(input: &str) -> Self { if let Some(prefix) = input.strip_suffix('*') { Pattern::Prefix(prefix.to_lowercase()) } else { Pattern::Exact(input.to_lowercase()) } } } impl Ord for Pattern { fn cmp(&self, other: &Self) -> Ordering { match (self, other) { (Pattern::Exact(s), Pattern::Exact(t)) => s.cmp(t), (Pattern::Exact(_), Pattern::Prefix(_)) => Ordering::Less, (Pattern::Prefix(_), Pattern::Exact(_)) => Ordering::Greater, (Pattern::Prefix(s), Pattern::Prefix(t)) => match s.len().cmp(&t.len()) { Ordering::Greater => Ordering::Less, Ordering::Equal => s.cmp(t), Ordering::Less => Ordering::Greater, }, } } } impl PartialOrd for Pattern { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } /// A structure mapping source file paths to remote locations. /// /// # Example /// ``` /// use symbolic_common::SourceLinkMappings; /// let mappings = vec![ /// ("C:\\src\\*", "http://MyDefaultDomain.com/src/*"), /// ("C:\\src\\fOO\\*", "http://MyFooDomain.com/src/*"), /// ("C:\\src\\foo\\specific.txt", "http://MySpecificFoodDomain.com/src/specific.txt"), /// ("C:\\src\\bar\\*", "http://MyBarDomain.com/src/*"), /// ]; /// let mappings = SourceLinkMappings::new(mappings.into_iter()); /// let resolved = mappings.resolve("c:\\src\\bAr\\foo\\FiLe.txt").unwrap(); /// assert_eq!(resolved, "http://MyBarDomain.com/src/foo/FiLe.txt"); /// ```` #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct SourceLinkMappings { mappings: BTreeMap, } impl<'a> Extend<(&'a str, &'a str)> for SourceLinkMappings { fn extend>(&mut self, iter: T) { self.mappings.extend( iter.into_iter() .map(|(k, v)| (Pattern::parse(k), v.to_string())), ) } } impl SourceLinkMappings { /// Creates a `SourceLinkMappings` struct from an iterator of pattern/target pairs. pub fn new<'a, I: IntoIterator>(iter: I) -> Self { let mut res = Self::default(); res.extend(iter); res } /// Returns true if this structure contains no mappings. pub fn is_empty(&self) -> bool { self.mappings.is_empty() } /// Resolve the path to a URL. pub fn resolve(&self, path: &str) -> Option { // Note: this is currently quite simple, just pick the first match. If we needed to improve // performance in the future because we encounter PDBs with too many items, we can do a // prefix binary search, for example. let path_lower = path.to_lowercase(); for (pattern, target) in &self.mappings { match &pattern { Pattern::Exact(value) => { if value == &path_lower { return Some(target.clone()); } } Pattern::Prefix(value) => { if path_lower.starts_with(value) { let replacement = path .get(value.len()..) .unwrap_or_default() .replace('\\', "/"); return Some(target.replace('*', &replacement)); } } } } None } } #[cfg(test)] mod tests { use super::*; #[test] fn test_mapping() { let mappings = vec![ ("C:\\src\\*", "http://MyDefaultDomain.com/src/*"), ("C:\\src\\fOO\\*", "http://MyFooDomain.com/src/*"), ( "C:\\src\\foo\\specific.txt", "http://MySpecificFoodDomain.com/src/specific.txt", ), ("C:\\src\\bar\\*", "http://MyBarDomain.com/src/*"), ("C:\\src\\file.txt", "https://example.com/file.txt"), ("/home/user/src/*", "https://linux.com/*"), ]; let mappings = SourceLinkMappings::new(mappings); assert_eq!(mappings.mappings.len(), 6); // In this example: // All files under directory bar will map to a relative URL beginning with http://MyBarDomain.com/src/. // All files under directory foo will map to a relative URL beginning with http://MyFooDomain.com/src/ EXCEPT foo/specific.txt which will map to http://MySpecificFoodDomain.com/src/specific.txt. // All other files anywhere under the src directory will map to a relative url beginning with http://MyDefaultDomain.com/src/. assert!(mappings.resolve("c:\\other\\path").is_none()); assert!(mappings.resolve("/home/path").is_none()); assert_eq!( mappings.resolve("c:\\src\\bAr\\foo\\FiLe.txt").unwrap(), "http://MyBarDomain.com/src/foo/FiLe.txt" ); assert_eq!( mappings.resolve("c:\\src\\foo\\FiLe.txt").unwrap(), "http://MyFooDomain.com/src/FiLe.txt" ); assert_eq!( mappings.resolve("c:\\src\\foo\\SpEcIfIc.txt").unwrap(), "http://MySpecificFoodDomain.com/src/specific.txt" ); assert_eq!( mappings.resolve("c:\\src\\other\\path").unwrap(), "http://MyDefaultDomain.com/src/other/path" ); assert_eq!( mappings.resolve("c:\\src\\other\\path").unwrap(), "http://MyDefaultDomain.com/src/other/path" ); assert_eq!( mappings.resolve("/home/user/src/Path/TO/file.txt").unwrap(), "https://linux.com/Path/TO/file.txt" ); } } symbolic-common-12.8.0/src/types.rs000064400000000000000000000627311046102023000153350ustar 00000000000000//! Common types and errors used in `symbolic`. use std::borrow::Cow; use std::fmt; use std::str; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// Represents a family of CPUs. /// /// This is strongly connected to the [`Arch`] type, but reduces the selection to a range of /// families with distinct properties, such as a generally common instruction set and pointer size. /// /// This enumeration is represented as `u32` for C-bindings and lowlevel APIs. /// /// [`Arch`]: enum.Arch.html #[repr(u32)] #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Default)] pub enum CpuFamily { /// Any other CPU family that is not explicitly supported. #[default] Unknown = 0, /// 32-bit little-endian CPUs using the Intel 8086 instruction set, also known as `x86`. Intel32 = 1, /// 64-bit little-endian, also known as `x86_64`, now widely used by Intel and AMD. Amd64 = 2, /// 32-bit ARM. Arm32 = 3, /// 64-bit ARM (e.g. ARMv8-A). Arm64 = 4, /// 32-bit big-endian PowerPC. Ppc32 = 5, /// 64-bit big-endian PowerPC. Ppc64 = 6, /// 32-bit MIPS. Mips32 = 7, /// 64-bit MIPS. Mips64 = 8, /// ILP32 ABI on 64-bit ARM. Arm64_32 = 9, /// Virtual WASM 32-bit architecture. Wasm32 = 10, } impl CpuFamily { /// Returns the native pointer size. /// /// This commonly defines the size of CPU registers including the instruction pointer, and the /// size of all pointers on the platform. /// /// This function returns `None` if the CPU family is unknown. /// /// # Examples /// /// ``` /// use symbolic_common::CpuFamily; /// /// assert_eq!(CpuFamily::Amd64.pointer_size(), Some(8)); /// assert_eq!(CpuFamily::Intel32.pointer_size(), Some(4)); /// ``` pub fn pointer_size(self) -> Option { match self { CpuFamily::Unknown => None, CpuFamily::Wasm32 => Some(4), CpuFamily::Amd64 | CpuFamily::Arm64 | CpuFamily::Ppc64 | CpuFamily::Mips64 | CpuFamily::Arm64_32 => Some(8), CpuFamily::Intel32 | CpuFamily::Arm32 | CpuFamily::Ppc32 | CpuFamily::Mips32 => Some(4), } } /// Returns instruction alignment if fixed. /// /// Some instruction sets, such as Intel's x86, use variable length instruction encoding. /// Others, such as ARM, have fixed length instructions. This method returns `Some` for fixed /// size instructions and `None` for variable-length instruction sizes. /// /// # Examples /// /// ``` /// use symbolic_common::CpuFamily; /// /// // variable length on x86_64: /// assert_eq!(CpuFamily::Amd64.instruction_alignment(), None); /// /// // 4-byte alignment on all 64-bit ARM variants: /// assert_eq!(CpuFamily::Arm64.instruction_alignment(), Some(4)); /// ``` pub fn instruction_alignment(self) -> Option { match self { CpuFamily::Wasm32 => Some(4), CpuFamily::Arm32 => Some(2), CpuFamily::Arm64 | CpuFamily::Arm64_32 => Some(4), CpuFamily::Ppc32 | CpuFamily::Mips32 | CpuFamily::Mips64 => Some(4), CpuFamily::Ppc64 => Some(8), CpuFamily::Intel32 | CpuFamily::Amd64 => None, CpuFamily::Unknown => None, } } /// Returns the name of the instruction pointer register. /// /// The instruction pointer register holds a pointer to currrent code execution at all times. /// This is a differrent register on each CPU family. The size of the value in this register is /// specified by [`pointer_size`]. /// /// Returns `None` if the CPU family is unknown. /// /// # Examples /// /// ``` /// use symbolic_common::CpuFamily; /// /// assert_eq!(CpuFamily::Amd64.ip_register_name(), Some("rip")); /// ``` /// /// [`pointer_size`]: enum.CpuFamily.html#method.pointer_size pub fn ip_register_name(self) -> Option<&'static str> { // NOTE: These values do not correspond to the register names defined in this file, but to // the names exposed by breakpad. This mapping is implemented in `data_structures.cpp`. match self { CpuFamily::Intel32 => Some("eip"), CpuFamily::Amd64 => Some("rip"), CpuFamily::Arm32 | CpuFamily::Arm64 | CpuFamily::Arm64_32 => Some("pc"), CpuFamily::Ppc32 | CpuFamily::Ppc64 => Some("srr0"), CpuFamily::Mips32 | CpuFamily::Mips64 => Some("pc"), CpuFamily::Wasm32 => None, CpuFamily::Unknown => None, } } } /// An error returned for an invalid [`Arch`](enum.Arch.html). #[derive(Debug)] pub struct UnknownArchError; impl fmt::Display for UnknownArchError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "unknown architecture") } } impl std::error::Error for UnknownArchError {} /// An enumeration of CPU architectures and variants. /// /// The architectues are grouped into families, which can be retrieved by [`cpu_family`]. There are /// `*Unknown` variants for each architecture to maintain forward-compatibility. This allows to /// support architectures where the family is known but the subtype is not. /// /// Each architecture has a canonical name, returned by [`Arch::name`]. Likewise, architectures can /// be parsed from their string names. In addition to that, in some cases aliases are supported. For /// instance, `"x86"` is aliased as `"i386"`. /// /// This enumeration is represented as `u32` for C-bindings and lowlevel APIs. The values are /// grouped by CPU family for forward compatibility. /// /// [`cpu_family`]: enum.Arch.html#method.cpu_family /// [`Arch::name`]: enum.Arch.html#method.name #[repr(u32)] #[non_exhaustive] #[allow(missing_docs)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Default)] pub enum Arch { #[default] Unknown = 0, X86 = 101, X86Unknown = 199, Amd64 = 201, Amd64h = 202, Amd64Unknown = 299, Arm = 301, ArmV5 = 302, ArmV6 = 303, ArmV6m = 304, ArmV7 = 305, ArmV7f = 306, ArmV7s = 307, ArmV7k = 308, ArmV7m = 309, ArmV7em = 310, ArmUnknown = 399, Arm64 = 401, Arm64V8 = 402, Arm64e = 403, Arm64Unknown = 499, Ppc = 501, Ppc64 = 601, Mips = 701, Mips64 = 801, Arm64_32 = 901, Arm64_32V8 = 902, Arm64_32Unknown = 999, Wasm32 = 1001, } impl Arch { /// Creates an `Arch` from its `u32` representation. /// /// Returns `Arch::Unknown` for all unknown values. /// /// # Examples /// /// ``` /// use symbolic_common::Arch; /// /// // Will print "X86" /// println!("{:?}", Arch::from_u32(101)); /// ``` pub fn from_u32(val: u32) -> Arch { match val { 0 => Arch::Unknown, 1 | 101 => Arch::X86, 199 => Arch::X86Unknown, 2 | 201 => Arch::Amd64, 3 | 202 => Arch::Amd64h, 299 => Arch::Amd64Unknown, 4 | 301 => Arch::Arm, 5 | 302 => Arch::ArmV5, 6 | 303 => Arch::ArmV6, 7 | 304 => Arch::ArmV6m, 8 | 305 => Arch::ArmV7, 9 | 306 => Arch::ArmV7f, 10 | 307 => Arch::ArmV7s, 11 | 308 => Arch::ArmV7k, 12 | 309 => Arch::ArmV7m, 13 | 310 => Arch::ArmV7em, 399 => Arch::ArmUnknown, 14 | 401 => Arch::Arm64, 15 | 402 => Arch::Arm64V8, 16 | 403 => Arch::Arm64e, 499 => Arch::Arm64Unknown, 17 | 501 => Arch::Ppc, 18 | 601 => Arch::Ppc64, 701 => Arch::Mips, 801 => Arch::Mips64, 901 => Arch::Arm64_32, 902 => Arch::Arm64_32V8, 999 => Arch::Arm64_32Unknown, 1001 => Arch::Wasm32, _ => Arch::Unknown, } } /// Returns the CPU family of the CPU architecture. /// /// # Examples /// /// ``` /// use symbolic_common::Arch; /// /// // Will print "Intel32" /// println!("{:?}", Arch::X86.cpu_family()); /// ``` pub fn cpu_family(self) -> CpuFamily { match self { Arch::Unknown => CpuFamily::Unknown, Arch::X86 | Arch::X86Unknown => CpuFamily::Intel32, Arch::Amd64 | Arch::Amd64h | Arch::Amd64Unknown => CpuFamily::Amd64, Arch::Arm64 | Arch::Arm64V8 | Arch::Arm64e | Arch::Arm64Unknown => CpuFamily::Arm64, Arch::Arm | Arch::ArmV5 | Arch::ArmV6 | Arch::ArmV6m | Arch::ArmV7 | Arch::ArmV7f | Arch::ArmV7s | Arch::ArmV7k | Arch::ArmV7m | Arch::ArmV7em | Arch::ArmUnknown => CpuFamily::Arm32, Arch::Ppc => CpuFamily::Ppc32, Arch::Ppc64 => CpuFamily::Ppc64, Arch::Mips => CpuFamily::Mips32, Arch::Mips64 => CpuFamily::Mips64, Arch::Arm64_32 | Arch::Arm64_32V8 | Arch::Arm64_32Unknown => CpuFamily::Arm64_32, Arch::Wasm32 => CpuFamily::Wasm32, } } /// Returns the canonical name of the CPU architecture. /// /// This follows the Apple conventions for naming architectures. For instance, Intel 32-bit /// architectures are canonically named `"x86"`, even though `"i386"` would also be a valid /// name. /// /// For architectures with variants or subtypes, that subtype is encoded into the name. For /// instance the ARM v7-M architecture is named with a full `"armv7m". /// /// # Examples /// /// ``` /// use symbolic_common::Arch; /// /// // Will print "x86" /// println!("{}", Arch::X86.name()); /// ``` pub fn name(self) -> &'static str { match self { Arch::Unknown => "unknown", Arch::Wasm32 => "wasm32", Arch::X86 => "x86", Arch::X86Unknown => "x86_unknown", Arch::Amd64 => "x86_64", Arch::Amd64h => "x86_64h", Arch::Amd64Unknown => "x86_64_unknown", Arch::Arm64 => "arm64", Arch::Arm64V8 => "arm64v8", Arch::Arm64e => "arm64e", Arch::Arm64Unknown => "arm64_unknown", Arch::Arm => "arm", Arch::ArmV5 => "armv5", Arch::ArmV6 => "armv6", Arch::ArmV6m => "armv6m", Arch::ArmV7 => "armv7", Arch::ArmV7f => "armv7f", Arch::ArmV7s => "armv7s", Arch::ArmV7k => "armv7k", Arch::ArmV7m => "armv7m", Arch::ArmV7em => "armv7em", Arch::ArmUnknown => "arm_unknown", Arch::Ppc => "ppc", Arch::Ppc64 => "ppc64", Arch::Mips => "mips", Arch::Mips64 => "mips64", Arch::Arm64_32 => "arm64_32", Arch::Arm64_32V8 => "arm64_32_v8", Arch::Arm64_32Unknown => "arm64_32_unknown", } } /// Returns whether this architecture is well-known. /// /// This is trivially `true` for all architectures other than the `*Unknown` variants. /// /// # Examples /// /// ``` /// use symbolic_common::Arch; /// /// assert!(Arch::X86.well_known()); /// assert!(!Arch::X86Unknown.well_known()); /// ``` pub fn well_known(self) -> bool { !matches!( self, Arch::Unknown | Arch::ArmUnknown | Arch::Arm64Unknown | Arch::X86Unknown | Arch::Amd64Unknown | Arch::Arm64_32Unknown ) } } impl fmt::Display for Arch { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.name()) } } impl str::FromStr for Arch { type Err = UnknownArchError; fn from_str(string: &str) -> Result { Ok(match string.to_ascii_lowercase().as_str() { "unknown" => Arch::Unknown, // this is an alias that is known among macho users "i386" => Arch::X86, "x86" => Arch::X86, "x86_unknown" => Arch::X86Unknown, "x86_64" | "amd64" => Arch::Amd64, "x86_64h" => Arch::Amd64h, "x86_64_unknown" => Arch::Amd64Unknown, "arm64" => Arch::Arm64, "arm64v8" => Arch::Arm64V8, "arm64e" => Arch::Arm64e, "arm64_unknown" => Arch::Arm64Unknown, "arm" => Arch::Arm, "armv5" => Arch::ArmV5, "armv6" => Arch::ArmV6, "armv6m" => Arch::ArmV6m, "armv7" => Arch::ArmV7, "armv7f" => Arch::ArmV7f, "armv7s" => Arch::ArmV7s, "armv7k" => Arch::ArmV7k, "armv7m" => Arch::ArmV7m, "armv7em" => Arch::ArmV7em, "arm_unknown" => Arch::ArmUnknown, "ppc" => Arch::Ppc, "ppc64" => Arch::Ppc64, "mips" => Arch::Mips, "mips64" => Arch::Mips64, "arm64_32" => Arch::Arm64_32, "arm64_32_v8" => Arch::Arm64_32V8, "arm64_32_unknown" => Arch::Arm64_32Unknown, // apple crash report variants "x86-64" => Arch::Amd64, "arm-64" => Arch::Arm64, // wasm extensions "wasm32" => Arch::Wasm32, _ => return Err(UnknownArchError), }) } } /// An error returned for an invalid [`Language`](enum.Language.html). #[derive(Debug)] pub struct UnknownLanguageError; impl fmt::Display for UnknownLanguageError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "unknown language") } } impl std::error::Error for UnknownLanguageError {} /// A programming language declared in debugging information. /// /// In the context of function names or source code, the lanugage can help to determine appropriate /// strategies for demangling names or syntax highlighting. See the [`Name`] type, which declares a /// function name with an optional language. /// /// This enumeration is represented as `u32` for C-bindings and lowlevel APIs. /// /// [`Name`]: struct.Name.html #[repr(u32)] #[non_exhaustive] #[allow(missing_docs)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Default)] pub enum Language { #[default] Unknown = 0, C = 1, Cpp = 2, D = 3, Go = 4, ObjC = 5, ObjCpp = 6, Rust = 7, Swift = 8, CSharp = 9, VisualBasic = 10, FSharp = 11, } impl Language { /// Creates an `Language` from its `u32` representation. /// /// Returns `Language::Unknown` for all unknown values. /// /// # Examples /// /// ``` /// use symbolic_common::Language; /// /// // Will print "C" /// println!("{:?}", Language::from_u32(1)); /// ``` pub fn from_u32(val: u32) -> Language { match val { 0 => Self::Unknown, 1 => Self::C, 2 => Self::Cpp, 3 => Self::D, 4 => Self::Go, 5 => Self::ObjC, 6 => Self::ObjCpp, 7 => Self::Rust, 8 => Self::Swift, 9 => Self::CSharp, 10 => Self::VisualBasic, 11 => Self::FSharp, _ => Self::Unknown, } } /// Returns the name of the language. /// /// The name is always given in lower case without special characters or spaces, suitable for /// serialization and parsing. For a human readable name, use the `Display` implementation, /// instead. /// /// # Examples /// /// ``` /// use symbolic_common::Language; /// /// // Will print "objcpp" /// println!("{}", Language::ObjCpp.name()); /// /// // Will print "Objective-C++" /// println!("{}", Language::ObjCpp); /// ``` pub fn name(self) -> &'static str { match self { Language::Unknown => "unknown", Language::C => "c", Language::Cpp => "cpp", Language::D => "d", Language::Go => "go", Language::ObjC => "objc", Language::ObjCpp => "objcpp", Language::Rust => "rust", Language::Swift => "swift", Language::CSharp => "csharp", Language::VisualBasic => "visualbasic", Language::FSharp => "fsharp", } } } impl fmt::Display for Language { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let formatted = match *self { Language::Unknown => "unknown", Language::C => "C", Language::Cpp => "C++", Language::D => "D", Language::Go => "Go", Language::ObjC => "Objective-C", Language::ObjCpp => "Objective-C++", Language::Rust => "Rust", Language::Swift => "Swift", Language::CSharp => "C#", Language::VisualBasic => "Visual Basic", Language::FSharp => "F#", }; f.write_str(formatted) } } impl str::FromStr for Language { type Err = UnknownLanguageError; fn from_str(string: &str) -> Result { Ok(match string { "unknown" => Language::Unknown, "c" => Language::C, "cpp" => Language::Cpp, "d" => Language::D, "go" => Language::Go, "objc" => Language::ObjC, "objcpp" => Language::ObjCpp, "rust" => Language::Rust, "swift" => Language::Swift, "csharp" => Language::CSharp, "visualbasic" => Language::VisualBasic, "fsharp" => Language::FSharp, _ => return Err(UnknownLanguageError), }) } } /// A [`Name`]s mangling state. /// /// By default, the mangling of a [`Name`] is not known, but an explicit mangling state can be set /// for Names that are guaranteed to be unmangled. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Default)] pub enum NameMangling { /// The [`Name`] is definitely mangled. Mangled, /// The [`Name`] is not mangled. Unmangled, /// The mangling of the [`Name`] is not known. #[default] Unknown, } /// The name of a potentially mangled symbol. /// /// Debugging information often only contains mangled names in their symbol and debug information /// data. The mangling schema depends on the compiler and programming language. `Name` is a wrapper /// type for potentially mangled names and an optionally declared language. To demangle the name, /// see the `demangle` feature of `symbolic`. /// /// Not all sources declare a programming language. In such a case, the [`language`] will be /// `Unknown`. However, it may still be inferred for demangling by inspecting the mangled string. /// /// Names can refer either functions, types, fields, or virtual constructs. Their semantics are /// fully defined by the language and the compiler. /// /// # Examples /// /// Create a name and print it: /// /// ``` /// use symbolic_common::Name; /// /// let name = Name::from("_ZN3foo3barEv"); /// assert_eq!(name.to_string(), "_ZN3foo3barEv"); /// ``` /// /// Create a name with a language and explicit mangling state. /// Alternate formatting prints the language: /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let name = Name::new("_ZN3foo3barEv", NameMangling::Mangled, Language::Cpp); /// assert_eq!(format!("{:#}", name), "_ZN3foo3barEv [C++]"); /// ``` /// /// [`language`]: struct.Name.html#method.language #[derive(Clone, Debug, Eq, Hash, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Name<'a> { string: Cow<'a, str>, lang: Language, #[cfg_attr(feature = "serde", serde(default))] mangling: NameMangling, } impl<'a> Name<'a> { /// Constructs a new Name with given mangling and language. /// /// In case both the mangling state and the language are unknown, a simpler alternative to use /// is [`Name::from`]. /// /// /// # Example /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let name = Name::new("_ZN3foo3barEv", NameMangling::Mangled, Language::Cpp); /// assert_eq!(format!("{:#}", name), "_ZN3foo3barEv [C++]"); /// ``` #[inline] pub fn new(string: S, mangling: NameMangling, lang: Language) -> Self where S: Into>, { Name { string: string.into(), lang, mangling, } } /// Returns the raw, mangled string of the name. /// /// # Example /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let name = Name::new("_ZN3foo3barEv", NameMangling::Mangled, Language::Cpp); /// assert_eq!(name.as_str(), "_ZN3foo3barEv"); /// ``` /// /// This is also available as an `AsRef` implementation: /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let name = Name::new("_ZN3foo3barEv", NameMangling::Mangled, Language::Cpp); /// assert_eq!(name.as_ref(), "_ZN3foo3barEv"); /// ``` pub fn as_str(&self) -> &str { &self.string } /// Set the `Name`'s language. pub fn set_language(&mut self, language: Language) -> &mut Self { self.lang = language; self } /// The language of the mangled symbol. /// /// If the language is not declared in the source, this returns `Language::Unknown`. The /// language may still be inferred using `detect_language`, which is declared on the `Demangle` /// extension trait. /// /// # Example /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let name = Name::new("_ZN3foo3barEv", NameMangling::Mangled, Language::Cpp); /// assert_eq!(name.language(), Language::Cpp); /// ``` pub fn language(&self) -> Language { self.lang } /// Set the `Name`'s mangling state. pub fn set_mangling(&mut self, mangling: NameMangling) -> &mut Self { self.mangling = mangling; self } /// Returns the `Name`'s mangling state. /// /// # Example /// /// ``` /// use symbolic_common::{Language, Name, NameMangling}; /// /// let unmangled = Name::new("foo::bar", NameMangling::Unmangled, Language::Unknown); /// assert_eq!(unmangled.mangling(), NameMangling::Unmangled); /// ``` pub fn mangling(&self) -> NameMangling { self.mangling } /// Converts this name into a [`Cow`]. /// /// # Example /// /// ``` /// use symbolic_common::Name; /// /// let name = Name::from("_ZN3foo3barEv"); /// assert_eq!(name.into_cow(), "_ZN3foo3barEv"); /// ``` pub fn into_cow(self) -> Cow<'a, str> { self.string } /// Converts this name into a [`String`]. /// /// # Example /// /// ``` /// use symbolic_common::Name; /// /// let name = Name::from("_ZN3foo3barEv"); /// assert_eq!(name.into_string(), "_ZN3foo3barEv"); /// ``` pub fn into_string(self) -> String { self.string.into_owned() } } impl AsRef for Name<'_> { fn as_ref(&self) -> &str { self.as_str() } } impl From> for String { fn from(name: Name) -> Self { name.string.into() } } impl<'a, S> From for Name<'a> where S: Into>, { fn from(string: S) -> Self { Self::new(string, NameMangling::Unknown, Language::Unknown) } } impl fmt::Display for Name<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_str())?; if f.alternate() && self.lang != Language::Unknown { write!(f, " [{}]", self.lang)?; } Ok(()) } } macro_rules! impl_eq { ($lhs:ty, $rhs: ty) => { #[allow(clippy::extra_unused_lifetimes)] impl<'a, 'b> PartialEq<$rhs> for $lhs { #[inline] fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&self.string, other) } } #[allow(clippy::extra_unused_lifetimes)] impl<'a, 'b> PartialEq<$lhs> for $rhs { #[inline] fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(self, &other.string) } } }; } impl_eq! { Name<'a>, str } impl_eq! { Name<'a>, &'b str } impl_eq! { Name<'a>, String } impl_eq! { Name<'a>, std::borrow::Cow<'b, str> } #[cfg(feature = "serde")] mod derive_serde { /// Helper macro to implement string based serialization and deserialization. /// /// If a type implements `FromStr` and `Display` then this automatically /// implements a serializer/deserializer for that type that dispatches /// appropriately. macro_rules! impl_str_serde { ($type:ty) => { impl ::serde::ser::Serialize for $type { fn serialize(&self, serializer: S) -> Result where S: ::serde::ser::Serializer, { serializer.serialize_str(self.name()) } } impl<'de> ::serde::de::Deserialize<'de> for $type { fn deserialize(deserializer: D) -> Result where D: ::serde::de::Deserializer<'de>, { <::std::borrow::Cow>::deserialize(deserializer)? .parse() .map_err(::serde::de::Error::custom) } } }; } impl_str_serde!(super::Arch); impl_str_serde!(super::Language); }