divan-0.1.21/.cargo_vcs_info.json0000644000000001360000000000100122160ustar { "git": { "sha1": "52f9d4983e68b16d1d77f8920df087ef8f8d6ba0" }, "path_in_vcs": "" }divan-0.1.21/.github/FUNDING.yml000064400000000000000000000000661046102023000141650ustar 00000000000000github: ['nvzqz'] custom: ['https://paypal.me/nvzqz'] divan-0.1.21/.github/workflows/ci.yml000064400000000000000000000126741046102023000155330ustar 00000000000000on: [push, pull_request] name: CI env: CARGO_HOME: ${{ github.workspace }}/.cargo CARGO_TERM_COLOR: always RUSTFLAGS: -D warnings -A unused-imports RUSTDOCFLAGS: -D warnings RUST_BACKTRACE: full jobs: # Check formatting. rustfmt: name: Rustfmt if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - run: rustup update stable --no-self-update - run: rustc -Vv - run: cargo fmt --all -- --check # Build documentation. rustdoc: name: Rustdoc if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: rustdoc-${{ runner.os }} - run: rustup update stable --no-self-update - run: rustc -Vv - run: cargo rustdoc --all-features -- --document-private-items # Run linter. clippy: name: Clippy if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: clippy-${{ runner.os }} - run: rustup update stable --no-self-update - run: rustc -Vv - run: cargo clippy --all --all-targets --all-features # Run tests in `src/` and `tests/`. unit-test: name: Unit Test if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest rust: - stable - nightly steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: unit-test-${{ runner.os }}-${{ matrix.rust }} - run: rustup default ${{ matrix.rust }} - run: rustup update ${{ matrix.rust }} --no-self-update - run: rustc -Vv - run: cargo test -p divan -p divan-macros # Run tests in `src/` and `tests/` using Miri. unit-test-miri: name: Unit Test (Miri) if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: miri-${{ runner.os }} - run: rustup default nightly - run: rustup update nightly --no-self-update - run: rustup component add miri - run: rustc -Vv - run: cargo miri test -p divan -p divan-macros # Run `examples/` directory as tests. 
examples-test: name: Examples Test if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest rust: - stable - nightly env: DIVAN_ITEMS_COUNT: 0 DIVAN_BYTES_COUNT: 1 DIVAN_CHARS_COUNT: 2 steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: examples-test-${{ runner.os }}-${{ matrix.rust }} - run: rustup default ${{ matrix.rust }} - run: rustup update ${{ matrix.rust }} --no-self-update - run: rustc -Vv - run: cargo test -p examples --all-features --benches # Run `examples/` directory as benchmarks. examples-bench: name: Examples Bench if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ${{ matrix.os }} env: # Run each benchmark within 2 seconds. DIVAN_MAX_TIME: 2 strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: examples-bench-${{ runner.os }} - run: rustup update stable --no-self-update - run: rustc -Vv - run: cargo bench -p examples --all-features # Run `internal_benches/` directory as benchmarks. internals-bench: name: Internals Bench if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ${{ matrix.os }} env: # Run each benchmark within 2 seconds. DIVAN_MAX_TIME: 2 strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ${{ env.CARGO_HOME }} target key: internals-bench-${{ runner.os }} - run: rustup update stable --no-self-update - run: rustc -Vv - run: cargo bench -p internal_benches --all-features divan-0.1.21/.gitignore000064400000000000000000000030011046102023000127700ustar 00000000000000### Linux ### *~ # temporary files which can be created if a process still has a handle open of a deleted file .fuse_hidden* # KDE directory preferences .directory # Linux trash folder which might appear on any partition or disk .Trash-* # .nfs files are created when an open file is removed but is still being accessed .nfs* ### macOS ### # General .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### macOS Patch ### # iCloud generated files *.icloud ### Rust ### # Generated by Cargo # will have compiled files and executables debug/ target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk # MSVC Windows builds of rustc generate these, which store debugging information *.pdb ### Windows ### # Windows thumbnail cache files Thumbs.db Thumbs.db:encryptable ehthumbs.db ehthumbs_vista.db # Dump file *.stackdump # Folder config file [Dd]esktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msix *.msm *.msp # Windows shortcuts *.lnk divan-0.1.21/CHANGELOG.md000064400000000000000000000353721046102023000126310ustar 
00000000000000# Changelog [![crates.io][crate-badge]][crate] All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] ## [0.1.21] - 2025-04-09 ### Fixed - `Divan::skip_exact` behaved incorrectly in `v0.1.19`. ### Changed - Improved handling of internal code around filters. ## [0.1.20] - 2025-04-09 ### Fixed - `Divan::skip_regex` accidentally dropped [`regex_lite::Regex`](https://docs.rs/regex-lite/latest/regex_lite/struct.Regex.html) and behaved incorrectly in `v0.1.19`. ## [0.1.19] - 2025-04-09 ### Fixed - [`cargo-nextest`] no longer skips benchmarks with argument parameters ([#75]). ### Changed - Organized positive and negative filters into a split buffer. ## [0.1.18] - 2025-04-05 ### Added - Support for [`cargo-nextest`] running benchmarks as tests. - [`prelude`] module for simplifying imports of [`#[bench]`][bench_attr], [`#[bench_group]`][bench_group_attr], [`black_box`], [`black_box_drop`], [`AllocProfiler`], [`Bencher`], and [`Divan`]. - Support `wasi` and `emscripten` targets. ## [0.1.17] - 2024-12-04 ### Changed - Set [MSRV] to 1.80 for [`LazyLock`] and new `size_of` prelude import. - Reduced thread pool memory usage by many kilobytes by using rendezvous channels instead of array-based channels. ## [0.1.16] - 2024-11-25 ### Added - Thread pool for reusing threads across multi-threaded benchmarks. The result is that when running Divan benchmarks under a sampling profiler, the profiler's output will be cleaner and easier to understand. ([#37]) - Track the maximum number of allocations during a benchmark. ### Changed - Make private `Arg::get` trait method not take `self`, so that text editors don't recommend using it. ([#59]) - Cache `BenchOptions` using `LazyLock` instead of `OnceLock`, saving space and simplifying the implementation. ## [0.1.15] - 2024-10-31 ### Added - [`CyclesCount`] counter to display cycle throughput as Hertz. - Track the maximum number of bytes allocated during a benchmark. ### Removed - Remove `has_cpuid` polyfill due to it no longer being planned for Rust, since CPUID is assumed to be available on all old x86 Rust targets. ### Fixed - List generic benchmark type parameter `A<4>` before `A<32>`. ([#64]) - Improve precision by using `f64` when calculating allocation count and sizes for the median samples. - Multi-thread allocation counting in `sum_alloc_tallies` on macOS was loading a null pointer instead of the pointer initialized by `sync_threads`. ### Changed - Sort all output benchmark names [naturally](https://en.wikipedia.org/wiki/Natural_sort_order) instead of [lexicographically](https://en.wikipedia.org/wiki/Lexicographic_order). - Internally reuse [`&[&str]` slice][slice] for [`args`] names. - Subtract overhead of [`AllocProfiler`] from timings. Now that Divan also tracks the maximum bytes allocated, the overhead was apparent in timings. - Simplify `ThreadAllocInfo::clear`. - Move measured loop overhead from `SharedContext` to global `OnceLock`. - Macros no longer rely on `std` being re-exported by Divan. Instead they use `::std` or `::core` to greatly simplify code. Although this is technically a breaking change, it is extremely unlikely that anyone does `extern crate std as x`. ## [0.1.14] - 2024-02-17 ### Fixed - Set correct field in [`Divan::max_time`].
([#45](https://github.com/nvzqz/divan/pull/45)) ### Changed - Improve [`args`] documentation by relating it to using [`Bencher`]. - Define [`BytesCount::of_iter`] in terms of [`BytesCount::of_many`]. ## [0.1.13] - 2024-02-09 ### Fixed - Missing update to `divan-macros` dependency. ## [0.1.12] - 2024-02-09 ### Added - Display [`args`] option values with [`Debug`] instead if [`ToString`] is not implemented. This makes it simple to use enums with derived [`Debug`]: ```rs #[derive(Debug)] enum Arg { A, B } #[divan::bench(args = [Arg::A, Arg::B])] fn bench_args(arg: &Arg) { ... } ``` - Documentation of when to use [`black_box`] in benchmarks. ## [0.1.11] - 2024-01-20 ### Fixed - Sorting negative [`args`] numbers. ## [0.1.10] - 2024-01-20 ### Fixed - Sort [`args`] numbers like [`consts`]. ## [0.1.9] - 2024-01-20 ### Added - [`args`] option for providing runtime arguments to benchmarks: ```rs #[divan::bench(args = [1, 2, 3])] fn args_list(arg: usize) { ... } #[divan::bench(args = 1..=3)] fn args_range(arg: usize) { ... } const ARGS: &[usize] = [1, 2, 3]; #[divan::bench(args = ARGS)] fn args_const(arg: usize) { ... } ``` This option may be preferred over the similar [`consts`] option because: - It is compatible with more types, only requiring that the argument type implements [`Any`], [`Copy`], [`Send`], [`Sync`], and [`ToString`]. [`Copy`] is not needed if the argument is used through a reference. - It does not increase compile times, unlike [`consts`] which needs to generate new code for each constant used. ## [0.1.8] - 2023-12-19 ### Changed - Reduce [`AllocProfiler`] footprint from 6-10ns to 1-2ns: - Thread-local values are now exclusively owned by their threads and are no longer kept in a global list. This enables some optimizations: - Performing faster unsynchronized arithmetic. - Removing one level of pointer indirection by storing the thread-local value entirely inline in [`thread_local!`], rather than storing a pointer to a globally-shared instance. - Compiler emits SIMD arithmetic for x86_64 using `paddq`. - Improved thread-local lookup on x86_64 macOS by using a static lookup key instead of a dynamic key from [`pthread_key_create`]. Key 11 is used because it is reserved for Windows. The `dyn_thread_local` crate feature disables this optimization. This is recommended if your code or another dependency uses the same static key. ### Fixed - Remove unused allocations if [`AllocProfiler`] is not active as the global allocator. ## [0.1.7] - 2023-12-13 ### Changed - Improve [`AllocProfiler`] implementation documentation. - Limit [`AllocProfiler`] mean count outputs to 4 significant digits to not be very wide and for consistency with other outputs. ## [0.1.6] - 2023-12-13 ### Added - [`AllocProfiler`] allocator that tracks allocation counts and sizes during benchmarks. ## [0.1.5] - 2023-12-05 ### Added - [`black_box_drop`] convenience function for [`black_box`] + [`drop`]. This is useful when benchmarking a lazy [`Iterator`] to completion with `for_each`: ```rust #[divan::bench] fn parse_iter() { let input: &str = // ... Parser::new(input) .for_each(divan::black_box_drop); } ``` ## [0.1.4] - 2023-12-02 ### Added - `From` implementations for counters on references to `u8`–`u64` and `usize`, such as `From<&u64>` and `From<&&u64>`. This allows for doing: ```rust bencher .with_inputs(|| { ... }) .input_counter(ItemsCount::from) .bench_values(|n| { ... 
}); ``` - [`Bencher::count_inputs_as`](https://docs.rs/divan/0.1.4/divan/struct.Bencher.html#method.count_inputs_as) method to convert inputs to a `Counter`: ```rust bencher .with_inputs(|| -> usize { // ... }) .count_inputs_as::() .bench_values(|n| -> Vec { (0..n).collect() }); ``` ## [0.1.3] - 2023-11-21 ### Added - Convenience shorthand options for `#[divan::bench]` and `#[divan::bench_group]` counters: - [`bytes_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#bytes_count) for `counter = BytesCount::from(n)` - [`chars_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#chars_count) for `counter = CharsCount::from(n)` - [`items_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#items_count) for `counter = ItemsCount::from(n)` - Support for NetBSD, DragonFly BSD, and Haiku OS by using pre-`main`. - Set global thread counts using: - [`Divan::threads`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.threads) - `--threads A B C...` CLI arg - `DIVAN_THREADS=A,B,C` env var The following example will benchmark across 2, 4, and [available parallelism] thread counts: ```sh DIVAN_THREADS=0,2,4 cargo bench -q -p examples --bench atomic ``` - Set global [`Counter`s](https://docs.rs/divan/0.1.3/divan/counter/trait.Counter.html) at runtime using: - [`Divan::counter`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.counter) - [`Divan::items_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.items_count) - [`Divan::bytes_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.bytes_count) - [`Divan::chars_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.chars_count) - `--items-count N` CLI arg - `--bytes-count N` CLI arg - `--chars-count N` CLI arg - `DIVAN_ITEMS_COUNT=N` env var - `DIVAN_BYTES_COUNT=N` env var - `DIVAN_CHARS_COUNT=N` env var - `From` for [`ItemsCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.ItemsCount.html), [`BytesCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html), and [`CharsCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.CharsCount.html) where `C` is `u8`–`u64` or `usize` (via `CountUInt` internally). This provides an alternative to the `new` constructor. - [`BytesCount::of_many`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.of_many) method similar to [`BytesCount::of`](https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of), but with a parameter by which to multiply the size of the type. - [`BytesCount::u64`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.u64), [`BytesCount::f64`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.f64), and similar methods based on [`BytesCount::of_many`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.of_many). ### Removed - [`black_box`] inside benchmark loop when deferring [`Drop`] of outputs. This is now done after the loop. - [`linkme`](https://docs.rs/linkme) dependency in favor of pre-`main` to register benchmarks and benchmark groups. This is generally be more portable and reliable. ### Changed - Now calling [`black_box`] at the end of the benchmark loop when deferring use of inputs or [`Drop`] of outputs. ## [0.1.2] - 2023-10-28 ### Fixed - Multi-threaded benchmarks being spread across CPUs, instead of pinning the main thread to CPU 0 and having all threads inherit the main thread's affinity. 
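  For context, this fix applies to multi-threaded benchmark runs. Below is a minimal sketch of such a benchmark using the `threads` option; the atomic-counter workload is illustrative only and is not the exact benchmark from `examples/benches/atomic.rs`:

  ```rust
  use std::sync::atomic::{AtomicU64, Ordering};

  // Divan runs the body on 1, 2, and 4 threads; all threads share `COUNTER`,
  // so thread placement across CPUs affects the measured contention.
  #[divan::bench(threads = [1, 2, 4])]
  fn fetch_add() -> u64 {
      static COUNTER: AtomicU64 = AtomicU64::new(0);
      COUNTER.fetch_add(1, Ordering::Relaxed)
  }
  ```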
## [0.1.1] - 2023-10-25 ### Fixed - Fix using LLD as linker for Linux by using the same pre-`main` approach as Windows. ## 0.1.0 - 2023-10-04 Initial release. See [blog post](https://nikolaivazquez.com/blog/divan/). [crate]: https://crates.io/crates/divan [crate-badge]: https://img.shields.io/crates/v/divan.svg [Unreleased]: https://github.com/nvzqz/divan/compare/v0.1.21...HEAD [0.1.21]: https://github.com/nvzqz/divan/compare/v0.1.20...v0.1.21 [0.1.20]: https://github.com/nvzqz/divan/compare/v0.1.19...v0.1.20 [0.1.19]: https://github.com/nvzqz/divan/compare/v0.1.18...v0.1.19 [0.1.18]: https://github.com/nvzqz/divan/compare/v0.1.17...v0.1.18 [0.1.17]: https://github.com/nvzqz/divan/compare/v0.1.16...v0.1.17 [0.1.16]: https://github.com/nvzqz/divan/compare/v0.1.15...v0.1.16 [0.1.15]: https://github.com/nvzqz/divan/compare/v0.1.14...v0.1.15 [0.1.14]: https://github.com/nvzqz/divan/compare/v0.1.13...v0.1.14 [0.1.13]: https://github.com/nvzqz/divan/compare/v0.1.12...v0.1.13 [0.1.12]: https://github.com/nvzqz/divan/compare/v0.1.11...v0.1.12 [0.1.11]: https://github.com/nvzqz/divan/compare/v0.1.10...v0.1.11 [0.1.10]: https://github.com/nvzqz/divan/compare/v0.1.9...v0.1.10 [0.1.9]: https://github.com/nvzqz/divan/compare/v0.1.8...v0.1.9 [0.1.8]: https://github.com/nvzqz/divan/compare/v0.1.7...v0.1.8 [0.1.7]: https://github.com/nvzqz/divan/compare/v0.1.6...v0.1.7 [0.1.6]: https://github.com/nvzqz/divan/compare/v0.1.5...v0.1.6 [0.1.5]: https://github.com/nvzqz/divan/compare/v0.1.4...v0.1.5 [0.1.4]: https://github.com/nvzqz/divan/compare/v0.1.3...v0.1.4 [0.1.3]: https://github.com/nvzqz/divan/compare/v0.1.2...v0.1.3 [0.1.2]: https://github.com/nvzqz/divan/compare/v0.1.1...v0.1.2 [0.1.1]: https://github.com/nvzqz/divan/compare/v0.1.0...v0.1.1 [#37]: https://github.com/nvzqz/divan/issues/37 [#59]: https://github.com/nvzqz/divan/issues/59 [#64]: https://github.com/nvzqz/divan/issues/64 [#75]: https://github.com/nvzqz/divan/issues/75 [`AllocProfiler`]: https://docs.rs/divan/latest/divan/struct.AllocProfiler.html [`args`]: https://docs.rs/divan/latest/divan/attr.bench.html#args [`Bencher`]: https://docs.rs/divan/latest/divan/struct.Bencher.html [`black_box_drop`]: https://docs.rs/divan/latest/divan/fn.black_box_drop.html [`black_box`]: https://docs.rs/divan/latest/divan/fn.black_box.html [`consts`]: https://docs.rs/divan/latest/divan/attr.bench.html#consts [`Divan::max_time`]: https://docs.rs/divan/latest/divan/struct.Divan.html#method.max_time [`Divan`]: https://docs.rs/divan/latest/divan/struct.Divan.html [`prelude`]: https://docs.rs/divan/latest/divan/prelude/index.html [bench_attr]: https://docs.rs/divan/latest/divan/attr.bench.html [bench_group_attr]: https://docs.rs/divan/latest/divan/attr.bench_group.html [`BytesCount::of_iter`]: https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of_iter [`BytesCount::of_many`]: https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of_many [`CyclesCount`]: https://docs.rs/divan/0.1/divan/counter/struct.CyclesCount.html [`Any`]: https://doc.rust-lang.org/std/any/trait.Any.html [`Copy`]: https://doc.rust-lang.org/std/marker/trait.Copy.html [`Debug`]: https://doc.rust-lang.org/std/fmt/trait.Debug.html [`drop`]: https://doc.rust-lang.org/std/mem/fn.drop.html [`Drop`]: https://doc.rust-lang.org/std/ops/trait.Drop.html [`Iterator`]: https://doc.rust-lang.org/std/iter/trait.Iterator.html [`LazyLock`]: https://doc.rust-lang.org/std/sync/struct.LazyLock.html [`Send`]: https://doc.rust-lang.org/std/marker/trait.Send.html [`size_of`]: 
https://doc.rust-lang.org/std/mem/fn.size_of.html [`Sync`]: https://doc.rust-lang.org/std/marker/trait.Sync.html [`thread_local!`]: https://doc.rust-lang.org/std/macro.thread_local.html [`ToString`]: https://doc.rust-lang.org/std/string/trait.ToString.html [available parallelism]: https://doc.rust-lang.org/std/thread/fn.available_parallelism.html [slice]: https://doc.rust-lang.org/std/primitive.slice.html [MSRV]: https://doc.rust-lang.org/cargo/reference/rust-version.html [`cargo-nextest`]: https://nexte.st [`pthread_key_create`]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_key_create.html divan-0.1.21/Cargo.lock0000644000000162620000000000100102000ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "bitflags" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "cc" version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ "shlex", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstyle", "clap_lex", "terminal_size", ] [[package]] name = "clap_lex" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "condtype" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" [[package]] name = "divan" version = "0.1.21" dependencies = [ "cfg-if", "clap", "condtype", "divan-macros", "libc", "mimalloc", "regex-lite", ] [[package]] name = "divan-macros" version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "errno" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", "windows-sys", ] [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libmimalloc-sys" version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" dependencies = [ "cc", "libc", ] [[package]] name = "linux-raw-sys" version = "0.4.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "mimalloc" version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" dependencies = [ "libmimalloc-sys", ] [[package]] name = "proc-macro2" version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] [[package]] name = "regex-lite" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "rustix" version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "syn" version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "terminal_size" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" dependencies = [ "rustix", "windows-sys", ] [[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" divan-0.1.21/Cargo.toml0000644000000036760000000000100102300ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.80.0" name = "divan" version = "0.1.21" authors = ["Nikolai Vazquez"] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Statistically-comfy benchmarking library." homepage = "https://github.com/nvzqz/divan" documentation = "https://docs.rs/divan" readme = "README.md" keywords = [ "benchmark", "criterion", "instrument", "measure", "performance", ] categories = ["development-tools::profiling"] license = "MIT OR Apache-2.0" repository = "https://github.com/nvzqz/divan" [features] default = ["wrap_help"] dyn_thread_local = [] help = ["clap/help"] internal_benches = [] wrap_help = [ "help", "clap/wrap_help", ] [lib] name = "divan" path = "src/lib.rs" [[test]] name = "attr_options" path = "tests/attr_options.rs" [[test]] name = "entry_properties" path = "tests/entry_properties.rs" [[test]] name = "forbid_unsafe" path = "tests/forbid_unsafe.rs" [[test]] name = "weird_usage" path = "tests/weird_usage.rs" [dependencies.cfg-if] version = "1" [dependencies.clap] version = "4" features = [ "std", "env", ] default-features = false [dependencies.condtype] version = "1.3" [dependencies.divan-macros] version = "=0.1.21" [dependencies.regex] version = "0.1" features = [ "std", "string", ] default-features = false package = "regex-lite" [dev-dependencies.mimalloc] version = "0.1" [target."cfg(unix)".dependencies.libc] version = "0.2.148" divan-0.1.21/Cargo.toml.orig000064400000000000000000000027231046102023000137010ustar 00000000000000[package] name = "divan" version = "0.1.21" rust-version = "1.80.0" edition = "2021" authors = ["Nikolai Vazquez"] license = "MIT OR Apache-2.0" description = "Statistically-comfy benchmarking library." 
repository = "https://github.com/nvzqz/divan" homepage = "https://github.com/nvzqz/divan" documentation = "https://docs.rs/divan" categories = ["development-tools::profiling"] keywords = ["benchmark", "criterion", "instrument", "measure", "performance"] readme = "README.md" [dependencies] divan-macros = { version = "=0.1.21", path = "macros" } cfg-if = "1" clap = { version = "4", default-features = false, features = ["std", "env"] } condtype = "1.3" regex = { package = "regex-lite", version = "0.1", default-features = false, features = ["std", "string"] } [target.'cfg(unix)'.dependencies] libc = { workspace = true } [dev-dependencies] mimalloc = "0.1" [features] default = ["wrap_help"] help = ["clap/help"] wrap_help = ["help", "clap/wrap_help"] # Opt out of faster static thread-local access and instead always dynamically # allocate thread-local storage. # # On x86_64 macOS we use TLS key 11 (reserved for Windows ABI compatability): # https://github.com/apple-oss-distributions/libpthread/blob/libpthread-519/private/pthread/tsd_private.h#L99 dyn_thread_local = [] # Benchmark internals. Not meant for public use. internal_benches = [] [workspace] members = ["macros", "examples", "internal_benches"] [workspace.dependencies] divan = { path = "." } libc = "0.2.148" divan-0.1.21/LICENSE-APACHE000064400000000000000000000261361046102023000127420ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. divan-0.1.21/LICENSE-MIT000064400000000000000000000020601046102023000124400ustar 00000000000000MIT License Copyright (c) 2023 Nikolai Vazquez Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. divan-0.1.21/README.md000064400000000000000000000076451046102023000123010ustar 00000000000000

Divan

docs.rs badge Downloads badge GitHub stars badge CI build status badge

Comfy benchmarking for Rust projects, brought to you by Nikolai Vazquez.

## Sponsor If you or your company find Divan valuable, consider [sponsoring on GitHub](https://github.com/sponsors/nvzqz) or [donating via PayPal](https://paypal.me/nvzqz). Sponsorships help me progress on what's possible with benchmarking in Rust. ## Guide A guide is being worked on. In the meantime, see: - [Announcement post](https://nikolaivazquez.com/blog/divan/) - ["Proving Performance" FOSDEM talk](https://youtu.be/P87C4jNakGs) ## Getting Started Divan `0.1.21` requires Rust `1.80.0` or later. 1. Add the following to your project's [`Cargo.toml`](https://doc.rust-lang.org/cargo/reference/manifest.html): ```toml [dev-dependencies] divan = "0.1.21" [[bench]] name = "example" harness = false ``` 2. Create a benchmarks file at `benches/example.rs`[^1] with your benchmarking code: ```rust fn main() { // Run registered benchmarks. divan::main(); } // Register a `fibonacci` function and benchmark it over multiple cases. #[divan::bench(args = [1, 2, 4, 8, 16, 32])] fn fibonacci(n: u64) -> u64 { if n <= 1 { 1 } else { fibonacci(n - 2) + fibonacci(n - 1) } } ``` 3. Run your benchmarks with [`cargo bench`](https://doc.rust-lang.org/cargo/commands/cargo-bench.html): ```txt example fastest │ slowest │ median │ mean │ samples │ iters ╰─ fibonacci │ │ │ │ │ ├─ 1 0.626 ns │ 1.735 ns │ 0.657 ns │ 0.672 ns │ 100 │ 819200 ├─ 2 2.767 ns │ 3.154 ns │ 2.788 ns │ 2.851 ns │ 100 │ 204800 ├─ 4 6.816 ns │ 7.671 ns │ 7.061 ns │ 7.167 ns │ 100 │ 102400 ├─ 8 57.31 ns │ 62.51 ns │ 57.96 ns │ 58.55 ns │ 100 │ 12800 ├─ 16 2.874 µs │ 3.812 µs │ 2.916 µs │ 3.006 µs │ 100 │ 200 ╰─ 32 6.267 ms │ 6.954 ms │ 6.283 ms │ 6.344 ms │ 100 │ 100 ``` See [`#[divan::bench]`][bench_attr] for info on benchmark function registration. ## Examples Practical example benchmarks can be found in the [`examples/benches`](https://github.com/nvzqz/divan/tree/main/examples/benches) directory. These can be benchmarked locally by running: ```sh git clone https://github.com/nvzqz/divan.git cd divan cargo bench -q -p examples --all-features ``` More thorough usage examples can be found in the [`#[divan::bench]` documentation][bench_attr_examples]. ## License Like the Rust project, this library may be used under either the [MIT License](https://github.com/nvzqz/divan/blob/main/LICENSE-MIT) or [Apache License (Version 2.0)](https://github.com/nvzqz/divan/blob/main/LICENSE-APACHE). [^1]: Within your crate directory, i.e. [`$CARGO_MANIFEST_DIR`](https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates) [bench_attr]: https://docs.rs/divan/latest/divan/attr.bench.html [bench_attr_examples]: https://docs.rs/divan/latest/divan/attr.bench.html#examples divan-0.1.21/WANTED.md000064400000000000000000000032331046102023000123130ustar 00000000000000# Wanted It would be great to have the following features added to Divan. If you have ideas to expand this list, please [find](https://github.com/nvzqz/divan/discussions) or [create](https://github.com/nvzqz/divan/discussions/new?category=ideas) a discussion first. - Async benchmarks - Baseline benchmark - Should match baselines across equal generic types and constants - Idea: ```rs #[divan::bench] fn old() { ... } #[divan::bench(baseline = old)] fn new() { ... 
} ``` - Cross-device: run benchmarks on other devices and report the data on the local device - HTML output - CSV output - Custom counters - Time complexity of counters - Also space complexity when measuring heap allocation - Measure heap allocations - Custom [`GlobalAlloc`](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html) that wraps another `GlobalAlloc`, defaulting to [`System`](https://doc.rust-lang.org/std/alloc/struct.System.html) - Custom timers - Timer for kernel/user mode - Unix: - [`getrusage(2)`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/getrusage.html) - Per-thread: - Linux/FreeBSD/OpenBSD: [`RUSAGE_THREAD`](https://man7.org/linux/man-pages/man2/getrusage.2.html) - macOS/iOS: [`thread_info(mach_thread_self(), ...)`](https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html) - Windows: - [`GetProcessTimes`](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocesstimes) - [`GetThreadTimes`](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadtimes) divan-0.1.21/rustfmt.toml000064400000000000000000000002451046102023000134100ustar 00000000000000# Rust code formatting; see https://rust-lang.github.io/rustfmt edition = "2021" newline_style = "Unix" use_field_init_shorthand = true use_small_heuristics = "Max" divan-0.1.21/src/alloc.rs000064400000000000000000000466351046102023000132530ustar 00000000000000use std::{alloc::*, fmt, ptr::NonNull}; use cfg_if::cfg_if; use crate::{stats::StatsSet, util::sync::AtomicFlag}; #[cfg(target_os = "macos")] use crate::util::{sync::CachePadded, thread::PThreadKey}; #[cfg(not(target_os = "macos"))] use std::cell::UnsafeCell; /// The `AllocProfiler` when running crate-internal tests. /// /// This enables us to test it for: /// - Undefined behavior with Miri /// - Correctness when tallying #[cfg(test)] #[global_allocator] static ALLOC: AllocProfiler = AllocProfiler::system(); /// Whether to ignore allocation info set during the benchmark. pub(crate) static IGNORE_ALLOC: AtomicFlag = AtomicFlag::new(false); /// Measures [`GlobalAlloc`] memory usage. /// /// # Examples /// /// The default usage is to create a /// [`#[global_allocator]`](macro@global_allocator) that wraps the [`System`] /// allocator with [`AllocProfiler::system()`]: /// /// ``` /// use std::collections::*; /// use divan::AllocProfiler; /// /// #[global_allocator] /// static ALLOC: AllocProfiler = AllocProfiler::system(); /// /// fn main() { /// divan::main(); /// } /// /// #[divan::bench(types = [ /// Vec, /// LinkedList, /// HashSet, /// ])] /// fn from_iter() -> T /// where /// T: FromIterator, /// { /// (0..100).collect() /// } /// /// #[divan::bench(types = [ /// Vec, /// LinkedList, /// HashSet, /// ])] /// fn drop(bencher: divan::Bencher) /// where /// T: FromIterator, /// { /// bencher /// .with_inputs(|| (0..100).collect::()) /// .bench_values(std::mem::drop); /// } /// ``` /// /// Wrap other [`GlobalAlloc`] implementations like /// [`mimalloc`](https://docs.rs/mimalloc) with [`AllocProfiler::new()`]: /// /// ``` /// use divan::AllocProfiler; /// use mimalloc::MiMalloc; /// /// # #[cfg(not(miri))] /// #[global_allocator] /// static ALLOC: AllocProfiler = AllocProfiler::new(MiMalloc); /// ``` /// /// See [`string`](https://github.com/nvzqz/divan/blob/main/examples/benches/string.rs) /// and [`collections`](https://github.com/nvzqz/divan/blob/main/examples/benches/collections.rs) /// benchmarks for more examples. 
/// /// # Implementation /// /// Collecting allocation information happens at any point during which Divan is /// also measuring the time. As a result, counting allocations affects timing. /// /// To reduce Divan's footprint during benchmarking: /// - Allocation information is recorded in thread-local storage to prevent /// contention when benchmarks involve multiple threads, either through /// options like [`threads`](macro@crate::bench#threads) or internally /// spawning their own threads. /// - It does not check for overflow and assumes it will not happen. This is /// subject to change in the future. /// - Fast thread-local storage access is assembly-optimized on macOS. /// /// Allocation information is the only data Divan records outside of timing, and /// thus it also has the only code that affects timing. Steps for recording /// alloc info: /// 1. Load the thread-local slot for allocation information. /// /// On macOS, this is via the /// [`gs`](https://github.com/nvzqz/divan/blob/v0.1.6/src/util/sync.rs#L34)/[`tpidrro_el0`](https://github.com/nvzqz/divan/blob/v0.1.6/src/util/sync.rs#L47) /// registers for /// [`pthread_getspecific`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_getspecific.html). /// Although this is not guaranteed as stable ABI, in practice many programs /// assume these registers store thread-local data. [`thread_local!`] is used /// on all other platforms. /// /// 2. Increment allocation operation invocation count and bytes count /// (a.k.a. size). /// /// Allocation information is recorded in thread-local storage to prevent /// slowdowns from synchronized sharing when using multiple threads, through /// options like [`threads`](macro@crate::bench#threads). /// /// Note that allocations in threads not controlled by Divan are not currently /// counted. #[derive(Debug, Default)] pub struct AllocProfiler { alloc: Alloc, } unsafe impl GlobalAlloc for AllocProfiler { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // Tally allocation count. if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_alloc(layout.size()); }; self.alloc.alloc(layout) } unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { // Tally allocation count. if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_alloc(layout.size()); }; self.alloc.alloc_zeroed(layout) } unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { // Tally reallocation count. if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_realloc(layout.size(), new_size); }; self.alloc.realloc(ptr, layout, new_size) } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { // Tally deallocation count. if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_dealloc(layout.size()); }; self.alloc.dealloc(ptr, layout) } } impl AllocProfiler { /// Profiles the [`System`] allocator. #[inline] pub const fn system() -> Self { Self::new(System) } } impl AllocProfiler { /// Profiles a [`GlobalAlloc`]. #[inline] pub const fn new(alloc: A) -> Self { Self { alloc } } } /// Thread-local allocation information. 
#[derive(Clone, Default)] #[repr(C)] pub(crate) struct ThreadAllocInfo { // NOTE: `tallies` should be ordered first so that `tally_realloc` can // directly index `&self` without an offset. pub tallies: ThreadAllocTallyMap, // NOTE: Max size and count are signed for convenience but can never be // negative due to it being initialized to 0. // // PERF: Grouping current/max fields together by count and size makes // `tally_alloc` take the least time on M1 Mac. pub current_count: ThreadAllocCountSigned, pub max_count: ThreadAllocCountSigned, pub current_size: ThreadAllocCountSigned, pub max_size: ThreadAllocCountSigned, } #[cfg(not(target_os = "macos"))] thread_local! { /// Instance specific to the current thread. /// /// On macOS, we use `ALLOC_PTHREAD_KEY` instead. static CURRENT_THREAD_INFO: UnsafeCell = const { UnsafeCell::new(ThreadAllocInfo::new()) }; } #[cfg(target_os = "macos")] static ALLOC_PTHREAD_KEY: CachePadded> = CachePadded(PThreadKey::new()); impl ThreadAllocInfo { #[inline] pub const fn new() -> Self { Self { tallies: ThreadAllocTallyMap::new(), max_count: 0, current_count: 0, max_size: 0, current_size: 0, } } /// Returns the current thread's allocation information, initializing it on /// first access. /// /// Returns `None` if the thread is terminating and has thus deallocated its /// local instance. #[inline] pub fn current() -> Option> { cfg_if! { if #[cfg(target_os = "macos")] { return Self::try_current().or_else(slow_impl); } else { Self::try_current() } } #[cfg(target_os = "macos")] #[cold] #[inline(never)] fn slow_impl() -> Option> { unsafe { let layout = Layout::new::(); let Some(info_alloc) = NonNull::new(unsafe { System.alloc_zeroed(layout) }) else { handle_alloc_error(layout); }; let success = ALLOC_PTHREAD_KEY.0.set(info_alloc.as_ptr().cast(), |this| { System.dealloc(this.as_ptr().cast(), Layout::new::()); }); if !success { System.dealloc(info_alloc.as_ptr(), layout); return None; } // When using static thread local key, write directly because it // is undefined behavior to call `pthread_setspecific` with a // key that didn't originate from `pthread_key_create`. #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] unsafe { crate::util::thread::fast::set_static_thread_local(info_alloc.as_ptr()); }; Some(info_alloc.cast()) } } } /// Returns the current thread's allocation information if initialized. /// /// Returns `None` if the instance has not yet been allocated or the thread /// is terminating and has thus deallocated its local instance. #[inline] pub fn try_current() -> Option> { cfg_if! { if #[cfg(target_os = "macos")] { // Fast path: static thread local. #[cfg(all( not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64", ))] return NonNull::new(unsafe { crate::util::thread::fast::get_static_thread_local::().cast_mut() }); #[allow(unreachable_code)] ALLOC_PTHREAD_KEY.0.get() } else { CURRENT_THREAD_INFO.try_with(|info| unsafe { NonNull::new_unchecked(info.get()) }).ok() } } } /// Sets 0 to all values. pub fn clear(&mut self) { *self = Self::new(); } /// Tallies the total count and size of the allocation operation. #[inline] pub fn tally_alloc(&mut self, size: usize) { self.tally_op(AllocOp::Alloc, size); self.current_count += 1; self.max_count = self.max_count.max(self.current_count); self.current_size += size as ThreadAllocCountSigned; self.max_size = self.max_size.max(self.current_size); } /// Tallies the total count and size of the deallocation operation. 
#[inline] pub fn tally_dealloc(&mut self, size: usize) { self.tally_op(AllocOp::Dealloc, size); self.current_count -= 1; self.current_size -= size as ThreadAllocCountSigned; } /// Tallies the total count and size of the reallocation operation. #[inline] pub fn tally_realloc(&mut self, old_size: usize, new_size: usize) { let (diff, is_shrink) = new_size.overflowing_sub(old_size); let diff = diff as isize; let abs_diff = diff.wrapping_abs() as usize; self.tally_op(AllocOp::realloc(is_shrink), abs_diff); // NOTE: Realloc does not change allocation count. self.current_size += diff as ThreadAllocCountSigned; self.max_size = self.max_size.max(self.current_size); } /// Tallies the total count and size of the allocation operation. #[inline] fn tally_op(&mut self, op: AllocOp, size: usize) { let tally = self.tallies.get_mut(op); tally.count += 1; tally.size += size as ThreadAllocCount; } } /// Allocation numbers being accumulated. /// /// # Memory Layout /// /// Aligning to 16 nudges the compiler to emit aligned SIMD operations. /// /// Placing `count` first generates less code on AArch64. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] #[repr(C, align(16))] pub(crate) struct AllocTally { /// The number of times this operation was performed. pub count: Count, /// The amount of memory this operation changed. pub size: Count, } pub(crate) type ThreadAllocCount = condtype::num::Usize64; pub(crate) type ThreadAllocCountSigned = condtype::num::Isize64; pub(crate) type ThreadAllocTally = AllocTally; pub(crate) type TotalAllocTally = AllocTally; impl AllocTally> { pub fn is_zero(&self) -> bool { self.count.is_zero() && self.size.is_zero() } } impl AllocTally { #[inline] pub fn as_array(&self) -> &[C; 2] { // SAFETY: This is `#[repr(C)]`, so we can treat it as a contiguous // sequence of items. unsafe { &*(self as *const _ as *const _) } } } /// Allocation number categories. /// /// Note that grow/shrink are first to improve code generation for `realloc`. #[derive(Clone, Copy, PartialEq, Eq)] pub(crate) enum AllocOp { Grow, Shrink, Alloc, Dealloc, } impl AllocOp { pub const ALL: [Self; 4] = { use AllocOp::*; // Use same order as declared so that it can be indexed as-is. [Grow, Shrink, Alloc, Dealloc] }; #[inline] pub fn realloc(shrink: bool) -> Self { // This generates the same code as `std::mem::transmute`. if shrink { Self::Shrink } else { Self::Grow } } #[inline] pub fn name(self) -> &'static str { match self { Self::Grow => "grow", Self::Shrink => "shrink", Self::Alloc => "alloc", Self::Dealloc => "dealloc", } } #[inline] pub fn prefix(self) -> &'static str { match self { Self::Grow => "grow:", Self::Shrink => "shrink:", Self::Alloc => "alloc:", Self::Dealloc => "dealloc:", } } } /// Values keyed by `AllocOp`. #[derive(Clone, Copy, Default, PartialEq, Eq)] pub(crate) struct AllocOpMap { pub values: [T; 4], } pub(crate) type ThreadAllocTallyMap = AllocOpMap; pub(crate) type TotalAllocTallyMap = AllocOpMap; impl fmt::Debug for AllocOpMap { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_map().entries(AllocOp::ALL.iter().map(|&op| (op.name(), self.get(op)))).finish() } } impl ThreadAllocTallyMap { #[inline] pub const fn new() -> Self { unsafe { std::mem::transmute([0u8; size_of::()]) } } /// Returns `true` if all tallies are 0. 
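// A standalone sketch of the size bookkeeping in `tally_realloc` above: the
// overflowing subtraction classifies the operation as a grow or a shrink and
// yields the absolute size change, while the live-allocation count is left
// untouched.
fn classify_realloc(old_size: usize, new_size: usize) -> (&'static str, usize) {
    let (diff, is_shrink) = new_size.overflowing_sub(old_size);
    let abs_diff = (diff as isize).wrapping_abs() as usize;
    (if is_shrink { "shrink" } else { "grow" }, abs_diff)
}

#[test]
fn realloc_classification_model() {
    assert_eq!(classify_realloc(64, 96), ("grow", 32));
    assert_eq!(classify_realloc(96, 64), ("shrink", 32));
}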
#[inline] pub fn is_empty(&self) -> bool { self.values.iter().all(|tally| tally.count == 0 && tally.size == 0) } pub fn add_to_total(&self, total: &mut TotalAllocTallyMap) { for (i, value) in self.values.iter().enumerate() { total.values[i].count += value.count as u128; total.values[i].size += value.size as u128; } } } impl AllocOpMap { #[cfg(test)] pub fn from_fn(f: F) -> Self where F: FnMut(AllocOp) -> T, { Self { values: AllocOp::ALL.map(f) } } #[inline] pub const fn get(&self, op: AllocOp) -> &T { &self.values[op as usize] } #[inline] pub fn get_mut(&mut self, op: AllocOp) -> &mut T { &mut self.values[op as usize] } } #[cfg(feature = "internal_benches")] mod benches { use super::*; // We want the approach to scale well with thread count. const THREADS: &[usize] = &[0, 1, 2, 4, 16]; #[crate::bench(crate = crate, threads = THREADS)] fn tally_alloc(bencher: crate::Bencher) { IGNORE_ALLOC.set(true); // Using 0 simulates tallying without affecting benchmark reporting. let size = crate::black_box(0); bencher.bench(|| { if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_alloc(size); } }) } #[crate::bench(crate = crate, threads = THREADS)] fn tally_dealloc(bencher: crate::Bencher) { IGNORE_ALLOC.set(true); // Using 0 simulates tallying without affecting benchmark reporting. let size = crate::black_box(0); bencher.bench(|| { if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_dealloc(size); } }) } #[crate::bench(crate = crate, threads = THREADS)] fn tally_realloc(bencher: crate::Bencher) { IGNORE_ALLOC.set(true); // Using 0 simulates tallying without affecting benchmark reporting. let new_size = crate::black_box(0); let old_size = crate::black_box(0); bencher.bench(|| { if let Some(mut info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. let info = unsafe { info.as_mut() }; info.tally_realloc(old_size, new_size); } }) } #[crate::bench_group(crate = crate, threads = THREADS)] mod current { use super::*; #[crate::bench(crate = crate)] fn init() -> Option> { ThreadAllocInfo::current() } #[crate::bench(crate = crate)] fn r#try() -> Option> { ThreadAllocInfo::try_current() } } } #[cfg(test)] mod tests { use super::*; /// Tests that `AllocProfiler` is counting correctly. #[test] fn tally() { // Initialize the thread's alloc info. // // SAFETY: This cannot be kept as a reference and is instead a raw // pointer because a reference would cause undefined behavior when // `AllocProfiler` attempts to update tallies. let mut alloc_info = ThreadAllocInfo::current().unwrap(); // Resets the allocation tallies and returns the previous tallies. let mut take_alloc_tallies = || std::mem::take(unsafe { &mut alloc_info.as_mut().tallies }); // Start fresh. _ = take_alloc_tallies(); // Helper to create `ThreadAllocTallyMap` since each operation only // changes `buf` by 1 `i32`. let item_tally = ThreadAllocTally { count: 1, size: size_of::() as _ }; let make_tally_map = |op: AllocOp| { ThreadAllocTallyMap::from_fn(|other_op| { if other_op == op { item_tally } else { Default::default() } }) }; // Test zero. let mut buf: Vec = Vec::new(); assert_eq!(take_alloc_tallies(), Default::default()); // Test allocation. buf.reserve_exact(1); assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Alloc)); // Test grow. buf.reserve_exact(2); assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Grow)); // Test shrink. 
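// Quick check of the invariant the `get`/`get_mut` lookups above rely on:
// `AllocOp::ALL` lists the variants in declaration order, so `op as usize`
// indexes the backing array directly.
#[test]
fn alloc_op_indices_match_declaration_order() {
    for (index, op) in AllocOp::ALL.iter().enumerate() {
        assert_eq!(*op as usize, index);
    }
}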
buf.shrink_to(1); assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Shrink)); // Test dealloc. drop(buf); assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Dealloc)); // Test all of the above together. let mut buf: Vec = Vec::new(); buf.reserve_exact(1); // alloc buf.reserve_exact(2); // grow buf.shrink_to(1); // shrink drop(buf); // dealloc assert_eq!(take_alloc_tallies(), ThreadAllocTallyMap { values: [item_tally; 4] }); } } divan-0.1.21/src/benchmark/args.rs000064400000000000000000000265221046102023000150400ustar 00000000000000//! Types used to implement runtime argument support. use std::{ any::{Any, TypeId}, borrow::Cow, mem, slice, sync::OnceLock, }; use crate::{util::ty::TypeCast, Bencher}; /// Holds lazily-initialized runtime arguments to be passed into a benchmark. /// /// `#[divan::bench]` stores this as a `__DIVAN_ARGS` global for each entry, and /// then at runtime it is initialized once by a closure that creates the usable /// `BenchArgsRunner`. pub struct BenchArgs { args: OnceLock, } /// The result of making `BenchArgs` runnable from instantiating the arguments /// list and providing a typed benchmarking implementation. #[derive(Clone, Copy)] pub struct BenchArgsRunner { args: &'static ErasedArgsSlice, bench: fn(Bencher, &ErasedArgsSlice, arg_index: usize), } /// Type-erased `&'static [T]` that also stores names of the arguments. struct ErasedArgsSlice { /// The start of `&[T]`. args: *const (), /// The start of `&[&'static str]`. names: *const &'static str, /// The number of arguments. len: usize, /// The ID of `T` to ensure correctness. arg_type: TypeId, } // SAFETY: Raw pointers in `ErasedArgsSlice` are used in a thread-safe way, and // the argument type is required to be `Send + Sync` when initialized from the // iterator in `BenchArgs::runner`. unsafe impl Send for ErasedArgsSlice {} unsafe impl Sync for ErasedArgsSlice {} impl BenchArgs { /// Creates an uninitialized instance. pub const fn new() -> Self { Self { args: OnceLock::new() } } /// Initializes `self` with the results of `make_args` and returns a /// `BenchArgsRunner` that will execute the benchmarking closure. pub fn runner( &'static self, make_args: impl FnOnce() -> I, arg_to_string: impl Fn(&I::Item) -> String, _bench_impl: B, ) -> BenchArgsRunner where I: IntoIterator, I::Item: Any + Send + Sync, B: FnOnce(Bencher, &I::Item) + Copy, { let args = self.args.get_or_init(|| { let args_iter = make_args().into_iter(); // Reuse arguments for names if already a slice of strings. // // NOTE: We do this over `I::IntoIter` instead of `I` since it works // for both slices and `slice::Iter`. let args_strings: Option<&'static [&str]> = args_iter.cast_ref::>().map(|iter| iter.as_slice()); // Collect arguments into leaked slice. // // Leaking the collected `args` simplifies memory management, such // as when reusing for `names`. We're leaking anyways since this is // accessed via a global `OnceLock`. // // PERF: We could optimize this to reuse arguments when users // provide slices. However, for slices its `Item` is a reference, so // `slice::Iter` would never match here. To make this // optimization, we would need to be able to get the referee type. let args: &'static [I::Item] = Box::leak(args_iter.collect()); // Collect printable representations of arguments. 
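// A sketch of the user-facing side of this machinery, assuming the `args`
// option of `#[divan::bench]`: the option supplies the runtime argument list,
// and the per-argument names collected above label each run. The function and
// values are hypothetical.
#[divan::bench(args = [1, 10, 100])]
fn fibonacci(n: u64) -> u64 {
    fn fib(n: u64) -> u64 {
        if n < 2 { n } else { fib(n - 1) + fib(n - 2) }
    }
    fib(divan::black_box(n))
}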
// // PERF: We take multiple opportunities to reuse the provided // arguments buffer or individual strings' buffers: // - `&[&str]` // - `IntoIterator` // - `IntoIterator` // - `IntoIterator>` // - `IntoIterator>` let names: &'static [&str] = 'names: { // PERF: Reuse arguments strings slice. if let Some(args) = args_strings { break 'names args; } // PERF: Reuse our args slice allocation. if let Some(args) = args.cast_ref::<&[&str]>() { break 'names args; } Box::leak( args.iter() .map(|arg| -> &str { // PERF: Reuse strings as-is. if let Some(arg) = arg.cast_ref::() { return arg; } if let Some(arg) = arg.cast_ref::>() { return arg; } if let Some(arg) = arg.cast_ref::>() { return arg; } // Default to `arg_to_string`, which will format via // either `ToString` or `Debug`. Box::leak(arg_to_string(arg).into_boxed_str()) }) .collect(), ) }; ErasedArgsSlice { // We `black_box` arguments to prevent the compiler from // optimizing the benchmark for the provided values. args: crate::black_box(args.as_ptr().cast()), names: names.as_ptr(), len: args.len(), arg_type: TypeId::of::(), } }); BenchArgsRunner { args, bench: bench:: } } } impl Default for BenchArgs { fn default() -> Self { Self::new() } } impl BenchArgsRunner { #[inline] pub(crate) fn bench(&self, bencher: Bencher, index: usize) { (self.bench)(bencher, self.args, index) } #[inline] pub(crate) fn arg_names(&self) -> &'static [&'static str] { self.args.names() } } impl ErasedArgsSlice { /// Retrieves a slice of arguments if the type is `T`. #[inline] fn typed_args(&self) -> Option<&[T]> { if self.arg_type == TypeId::of::() { // SAFETY: `BenchArgs::runner` guarantees storing `len` instances. Some(unsafe { slice::from_raw_parts(self.args.cast(), self.len) }) } else { None } } /// Returns the arguments' names. /// /// Names are in the same order as args and thus their indices can be used /// to reference arguments. #[inline] fn names(&self) -> &'static [&str] { // SAFETY: `BenchArgs::runner` guarantees storing `len` names. unsafe { slice::from_raw_parts(self.names, self.len) } } } /// The `BenchArgsRunner.bench` implementation. fn bench(bencher: Bencher, erased_args: &ErasedArgsSlice, arg_index: usize) where T: Any, B: FnOnce(Bencher, &T) + Copy, { // We defer type checking until the benchmark is run to make safety of this // function easier to audit. Checking here instead of in `BenchArgs::runner` // is late but fine since this check will only fail due to a bug in Divan's // macro code generation. let Some(typed_args) = erased_args.typed_args::() else { type_mismatch::(); // Reduce code size by using a separate function for each `T` instead of // each benchmark closure. #[cold] #[inline(never)] fn type_mismatch() -> ! { unreachable!("incorrect type '{}'", std::any::type_name::()) } }; // SAFETY: The closure is a ZST, so we can construct one out of thin air. // This can be done multiple times without invoking a `Drop` destructor // because it implements `Copy`. let bench_impl: B = unsafe { assert_eq!(size_of::(), 0, "benchmark closure expected to be zero-sized"); mem::zeroed() }; bench_impl(bencher, &typed_args[arg_index]); } #[cfg(test)] mod tests { use super::*; /// Test that optimizations for string items are applied. mod optimizations { use std::borrow::Borrow; use super::*; /// Tests that two slices contain the same exact strings. 
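// A standalone illustration of the zero-sized-closure trick used in `bench`
// above: a non-capturing closure has no state, so a fresh copy can be
// materialized from zeroed memory; the size assertion guards against
// accidentally passing a capturing closure.
fn call_again<F: Fn() -> u32 + Copy>(witness: F) -> u32 {
    assert_eq!(std::mem::size_of::<F>(), 0, "closure expected to be zero-sized");
    let first = witness();
    // SAFETY: `F` is a zero-sized `Copy` type, so an all-zero value is valid.
    let copy: F = unsafe { std::mem::zeroed() };
    first + copy()
}

#[test]
fn zst_closure_model() {
    assert_eq!(call_again(|| 21), 42);
}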
fn test_eq_ptr, B: Borrow>(a: &[A], b: &[B]) { assert_eq!(a.len(), b.len()); for (a, b) in a.iter().zip(b) { let a = a.borrow(); let b = b.borrow(); assert_eq!(a, b); assert_eq!(a.as_ptr(), b.as_ptr()); } } /// Tests that `&[&str]` reuses the original slice for names. #[test] fn str_slice() { static ARGS: BenchArgs = BenchArgs::new(); static ORIG_ARGS: &[&str] = &["a", "b"]; let runner = ARGS.runner(|| ORIG_ARGS, ToString::to_string, |_, _| {}); let typed_args: Vec<&str> = runner.args.typed_args::<&&str>().unwrap().iter().copied().copied().collect(); let names = runner.arg_names(); // Test values. assert_eq!(names, ORIG_ARGS); assert_eq!(names, typed_args); // Test addresses. assert_eq!(names.as_ptr(), ORIG_ARGS.as_ptr()); assert_ne!(names.as_ptr(), typed_args.as_ptr()); } /// Tests optimizing `IntoIterator` to reuse the same /// allocation for also storing argument names. #[test] fn str_array() { static ARGS: BenchArgs = BenchArgs::new(); let runner = ARGS.runner(|| ["a", "b"], ToString::to_string, |_, _| {}); let typed_args = runner.args.typed_args::<&str>().unwrap(); let names = runner.arg_names(); // Test values. assert_eq!(names, ["a", "b"]); assert_eq!(names, typed_args); // Test addresses. assert_eq!(names.as_ptr(), typed_args.as_ptr()); } /// Tests optimizing `IntoIterator` to reuse the same /// allocation for also storing argument names. #[test] fn string_array() { static ARGS: BenchArgs = BenchArgs::new(); let runner = ARGS.runner(|| ["a".to_owned(), "b".to_owned()], ToString::to_string, |_, _| {}); let typed_args = runner.args.typed_args::().unwrap(); let names = runner.arg_names(); assert_eq!(names, ["a", "b"]); test_eq_ptr(names, typed_args); } /// Tests optimizing `IntoIterator>` to reuse the same /// allocation for also storing argument names. #[test] fn box_str_array() { static ARGS: BenchArgs = BenchArgs::new(); let runner = ARGS.runner( || ["a".to_owned().into_boxed_str(), "b".to_owned().into_boxed_str()], ToString::to_string, |_, _| {}, ); let typed_args = runner.args.typed_args::>().unwrap(); let names = runner.arg_names(); assert_eq!(names, ["a", "b"]); test_eq_ptr(names, typed_args); } /// Tests optimizing `IntoIterator>` to reuse the same /// allocation for also storing argument names. #[test] fn cow_str_array() { static ARGS: BenchArgs = BenchArgs::new(); let runner = ARGS.runner( || [Cow::Owned("a".to_owned()), Cow::Borrowed("b")], ToString::to_string, |_, _| {}, ); let typed_args = runner.args.typed_args::>().unwrap(); let names = runner.arg_names(); assert_eq!(names, ["a", "b"]); test_eq_ptr(names, typed_args); } } } divan-0.1.21/src/benchmark/defer.rs000064400000000000000000000141411046102023000151630ustar 00000000000000use std::{ cell::UnsafeCell, mem::{ManuallyDrop, MaybeUninit}, }; /// Defers input usage and output drop during benchmarking. /// /// To reduce memory usage, this only allocates storage for inputs if outputs do /// not need deferred drop. pub(crate) union DeferStore { /// The variant used if outputs need to be dropped. /// /// Inputs are stored are stored contiguously with outputs in memory. This /// improves performance by: /// - Removing the overhead of `zip` between two separate buffers. /// - Improving cache locality and cache prefetching. Input is strategically /// placed before output because iteration is from low to high addresses, /// so doing this makes memory access patterns very predictable. slots: ManuallyDrop>>, /// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be /// dropped. 
inputs: ManuallyDrop>>, } impl Drop for DeferStore { #[inline] fn drop(&mut self) { // SAFETY: The correct variant is used based on `ONLY_INPUTS`. unsafe { if Self::ONLY_INPUTS { ManuallyDrop::drop(&mut self.inputs) } else { ManuallyDrop::drop(&mut self.slots) } } } } impl Default for DeferStore { #[inline] fn default() -> Self { // SAFETY: The correct variant is used based on `ONLY_INPUTS`. unsafe { if Self::ONLY_INPUTS { Self { inputs: ManuallyDrop::new(Vec::new()) } } else { Self { slots: ManuallyDrop::new(Vec::new()) } } } } } impl DeferStore { /// Whether only inputs need to be deferred. /// /// If `true`, outputs do not get inserted into `DeferStore`. const ONLY_INPUTS: bool = !std::mem::needs_drop::(); /// Prepares storage for iterating over `DeferSlot`s for a sample. #[inline] pub fn prepare(&mut self, sample_size: usize) { // Common implementation regardless of `Vec` item type. macro_rules! imp { ($vec:expr) => {{ $vec.clear(); $vec.reserve_exact(sample_size); // SAFETY: `Vec` only contains `MaybeUninit` fields, so values // may be safely created from uninitialized memory. unsafe { $vec.set_len(sample_size) } }}; } // SAFETY: The correct variant is used based on `ONLY_INPUTS`. unsafe { if Self::ONLY_INPUTS { imp!(self.inputs) } else { imp!(self.slots) } } } /// Returns the sample's slots for iteration. /// /// The caller is expected to use the returned slice to initialize inputs /// for the sample loop. /// /// This returns `Err` containing only input slots if `O` does not need /// deferred drop. Ideally this would be implemented directly on `DeferSlot` /// but there's no way to change its size based on `needs_drop::()`. #[inline(always)] pub fn slots(&self) -> Result<&[DeferSlot], &[DeferSlotItem]> { unsafe { if Self::ONLY_INPUTS { Err(&self.inputs) } else { Ok(&self.slots) } } } } /// Storage for a single iteration within a sample. /// /// Input is stored before output to improve cache prefetching since iteration /// progresses from low to high addresses. /// /// # UnsafeCell /// /// `UnsafeCell` is used to allow `output` to safely refer to `input`. Although /// `output` itself is never aliased, it is also stored as `UnsafeCell` in order /// to get mutable access through a shared `&DeferSlot`. /// /// # Safety /// /// All fields **must** be `MaybeUninit`. This allows us to safely set the /// length of `Vec` within the allocated capacity. #[repr(C)] pub(crate) struct DeferSlot { pub input: DeferSlotItem, pub output: DeferSlotItem, } type DeferSlotItem = UnsafeCell>; #[cfg(test)] mod tests { use super::*; /// Tests that accessing an uninitialized `DeferSlot` is safe due to all of /// its fields being `MaybeUninit`. #[test] fn access_uninit_slot() { let mut slot: MaybeUninit> = MaybeUninit::uninit(); let slot_ref = unsafe { slot.assume_init_mut() }; slot_ref.input = UnsafeCell::new(MaybeUninit::new(String::new())); slot_ref.output = UnsafeCell::new(MaybeUninit::new(String::new())); unsafe { let slot = slot.assume_init(); assert_eq!(slot.input.into_inner().assume_init(), ""); assert_eq!(slot.output.into_inner().assume_init(), ""); } } /// Tests that accessing `DeferSlot.input` through an aliased reference in /// `DeferSlot.output` is safe due `input` being an `UnsafeCell`. 
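// Compile-time illustration of the `ONLY_INPUTS` rule above: output types
// without drop glue (such as integers) let `DeferStore` skip the output slots
// entirely, while types like `String` force the paired input/output layout.
const _: () = {
    assert!(!std::mem::needs_drop::<u64>());
    assert!(std::mem::needs_drop::<String>());
};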
#[test] fn access_aliased_input() { struct Output<'i> { input: &'i mut String, } impl Drop for Output<'_> { fn drop(&mut self) { assert_eq!(self.input, "hello"); self.input.push_str(" world"); } } let slot: MaybeUninit> = MaybeUninit::uninit(); let slot_ref = unsafe { slot.assume_init_ref() }; // Loop to ensure previous iterations don't affect later uses of the // same entry slot. for _ in 0..5 { unsafe { let input_ptr = slot_ref.input.get().cast::(); let output_ptr = slot_ref.output.get().cast::(); // Initialize input and output. input_ptr.write("hello".to_owned()); output_ptr.write(Output { input: &mut *input_ptr }); // Use and discard output. assert_eq!((*output_ptr).input, "hello"); output_ptr.drop_in_place(); assert_eq!(&*input_ptr, "hello world"); // Discard input. input_ptr.drop_in_place(); } } } } divan-0.1.21/src/benchmark/mod.rs000064400000000000000000001353251046102023000146650ustar 00000000000000use std::{ cell::UnsafeCell, fmt, mem::{self, MaybeUninit}, num::NonZeroUsize, sync::Barrier, }; use crate::{ alloc::{ AllocOp, AllocOpMap, AllocTally, ThreadAllocInfo, ThreadAllocTally, TotalAllocTallyMap, }, black_box, black_box_drop, counter::{ AnyCounter, AsCountUInt, BytesCount, CharsCount, Counter, CounterCollection, CyclesCount, IntoCounter, ItemsCount, KnownCounterKind, MaxCountUInt, }, divan::SharedContext, stats::{RawSample, SampleCollection, Stats, StatsSet, TimeSample}, thread_pool::BENCH_POOL, time::{FineDuration, Timestamp, UntaggedTimestamp}, util::{self, sync::SyncWrap, Unit}, }; #[cfg(test)] mod tests; mod args; mod defer; mod options; use defer::{DeferSlot, DeferStore}; pub use self::{ args::{BenchArgs, BenchArgsRunner}, options::BenchOptions, }; pub(crate) const DEFAULT_SAMPLE_COUNT: u32 = 100; /// Enables contextual benchmarking in [`#[divan::bench]`](attr.bench.html). /// /// # Examples /// /// ``` /// use divan::{Bencher, black_box}; /// /// #[divan::bench] /// fn copy_from_slice(bencher: Bencher) { /// // Input and output buffers get used in the closure. /// let src = (0..100).collect::>(); /// let mut dst = vec![0; src.len()]; /// /// bencher.bench_local(|| { /// black_box(&mut dst).copy_from_slice(black_box(&src)); /// }); /// } /// ``` #[must_use = "a benchmark function must be registered"] pub struct Bencher<'a, 'b, C = BencherConfig> { pub(crate) context: &'a mut BenchContext<'b>, pub(crate) config: C, } /// Public-in-private type for statically-typed `Bencher` configuration. /// /// This enables configuring `Bencher` using the builder pattern with zero /// runtime cost. pub struct BencherConfig { gen_input: GenI, } impl fmt::Debug for Bencher<'_, '_, C> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Bencher").finish_non_exhaustive() } } impl<'a, 'b> Bencher<'a, 'b> { #[inline] pub(crate) fn new(context: &'a mut BenchContext<'b>) -> Self { Self { context, config: BencherConfig { gen_input: Unit } } } } impl<'a, 'b> Bencher<'a, 'b> { /// Benchmarks a function. /// /// The function can be benchmarked in parallel using the [`threads` /// option](macro@crate::bench#threads). If the function is strictly /// single-threaded, use [`Bencher::bench_local`] instead. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher.bench(|| { /// // Benchmarked code... /// }); /// } /// ``` pub fn bench(self, benched: B) where B: Fn() -> O + Sync, { // Reusing `bench_values` for a zero-sized non-drop input type should // have no overhead. 
self.with_inputs(|| ()).bench_values(|_: ()| benched()); } /// Benchmarks a function on the current thread. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher.bench_local(|| { /// // Benchmarked code... /// }); /// } /// ``` pub fn bench_local(self, mut benched: B) where B: FnMut() -> O, { // Reusing `bench_local_values` for a zero-sized non-drop input type // should have no overhead. self.with_inputs(|| ()).bench_local_values(|_: ()| benched()); } /// Generate inputs for the [benchmarked function](#input-bench). /// /// Time spent generating inputs does not affect benchmark timing. /// /// When [benchmarking in parallel](macro@crate::bench#threads), the input /// generator is called on the same thread as the sample loop that uses that /// input. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| { /// // Generate input: /// String::from("...") /// }) /// .bench_values(|s| { /// // Use input by-value: /// s + "123" /// }); /// } /// ``` pub fn with_inputs(self, gen_input: G) -> Bencher<'a, 'b, BencherConfig> { Bencher { context: self.context, config: BencherConfig { gen_input } } } } impl<'a, 'b, GenI> Bencher<'a, 'b, BencherConfig> { /// Assign a [`Counter`] for all iterations of the benchmarked function. /// /// This will either: /// - Assign a new counter /// - Override an existing counter of the same type /// /// If the counter depends on [generated inputs](Self::with_inputs), use /// [`Bencher::input_counter`] instead. /// /// If context is not needed, the counter can instead be set via /// [`#[divan::bench(counters = ...)]`](macro@crate::bench#counters). /// /// # Examples /// /// ``` /// use divan::{Bencher, counter::BytesCount}; /// /// #[divan::bench] /// fn char_count(bencher: Bencher) { /// let s: String = // ... /// # String::new(); /// /// bencher /// .counter(BytesCount::of_str(&s)) /// .bench(|| { /// divan::black_box(&s).chars().count() /// }); /// } /// ``` #[doc(alias = "throughput")] pub fn counter(self, counter: C) -> Self where C: IntoCounter, { let counter = AnyCounter::new(counter); self.context.counters.set_counter(counter); self } } /// Benchmark over [generated inputs](Self::with_inputs). impl<'a, 'b, I, GenI> Bencher<'a, 'b, BencherConfig> where GenI: FnMut() -> I, { /// Calls a closure to create a [`Counter`] for each input of the /// benchmarked function. /// /// This will either: /// - Assign a new counter /// - Override an existing counter of the same type /// /// If the counter is constant, use [`Bencher::counter`] instead. /// /// When [benchmarking in parallel](macro@crate::bench#threads), the input /// counter is called on the same thread as the sample loop that generates /// and uses that input. /// /// # Examples /// /// The following example emits info for the number of bytes processed when /// benchmarking [`char`-counting](std::str::Chars::count). The byte count /// is gotten by calling [`BytesCount::of_str`] on each iteration's input /// [`String`]. /// /// ``` /// use divan::{Bencher, counter::BytesCount}; /// /// #[divan::bench] /// fn char_count(bencher: Bencher) { /// bencher /// .with_inputs(|| -> String { /// // ... 
/// # String::new() /// }) /// .input_counter(BytesCount::of_str) /// .bench_refs(|s| { /// s.chars().count() /// }); /// } /// ``` pub fn input_counter(self, make_counter: F) -> Self where F: Fn(&I) -> C + Sync + 'static, C: IntoCounter, { self.context.counters.set_input_counter(make_counter); self } /// Creates a [`Counter`] from each input of the benchmarked function. /// /// This may be used if the input returns [`u8`]–[`u64`], [`usize`], or any /// nesting of references to those types. /// /// # Examples /// /// The following example emits info for the number of items processed when /// benchmarking [`FromIterator`] from /// [Range](std::ops::Range)<[usize]> to [`Vec`]. /// /// ``` /// use divan::{Bencher, counter::ItemsCount}; /// /// #[divan::bench] /// fn range_to_vec(bencher: Bencher) { /// bencher /// .with_inputs(|| -> usize { /// // ... /// # 0 /// }) /// .count_inputs_as::() /// .bench_values(|n| -> Vec { /// (0..n).collect() /// }); /// } /// ``` #[inline] pub fn count_inputs_as(self) -> Self where C: Counter, I: AsCountUInt, { match KnownCounterKind::of::() { KnownCounterKind::Bytes => self.input_counter(|c| BytesCount::from(c)), KnownCounterKind::Chars => self.input_counter(|c| CharsCount::from(c)), KnownCounterKind::Cycles => self.input_counter(|c| CyclesCount::from(c)), KnownCounterKind::Items => self.input_counter(|c| ItemsCount::from(c)), } } /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), /// provided by-value. /// /// Per-iteration means the benchmarked function is called exactly once for /// each generated input. /// /// The function can be benchmarked in parallel using the [`threads` /// option](macro@crate::bench#threads). If the function is strictly /// single-threaded, use [`Bencher::bench_local_values`] instead. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| { /// // Generate input: /// String::from("...") /// }) /// .bench_values(|s| { /// // Use input by-value: /// s + "123" /// }); /// } /// ``` pub fn bench_values(self, benched: B) where B: Fn(I) -> O + Sync, GenI: Fn() -> I + Sync, { self.context.bench_loop_threaded( self.config.gen_input, |input| { // SAFETY: Input is guaranteed to be initialized and not // currently referenced by anything else. let input = unsafe { input.get().read().assume_init() }; benched(input) }, // Input ownership is transferred to `benched`. |_input| {}, ); } /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), /// provided by-value. /// /// Per-iteration means the benchmarked function is called exactly once for /// each generated input. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// let mut values = Vec::new(); /// bencher /// .with_inputs(|| { /// // Generate input: /// String::from("...") /// }) /// .bench_local_values(|s| { /// // Use input by-value: /// values.push(s); /// }); /// } /// ``` pub fn bench_local_values(self, mut benched: B) where B: FnMut(I) -> O, { self.context.bench_loop_local( self.config.gen_input, |input| { // SAFETY: Input is guaranteed to be initialized and not // currently referenced by anything else. let input = unsafe { input.get().read().assume_init() }; benched(input) }, // Input ownership is transferred to `benched`. |_input| {}, ); } /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), /// provided by-reference. 
/// /// Per-iteration means the benchmarked function is called exactly once for /// each generated input. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| { /// // Generate input: /// String::from("...") /// }) /// .bench_refs(|s| { /// // Use input by-reference: /// *s += "123"; /// }); /// } /// ``` pub fn bench_refs(self, benched: B) where B: Fn(&mut I) -> O + Sync, GenI: Fn() -> I + Sync, { // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. self.context.bench_loop_threaded( self.config.gen_input, |input| { // SAFETY: Input is guaranteed to be initialized and not // currently referenced by anything else. let input = unsafe { (*input.get()).assume_init_mut() }; benched(input) }, // Input ownership was not transferred to `benched`. |input| { // SAFETY: This function is called after `benched` outputs are // dropped, so we have exclusive access. unsafe { (*input.get()).assume_init_drop() } }, ); } /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), /// provided by-reference. /// /// Per-iteration means the benchmarked function is called exactly once for /// each generated input. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| { /// // Generate input: /// String::from("...") /// }) /// .bench_local_refs(|s| { /// // Use input by-reference: /// *s += "123"; /// }); /// } /// ``` pub fn bench_local_refs(self, mut benched: B) where B: FnMut(&mut I) -> O, { // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. self.context.bench_loop_local( self.config.gen_input, |input| { // SAFETY: Input is guaranteed to be initialized and not // currently referenced by anything else. let input = unsafe { (*input.get()).assume_init_mut() }; benched(input) }, // Input ownership was not transferred to `benched`. |input| { // SAFETY: This function is called after `benched` outputs are // dropped, so we have exclusive access. unsafe { (*input.get()).assume_init_drop() } }, ); } } /// State machine for how the benchmark is being run. #[derive(Clone, Copy)] pub(crate) enum BenchMode { /// The benchmark is being run as `--test`. /// /// Don't collect samples and run exactly once. Test, /// Scale `sample_size` to determine the right size for collecting. Tune { sample_size: u32 }, /// Simply collect samples. Collect { sample_size: u32 }, } impl BenchMode { #[inline] pub fn is_test(self) -> bool { matches!(self, Self::Test) } #[inline] pub fn is_tune(self) -> bool { matches!(self, Self::Tune { .. }) } #[inline] pub fn is_collect(self) -> bool { matches!(self, Self::Collect { .. }) } #[inline] pub fn sample_size(self) -> u32 { match self { Self::Test => 1, Self::Tune { sample_size, .. } | Self::Collect { sample_size, .. } => sample_size, } } } /// `#[divan::bench]` loop context. /// /// Functions called within the benchmark loop should be `#[inline(always)]` to /// ensure instruction cache locality. pub(crate) struct BenchContext<'a> { shared_context: &'a SharedContext, /// User-configured options. pub options: &'a BenchOptions<'a>, /// Whether the benchmark loop was started. pub did_run: bool, /// The number of threads to run the benchmark. The default is 1. /// /// When set to 1, the benchmark loop is guaranteed to stay on the current /// thread and not spawn any threads. pub thread_count: NonZeroUsize, /// Recorded samples. samples: SampleCollection, /// Per-iteration counters grouped by sample. 
counters: CounterCollection, } impl<'a> BenchContext<'a> { /// Creates a new benchmarking context. pub fn new( shared_context: &'a SharedContext, options: &'a BenchOptions, thread_count: NonZeroUsize, ) -> Self { Self { shared_context, options, thread_count, did_run: false, samples: SampleCollection::default(), counters: options.counters.to_collection(), } } /// Runs the single-threaded loop for benchmarking `benched`. /// /// # Safety /// /// See `bench_loop_threaded`. pub fn bench_loop_local( &mut self, gen_input: impl FnMut() -> I, benched: impl FnMut(&UnsafeCell>) -> O, drop_input: impl Fn(&UnsafeCell>), ) { // SAFETY: Closures are guaranteed to run on the current thread, so they // can safely be mutable and non-`Sync`. unsafe { let gen_input = SyncWrap::new(UnsafeCell::new(gen_input)); let benched = SyncWrap::new(UnsafeCell::new(benched)); let drop_input = SyncWrap::new(drop_input); self.thread_count = NonZeroUsize::MIN; self.bench_loop_threaded::( || (*gen_input.get())(), |input| (*benched.get())(input), |input| drop_input(input), ) } } /// Runs the multi-threaded loop for benchmarking `benched`. /// /// # Safety /// /// If `self.threads` is 1, the incoming closures will not escape the /// current thread. This guarantee ensures `bench_loop_local` can soundly /// reuse this method with mutable non-`Sync` closures. /// /// When `benched` is called: /// - `I` is guaranteed to be initialized. /// - No external `&I` or `&mut I` exists. /// /// When `drop_input` is called: /// - All instances of `O` returned from `benched` have been dropped. /// - The same guarantees for `I` apply as in `benched`, unless `benched` /// escaped references to `I`. fn bench_loop_threaded( &mut self, gen_input: impl Fn() -> I + Sync, benched: impl Fn(&UnsafeCell>) -> O + Sync, drop_input: impl Fn(&UnsafeCell>) + Sync, ) { self.did_run = true; let mut current_mode = self.initial_mode(); let is_test = current_mode.is_test(); let record_sample = self.sample_recorder(gen_input, benched, drop_input); let thread_count = self.thread_count.get(); let aux_thread_count = thread_count - 1; let is_single_thread = aux_thread_count == 0; // Per-thread sample info returned by `record_sample`. These are // processed locally to emit user-facing sample info. As a result, this // only contains `thread_count` many elements at a time. let mut raw_samples = Vec::>::new(); // The time spent benchmarking, in picoseconds. // // Unless `skip_ext_time` is set, this includes time external to // `benched`, such as time spent generating inputs and running drop. let mut elapsed_picos: u128 = 0; // The minimum time for benchmarking, in picoseconds. let min_picos = self.options.min_time().picos; // The remaining time left for benchmarking, in picoseconds. let max_picos = self.options.max_time().picos; // Don't bother running if user specifies 0 max time or 0 samples. if max_picos == 0 || !self.options.has_samples() { return; } let timer = self.shared_context.timer; let timer_kind = timer.kind(); let mut rem_samples = if current_mode.is_collect() { Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)) } else { None }; // Only measure precision if we need to tune sample size. 
let timer_precision = if current_mode.is_tune() { timer.precision() } else { FineDuration::default() }; if !is_test { self.samples.time_samples.reserve(self.options.sample_count.unwrap_or(1) as usize); } let skip_ext_time = self.options.skip_ext_time.unwrap_or_default(); let initial_start = if skip_ext_time { None } else { Some(Timestamp::start(timer_kind)) }; let bench_overheads = timer.bench_overheads(); while { // Conditions for when sampling is over: if elapsed_picos >= max_picos { // Depleted the benchmarking time budget. This is a strict // condition regardless of sample count and minimum time. false } else if rem_samples.unwrap_or(1) > 0 { // More samples expected. true } else { // Continue if we haven't reached the time floor. elapsed_picos < min_picos } } { let sample_size = current_mode.sample_size(); self.samples.sample_size = sample_size; let barrier = if is_single_thread { None } else { Some(Barrier::new(thread_count)) }; // Sample loop helper: let record_sample = || -> RawSample { let mut counter_totals: [u128; KnownCounterKind::COUNT] = [0; KnownCounterKind::COUNT]; // Updates per-input counter info for this sample. let mut count_input = |input: &I| { for counter_kind in KnownCounterKind::ALL { // SAFETY: The `I` type cannot change since `with_inputs` // cannot be called more than once on the same `Bencher`. if let Some(count) = unsafe { self.counters.get_input_count(counter_kind, input) } { let total = &mut counter_totals[counter_kind as usize]; *total = (*total).saturating_add(count as u128); } } }; // Sample loop: let ([start, end], alloc_info) = record_sample(sample_size as usize, barrier.as_ref(), &mut count_input); RawSample { start, end, timer, alloc_info, counter_totals } }; // Sample loop: raw_samples.clear(); BENCH_POOL.par_extend(&mut raw_samples, aux_thread_count, |_| record_sample()); // Convert `&[Option]` to `&[Sample]`. let raw_samples: &[RawSample] = { if let Some(thread) = raw_samples .iter() .enumerate() .find_map(|(thread, sample)| sample.is_none().then_some(thread)) { panic!("Divan benchmarking thread {thread} panicked"); } unsafe { assert_eq!(mem::size_of::(), mem::size_of::>()); std::slice::from_raw_parts(raw_samples.as_ptr().cast(), raw_samples.len()) } }; // If testing, exit the benchmarking loop immediately after timing a // single run. if is_test { break; } let slowest_sample = raw_samples.iter().max_by_key(|s| s.duration()).unwrap(); let slowest_time = slowest_sample.duration(); // TODO: Make tuning be less influenced by early runs. Currently if // early runs are very quick but later runs are slow, benchmarking // will take a very long time. // // TODO: Make `sample_size` consider time generating inputs and // dropping inputs/outputs. Currently benchmarks like // `Bencher::bench_refs(String::clear)` take a very long time. if current_mode.is_tune() { // Clear previous smaller samples. self.samples.clear(); self.counters.clear_input_counts(); // If within 100x timer precision, continue tuning. let precision_multiple = slowest_time.picos / timer_precision.picos; if precision_multiple <= 100 { current_mode = BenchMode::Tune { sample_size: sample_size * 2 }; } else { current_mode = BenchMode::Collect { sample_size }; rem_samples = Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)); } } // Returns the sample's duration adjusted for overhead. 
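// A condensed model of the tuning step above: the sample size doubles until
// the slowest sample exceeds 100x the timer's precision, at which point the
// size is frozen and real samples are collected.
fn next_sample_size(sample_size: u32, slowest_picos: u128, precision_picos: u128) -> (u32, bool) {
    // `.max(1)` only guards this standalone model against dividing by zero.
    let precision_multiple = slowest_picos / precision_picos.max(1);
    if precision_multiple <= 100 {
        // Keep tuning with a doubled sample size.
        (sample_size * 2, false)
    } else {
        // Settle on this size and start collecting.
        (sample_size, true)
    }
}

#[test]
fn tuning_model() {
    assert_eq!(next_sample_size(1, 50, 1), (2, false));
    assert_eq!(next_sample_size(8, 5_000, 1), (8, true));
}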
let sample_duration_sub_overhead = |raw_sample: &RawSample| { let overhead = bench_overheads.total_overhead(sample_size, &raw_sample.alloc_info); FineDuration { picos: raw_sample .duration() .clamp_to(timer_precision) .picos .saturating_sub(overhead.picos), } .clamp_to(timer_precision) }; for raw_sample in raw_samples { let sample_index = self.samples.time_samples.len(); self.samples .time_samples .push(TimeSample { duration: sample_duration_sub_overhead(raw_sample) }); if !raw_sample.alloc_info.tallies.is_empty() { self.samples .alloc_info_by_sample .insert(sample_index as u32, raw_sample.alloc_info.clone()); } // Insert per-input counter information. for counter_kind in KnownCounterKind::ALL { if !self.counters.uses_input_counts(counter_kind) { continue; } let total_count = raw_sample.counter_totals[counter_kind as usize]; // Cannot overflow `MaxCountUInt` because `total_count` // cannot exceed `MaxCountUInt::MAX * sample_size`. let per_iter_count = (total_count / sample_size as u128) as MaxCountUInt; self.counters.push_counter(AnyCounter::known(counter_kind, per_iter_count)); } if let Some(rem_samples) = &mut rem_samples { *rem_samples = rem_samples.saturating_sub(1); } } if let Some(initial_start) = initial_start { let last_end = raw_samples.iter().map(|s| s.end).max().unwrap(); elapsed_picos = last_end.duration_since(initial_start, timer).picos; } else { // Progress by at least 1ns to prevent extremely fast // functions from taking forever when `min_time` is set. let progress_picos = slowest_time.picos.max(1_000); elapsed_picos = elapsed_picos.saturating_add(progress_picos); } } // Reset flag for ignoring allocations. crate::alloc::IGNORE_ALLOC.set(false); } /// Returns a closure that takes the sample size and input counter, and then /// returns a newly recorded sample. fn sample_recorder( &self, gen_input: impl Fn() -> I, benched: impl Fn(&UnsafeCell>) -> O, drop_input: impl Fn(&UnsafeCell>), ) -> impl Fn(usize, Option<&Barrier>, &mut dyn FnMut(&I)) -> ([Timestamp; 2], ThreadAllocInfo) { // We defer: // - Usage of `gen_input` values. // - Drop destructor for `O`, preventing it from affecting sample // measurements. Outputs are stored into a pre-allocated buffer during // the sample loop. The allocation is reused between samples to reduce // time spent between samples. let timer_kind = self.shared_context.timer.kind(); move |sample_size: usize, barrier: Option<&Barrier>, count_input: &mut dyn FnMut(&I)| { let mut defer_store = DeferStore::::default(); let mut saved_alloc_info = ThreadAllocInfo::new(); let mut save_alloc_info = || { if crate::alloc::IGNORE_ALLOC.get() { return; } if let Some(alloc_info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. saved_alloc_info = unsafe { alloc_info.as_ptr().read() }; } }; // Synchronize all threads to start timed section simultaneously and // clear every thread's memory profiling info. // // This ensures work external to the timed section does not affect // the timing of other threads. let sync_threads = |is_start: bool| { sync_impl(barrier, is_start); // Monomorphize implementation to reduce code size. #[inline(never)] fn sync_impl(barrier: Option<&Barrier>, is_start: bool) { // Ensure benchmarked section has a `ThreadAllocInfo` // allocated for the current thread and clear previous info. let alloc_info = if is_start { ThreadAllocInfo::current() } else { None }; // Synchronize all threads. // // This is the final synchronization point for the end. 
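// A minimal model of the barrier handshake above: every thread finishes its
// untimed setup, waits at a barrier, and only then enters the timed section,
// so setup on one thread cannot skew another thread's measurement.
fn run_synchronized(threads: usize) {
    use std::sync::Barrier;

    let barrier = Barrier::new(threads);
    std::thread::scope(|scope| {
        for _ in 0..threads {
            scope.spawn(|| {
                // Untimed per-thread setup would happen here.
                barrier.wait();
                // The timed section starts here on all threads at once.
            });
        }
    });
}

#[test]
fn barrier_model() {
    run_synchronized(4);
}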
if let Some(barrier) = barrier { barrier.wait(); } if let Some(mut alloc_info) = alloc_info { // SAFETY: We have exclusive access. let alloc_info = unsafe { alloc_info.as_mut() }; alloc_info.clear(); // Synchronize all threads. if let Some(barrier) = barrier { barrier.wait(); } } } }; // The following logic chooses how to efficiently sample the // benchmark function once and assigns `sample_start`/`sample_end` // before/after the sample loop. // // NOTE: Testing and benchmarking should behave exactly the same // when getting the sample time span. We don't want to introduce // extra work that may worsen measurement quality for real // benchmarking. let sample_start: UntaggedTimestamp; let sample_end: UntaggedTimestamp; if size_of::() == 0 && (size_of::() == 0 || !mem::needs_drop::()) { // Use a range instead of `defer_store` to make the benchmarking // loop cheaper. // Run `gen_input` the expected number of times in case it // updates external state used by `benched`. for _ in 0..sample_size { let input = gen_input(); count_input(&input); // Inputs are consumed/dropped later. mem::forget(input); } sync_threads(true); sample_start = UntaggedTimestamp::start(timer_kind); // Sample loop: for _ in 0..sample_size { // SAFETY: Input is a ZST, so we can construct one out of // thin air. let input = unsafe { UnsafeCell::new(MaybeUninit::::zeroed()) }; mem::forget(black_box(benched(&input))); } sample_end = UntaggedTimestamp::end(timer_kind); sync_threads(false); save_alloc_info(); // Drop outputs and inputs. for _ in 0..sample_size { // Output only needs drop if ZST. if size_of::() == 0 { // SAFETY: Output is a ZST, so we can construct one out // of thin air. unsafe { _ = mem::zeroed::() } } if mem::needs_drop::() { // SAFETY: Input is a ZST, so we can construct one out // of thin air and not worry about aliasing. unsafe { drop_input(&UnsafeCell::new(MaybeUninit::::zeroed())) } } } } else { defer_store.prepare(sample_size); match defer_store.slots() { // Output needs to be dropped. We defer drop in the sample // loop by inserting it into `defer_store`. Ok(defer_slots_slice) => { // Initialize and store inputs. for DeferSlot { input, .. } in defer_slots_slice { // SAFETY: We have exclusive access to `input`. let input = unsafe { &mut *input.get() }; let input = input.write(gen_input()); count_input(input); // Make input opaque to benchmarked function. black_box(input); } // Create iterator before the sample timing section to // reduce benchmarking overhead. let defer_slots_iter = defer_slots_slice.iter(); sync_threads(true); sample_start = UntaggedTimestamp::start(timer_kind); // Sample loop: for defer_slot in defer_slots_iter { // SAFETY: All inputs in `defer_store` were // initialized and we have exclusive access to the // output slot. unsafe { let output = benched(&defer_slot.input); *defer_slot.output.get() = MaybeUninit::new(output); } } sample_end = UntaggedTimestamp::end(timer_kind); sync_threads(false); save_alloc_info(); // Prevent the optimizer from removing writes to inputs // and outputs in the sample loop. black_box(defer_slots_slice); // Drop outputs and inputs. for DeferSlot { input, output } in defer_slots_slice { // SAFETY: All outputs were initialized in the // sample loop and we have exclusive access. unsafe { (*output.get()).assume_init_drop() } if mem::needs_drop::() { // SAFETY: The output was dropped and thus we // have exclusive access to inputs. unsafe { drop_input(input) } } } } // Output does not need to be dropped. 
Err(defer_inputs_slice) => { // Initialize and store inputs. for input in defer_inputs_slice { // SAFETY: We have exclusive access to `input`. let input = unsafe { &mut *input.get() }; let input = input.write(gen_input()); count_input(input); // Make input opaque to benchmarked function. black_box(input); } // Create iterator before the sample timing section to // reduce benchmarking overhead. let defer_inputs_iter = defer_inputs_slice.iter(); sync_threads(true); sample_start = UntaggedTimestamp::start(timer_kind); // Sample loop: for input in defer_inputs_iter { // SAFETY: All inputs in `defer_store` were // initialized. black_box_drop(unsafe { benched(input) }); } sample_end = UntaggedTimestamp::end(timer_kind); sync_threads(false); save_alloc_info(); // Prevent the optimizer from removing writes to inputs // in the sample loop. black_box(defer_inputs_slice); // Drop inputs. if mem::needs_drop::() { for input in defer_inputs_slice { // SAFETY: We have exclusive access to inputs. unsafe { drop_input(input) } } } } } } // SAFETY: These values are guaranteed to be the correct variant // because they were created from the same `timer_kind`. let interval = unsafe { [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)] }; (interval, saved_alloc_info) } } #[inline] fn initial_mode(&self) -> BenchMode { if self.shared_context.action.is_test() { BenchMode::Test } else if let Some(sample_size) = self.options.sample_size { BenchMode::Collect { sample_size } } else { BenchMode::Tune { sample_size: 1 } } } pub fn compute_stats(&self) -> Stats { let time_samples = &self.samples.time_samples; let alloc_info_by_sample = &self.samples.alloc_info_by_sample; let sample_count = time_samples.len(); let sample_size = self.samples.sample_size; let total_count = self.samples.iter_count(); let total_duration = self.samples.total_duration(); let mean_duration = FineDuration { picos: total_duration.picos.checked_div(total_count as u128).unwrap_or_default(), }; // Samples sorted by duration. let sorted_samples = self.samples.sorted_samples(); let median_samples = util::slice_middle(&sorted_samples); let index_of_sample = |sample: &TimeSample| -> usize { util::slice_ptr_index(&self.samples.time_samples, sample) }; let counter_count_for_sample = |sample: &TimeSample, counter_kind: KnownCounterKind| -> Option { let counts = self.counters.counts(counter_kind); let index = if self.counters.uses_input_counts(counter_kind) { index_of_sample(sample) } else { 0 }; counts.get(index).copied() }; let min_duration = sorted_samples.first().map(|s| s.duration / sample_size).unwrap_or_default(); let max_duration = sorted_samples.last().map(|s| s.duration / sample_size).unwrap_or_default(); let median_duration = if median_samples.is_empty() { FineDuration::default() } else { let sum: u128 = median_samples.iter().map(|s| s.duration.picos).sum(); FineDuration { picos: sum / median_samples.len() as u128 } / sample_size }; let counts = KnownCounterKind::ALL.map(|counter_kind| { let median: MaxCountUInt = { let mut sum: u128 = 0; for sample in median_samples { let sample_count = counter_count_for_sample(sample, counter_kind)? as u128; // Saturating add in case `MaxUIntCount > u64`. 
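// A model of the median computation above, assuming `util::slice_middle`
// yields the middle one or two elements of the sorted samples: those are
// averaged, then scaled from per-sample time down to per-iteration time.
fn median_iter_picos(sorted_sample_picos: &[u128], sample_size: u128) -> u128 {
    if sorted_sample_picos.is_empty() {
        return 0;
    }
    let len = sorted_sample_picos.len();
    let middle = if len % 2 == 0 {
        &sorted_sample_picos[len / 2 - 1..=len / 2]
    } else {
        &sorted_sample_picos[len / 2..=len / 2]
    };
    let sum: u128 = middle.iter().sum();
    (sum / middle.len() as u128) / sample_size
}

#[test]
fn median_model() {
    // Four samples of 4 iterations each: the middle two (80 and 120) average
    // to 100 picos per sample, i.e. 25 picos per iteration.
    assert_eq!(median_iter_picos(&[40, 80, 120, 400], 4), 25);
}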
sum = sum.saturating_add(sample_count); } (sum / median_samples.len() as u128) as MaxCountUInt }; Some(StatsSet { fastest: sorted_samples .first() .and_then(|s| counter_count_for_sample(s, counter_kind))?, slowest: sorted_samples .last() .and_then(|s| counter_count_for_sample(s, counter_kind))?, median, mean: self.counters.mean_count(counter_kind), }) }); let sample_alloc_info = |sample: Option<&TimeSample>| -> Option<&ThreadAllocInfo> { sample .and_then(|sample| u32::try_from(index_of_sample(sample)).ok()) .and_then(|index| self.samples.alloc_info_by_sample.get(&index)) }; let sample_alloc_tally = |sample: Option<&TimeSample>, op: AllocOp| -> ThreadAllocTally { sample_alloc_info(sample) .map(|alloc_info| alloc_info.tallies.get(op)) .copied() .unwrap_or_default() }; let mut alloc_total_max_count = 0u128; let mut alloc_total_max_size = 0u128; let mut alloc_total_tallies = TotalAllocTallyMap::default(); for alloc_info in alloc_info_by_sample.values() { alloc_total_max_count += alloc_info.max_count as u128; alloc_total_max_size += alloc_info.max_size as u128; alloc_info.tallies.add_to_total(&mut alloc_total_tallies); } let sample_size = f64::from(sample_size); Stats { sample_count: sample_count as u32, iter_count: total_count, time: StatsSet { fastest: min_duration, slowest: max_duration, median: median_duration, mean: mean_duration, }, max_alloc: StatsSet { fastest: { let alloc_info = sample_alloc_info(sorted_samples.first().copied()); AllocTally { count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() / sample_size, size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() / sample_size, } }, slowest: { let alloc_info = sample_alloc_info(sorted_samples.last().copied()); AllocTally { count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() / sample_size, size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() / sample_size, } }, // TODO: Switch to median of alloc info itself, rather than // basing off of median times. 
median: { let alloc_info_for_median = |index| sample_alloc_info(median_samples.get(index).copied()); let max_count_for_median = |index: usize| -> f64 { alloc_info_for_median(index) .map(|info| info.max_count as f64) .unwrap_or_default() }; let max_size_for_median = |index: usize| -> f64 { alloc_info_for_median(index) .map(|info| info.max_size as f64) .unwrap_or_default() }; let median_count = median_samples.len().max(1) as f64; let median_max_count = max_count_for_median(0) + max_count_for_median(1); let median_max_size = max_size_for_median(0) + max_size_for_median(1); AllocTally { count: median_max_count / median_count / sample_size, size: median_max_size / median_count / sample_size, } }, mean: AllocTally { count: alloc_total_max_count as f64 / total_count as f64, size: alloc_total_max_size as f64 / total_count as f64, }, } .transpose(), alloc_tallies: AllocOpMap { values: AllocOp::ALL .map(|op| StatsSet { fastest: { let fastest = sample_alloc_tally(sorted_samples.first().copied(), op); AllocTally { count: fastest.count as f64 / sample_size, size: fastest.size as f64 / sample_size, } }, slowest: { let slowest = sample_alloc_tally(sorted_samples.last().copied(), op); AllocTally { count: slowest.count as f64 / sample_size, size: slowest.size as f64 / sample_size, } }, median: { let tally_for_median = |index: usize| -> ThreadAllocTally { sample_alloc_tally(median_samples.get(index).copied(), op) }; let a = tally_for_median(0); let b = tally_for_median(1); let median_count = median_samples.len().max(1) as f64; let avg_count = (a.count as f64 + b.count as f64) / median_count; let avg_size = (a.size as f64 + b.size as f64) / median_count; AllocTally { count: avg_count / sample_size, size: avg_size / sample_size, } }, mean: { let tally = alloc_total_tallies.get(op); AllocTally { count: tally.count as f64 / total_count as f64, size: tally.size as f64 / total_count as f64, } }, }) .map(StatsSet::transpose), }, counts, } } } impl StatsSet> { #[inline] pub fn transpose(self) -> AllocTally> { AllocTally { count: StatsSet { fastest: self.fastest.count, slowest: self.slowest.count, median: self.median.count, mean: self.mean.count, }, size: StatsSet { fastest: self.fastest.size, slowest: self.slowest.size, median: self.median.size, mean: self.mean.size, }, } } } divan-0.1.21/src/benchmark/options.rs000064400000000000000000000056251046102023000156000ustar 00000000000000use std::{borrow::Cow, time::Duration}; use crate::{counter::CounterSet, time::FineDuration}; /// Benchmarking options set directly by the user in `#[divan::bench]` and /// `#[divan::bench_group]`. /// /// Changes to fields must be reflected in the "Options" sections of the docs /// for `#[divan::bench]` and `#[divan::bench_group]`. #[derive(Clone, Default)] pub struct BenchOptions<'a> { /// The number of sample recordings. pub sample_count: Option, /// The number of iterations inside a single sample. pub sample_size: Option, /// The number of threads to benchmark the sample. This is 1 by default. /// /// If set to 0, this will use [`std::thread::available_parallelism`]. /// /// We use `&'static [usize]` by leaking the input because `BenchOptions` is /// cached on first retrieval. pub threads: Option>, /// Counts the number of values processed each iteration of a benchmarked /// function. pub counters: CounterSet, /// The time floor for benchmarking a function. pub min_time: Option, /// The time ceiling for benchmarking a function. 
pub max_time: Option, /// When accounting for `min_time` or `max_time`, skip time external to /// benchmarked functions, such as time spent generating inputs and running /// [`Drop`]. pub skip_ext_time: Option, /// Whether the benchmark should be ignored. /// /// This may be set within the attribute or with a separate /// [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). pub ignore: Option, } impl<'a> BenchOptions<'a> { /// Overwrites `other` with values set in `self`. #[must_use] pub(crate) fn overwrite<'b>(&'b self, other: &'b Self) -> Self where 'b: 'a, { Self { // `Copy` values: sample_count: self.sample_count.or(other.sample_count), sample_size: self.sample_size.or(other.sample_size), threads: self.threads.as_deref().or(other.threads.as_deref()).map(Cow::Borrowed), min_time: self.min_time.or(other.min_time), max_time: self.max_time.or(other.max_time), skip_ext_time: self.skip_ext_time.or(other.skip_ext_time), ignore: self.ignore.or(other.ignore), // `Clone` values: counters: self.counters.overwrite(&other.counters), } } /// Returns `true` if non-zero samples are specified. #[inline] pub(crate) fn has_samples(&self) -> bool { self.sample_count != Some(0) && self.sample_size != Some(0) } #[inline] pub(crate) fn min_time(&self) -> FineDuration { self.min_time.map(FineDuration::from).unwrap_or_default() } #[inline] pub(crate) fn max_time(&self) -> FineDuration { self.max_time.map(FineDuration::from).unwrap_or(FineDuration::MAX) } } divan-0.1.21/src/benchmark/tests.rs000064400000000000000000000351671046102023000152530ustar 00000000000000//! Tests every benchmarking loop combination in `Bencher`. When run under Miri, //! this catches memory leaks and UB in `unsafe` code. use std::{ collections::HashSet, sync::atomic::{AtomicUsize, Ordering::SeqCst}, }; use util::defer; use super::*; use crate::{ config::Action, time::{Timer, TimerKind}, }; // We use a small number of runs because Miri is very slow. const SAMPLE_COUNT: u32 = 3; const SAMPLE_SIZE: u32 = 2; // Tests `SAMPLE_COUNT` by including it in the middle and having higher numbers // where `SAMPLE_COUNT % n != 0`. const THREAD_COUNTS: &[usize] = if cfg!(miri) { // Speed up Miri tests while still catching UB/memory issues. &[1, 2] } else { // Exhaustively test expectations. // // Tests `SAMPLE_COUNT` by: // - Including it in the middle // - Having numbers where `SAMPLE_COUNT % n` varies &[1, 2, 3, 4, 5, 6, 9] }; #[track_caller] fn test_bencher(test: &mut dyn FnMut(Bencher)) { // Silence Miri about leaking threads. let _drop_threads = defer(|| BENCH_POOL.drop_threads()); let bench_options = BenchOptions { sample_count: Some(SAMPLE_COUNT), sample_size: Some(SAMPLE_SIZE), ..BenchOptions::default() }; for timer in Timer::available() { for action in [Action::Bench, Action::Test] { let shared_context = SharedContext { action, timer }; for &thread_count in THREAD_COUNTS { let mut bench_context = BenchContext::new( &shared_context, &bench_options, NonZeroUsize::new(thread_count).unwrap(), ); test(Bencher::new(&mut bench_context)); assert!(bench_context.did_run); let samples = &bench_context.samples; // '--test' should run the expected number of times but not // allocate any samples. if action.is_test() { assert_eq!(samples.time_samples.capacity(), 0); } } } } } fn make_string() -> String { ('a'..='z').collect() } /// Tests that the benchmarked function runs the expected number of times when /// running either in benchmark or test mode. 
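// A hedged sketch of how the options above surface in the attribute macro;
// the exact values are arbitrary and the benchmark is hypothetical. Each
// option shown maps to one of the `BenchOptions` fields.
#[divan::bench(
    sample_count = 500,
    sample_size = 10,
    threads = [1, 4],
    min_time = 0.5,
    max_time = 2
)]
fn sort_values() -> Vec<u32> {
    let mut values: Vec<u32> = (0..1_000u32).rev().collect();
    values.sort_unstable();
    values
}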
/// /// Tests operate over all input/output combinations of: /// - `()` /// - `i32` /// - `String` /// - Zero sized type (ZST) that implements `Drop` /// /// This ensures that any special handling of `size_of` or `needs_drop` does not /// affect the number of runs. #[allow(clippy::unused_unit)] mod run_count { use super::*; fn test(run_bench: fn(Bencher, &(dyn Fn() + Sync))) { test_with_drop_counter(&AtomicUsize::new(usize::MAX), run_bench); } fn test_with_drop_counter( drop_count: &AtomicUsize, run_bench: fn(Bencher, &(dyn Fn() + Sync)), ) { let test_drop_count = drop_count.load(SeqCst) != usize::MAX; let bench_count = AtomicUsize::new(0); let test_count = AtomicUsize::new(0); let mut thread_counts = HashSet::::new(); let mut timer_os = false; let mut timer_tsc = false; test_bencher(&mut |bencher| { let context = &bencher.context; let thread_count = context.thread_count.get(); thread_counts.insert(thread_count as u32); match context.shared_context.timer.kind() { TimerKind::Os => timer_os = true, TimerKind::Tsc => timer_tsc = true, } let is_test = context.shared_context.action.is_test(); let shared_run_count = if is_test { &test_count } else { &bench_count }; let start_run_count = shared_run_count.load(SeqCst); run_bench(bencher, &|| { shared_run_count.fetch_add(1, SeqCst); }); let end_run_count = shared_run_count.load(SeqCst); let run_count = end_run_count - start_run_count; if is_test { assert_eq!(run_count, thread_count); } else { let expected_samples = match SAMPLE_COUNT as usize % thread_count { 0 => SAMPLE_COUNT, rem => SAMPLE_COUNT + (thread_count - rem) as u32, }; let expected_iters = (expected_samples * SAMPLE_SIZE) as usize; assert_eq!(run_count, expected_iters); } }); let thread_count = thread_counts.into_iter().sum::(); let timer_count = timer_os as u32 + timer_tsc as u32; let bench_count = bench_count.into_inner() as u32; let test_count = test_count.into_inner() as u32; let total_count = bench_count + test_count; assert_ne!(total_count, 0); // The drop count should equal the total run count. if test_drop_count { assert_eq!(drop_count.load(SeqCst), total_count as usize); } assert_eq!(test_count, timer_count * thread_count); } #[test] fn bench() { struct DroppedZst; static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); impl Drop for DroppedZst { fn drop(&mut self) { ZST_DROP_COUNT.fetch_add(1, SeqCst); } } // `()` out. test(|b, f| b.bench(f)); // `i32` out. test(|b, f| { b.bench(|| -> i32 { f(); 100i32 }) }); // `String` out. test(|b, f| { b.bench(|| -> String { f(); make_string() }) }); // `DroppedZst` out. test_with_drop_counter(&ZST_DROP_COUNT, |b, f| { b.bench(|| -> DroppedZst { f(); DroppedZst }) }); } #[test] fn bench_values() { struct DroppedZst; static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); impl Drop for DroppedZst { fn drop(&mut self) { ZST_DROP_COUNT.fetch_add(1, SeqCst); } } let test_zst_drop = |run_bench| { ZST_DROP_COUNT.store(0, SeqCst); test_with_drop_counter(&ZST_DROP_COUNT, run_bench); }; // `()` in, `()` out. test(|b, f| b.with_inputs(|| ()).bench_values(|_: ()| -> () { f() })); // `()` in, `i32` out. test(|b, f| { b.with_inputs(|| ()).bench_values(|_: ()| -> i32 { f(); 100i32 }) }); // `()` in, `String` out. test(|b, f| { b.with_inputs(|| ()).bench_values(|_: ()| -> String { f(); make_string() }) }); // `()` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| ()).bench_values(|_: ()| -> DroppedZst { f(); DroppedZst }) }); // `i32` in, `()` out. 
test(|b, f| b.with_inputs(|| 100i32).bench_values(|_: i32| -> () { f() })); // `i32` in, `i32` out. test(|b, f| { b.with_inputs(|| 100i32).bench_values(|value: i32| -> i32 { f(); value }) }); // `i32` in, `String` out. test(|b, f| { b.with_inputs(|| 100i32).bench_values(|_: i32| -> String { f(); make_string() }) }); // `i32` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| 100i32).bench_values(|_: i32| -> DroppedZst { f(); DroppedZst }) }); // `String` in, `()` out. test(|b, f| b.with_inputs(make_string).bench_values(|_: String| -> () { f() })); // `String` in, `i32` out. test(|b, f| { b.with_inputs(make_string).bench_values(|_: String| -> i32 { f(); 100i32 }) }); // `String` in, `String` out. test(|b, f| { b.with_inputs(make_string).bench_values(|value: String| -> String { f(); value }) }); // `String` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(make_string).bench_values(|_: String| -> DroppedZst { f(); DroppedZst }) }); // `DroppedZst` in, `()` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> () { f() }) }); // `DroppedZst` in, `i32` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> i32 { f(); 100i32 }) }); // `DroppedZst` in, `String` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> String { f(); make_string() }) }); // `DroppedZst` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_values(|value: DroppedZst| -> DroppedZst { f(); value }) }); } #[test] fn bench_refs() { struct DroppedZst; static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); impl Drop for DroppedZst { fn drop(&mut self) { ZST_DROP_COUNT.fetch_add(1, SeqCst); } } let test_zst_drop = |run_bench| { ZST_DROP_COUNT.store(0, SeqCst); test_with_drop_counter(&ZST_DROP_COUNT, run_bench); }; // `&mut ()` in, `()` out. test(|b, f| b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> () { f() })); // `&mut ()` in, `i32` out. test(|b, f| { b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> i32 { f(); 100i32 }) }); // `&mut ()` in, `String` out. test(|b, f| { b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> String { f(); make_string() }) }); // `&mut ()` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> DroppedZst { f(); DroppedZst }) }); // `&mut i32` in, `()` out. test(|b, f| b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> () { f() })); // `&mut i32` in, `i32` out. test(|b, f| { b.with_inputs(|| 100i32).bench_refs(|value: &mut i32| -> i32 { f(); *value }) }); // `&mut i32` in, `String` out. test(|b, f| { b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> String { f(); make_string() }) }); // `&mut i32` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> DroppedZst { f(); DroppedZst }) }); // `&mut String` in, `()` out. test(|b, f| b.with_inputs(make_string).bench_refs(|_: &mut String| -> () { f() })); // `&mut String` in, `i32` out. test(|b, f| { b.with_inputs(make_string).bench_refs(|_: &mut String| -> i32 { f(); 100i32 }) }); // `&mut String` in, `String` out. test(|b, f| { b.with_inputs(make_string).bench_refs(|value: &mut String| -> String { f(); value.clone() }) }); // `&mut String` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(make_string).bench_refs(|_: &mut String| -> DroppedZst { f(); DroppedZst }) }); // `&mut DroppedZst` in, `()` out. 
test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> () { f() }) }); // `&mut DroppedZst` in, `i32` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> i32 { f(); 100i32 }) }); // `&mut DroppedZst` in, `String` out. test_zst_drop(|b, f| { b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> String { f(); make_string() }) }); // `&mut DroppedZst` in, `DroppedZst` out. test_zst_drop(|b, f| { b.with_inputs(|| { // Adjust counter for input ZST. ZST_DROP_COUNT.fetch_sub(1, SeqCst); DroppedZst }) .bench_refs(|_: &mut DroppedZst| -> DroppedZst { f(); DroppedZst }) }); } } mod no_input { use super::*; #[test] fn string_output() { test_bencher(&mut |b| b.bench(make_string)); } #[test] fn no_output() { test_bencher(&mut |b| b.bench(|| black_box_drop(make_string()))); } } mod string_input { use super::*; #[test] fn string_output() { test_bencher(&mut |b| b.with_inputs(make_string).bench_values(|s| s.to_ascii_uppercase())); } #[test] fn no_output() { test_bencher(&mut |b| b.with_inputs(make_string).bench_refs(|s| s.make_ascii_uppercase())); } } mod zst_input { use super::*; #[test] fn zst_output() { struct DroppedZst; // Each test has its own `ZST_COUNT` global because tests are run // independently in parallel. static ZST_COUNT: AtomicUsize = AtomicUsize::new(0); impl Drop for DroppedZst { fn drop(&mut self) { ZST_COUNT.fetch_sub(1, SeqCst); } } test_bencher(&mut |b| { b.with_inputs(|| { ZST_COUNT.fetch_add(1, SeqCst); DroppedZst }) .bench_values(black_box); }); assert_eq!(ZST_COUNT.load(SeqCst), 0); } #[test] fn no_output() { struct DroppedZst; static ZST_COUNT: AtomicUsize = AtomicUsize::new(0); impl Drop for DroppedZst { fn drop(&mut self) { ZST_COUNT.fetch_sub(1, SeqCst); } } test_bencher(&mut |b| { b.with_inputs(|| { ZST_COUNT.fetch_add(1, SeqCst); DroppedZst }) .bench_values(drop); }); assert_eq!(ZST_COUNT.load(SeqCst), 0); } } divan-0.1.21/src/cli.rs000064400000000000000000000156301046102023000127170ustar 00000000000000use clap::{builder::PossibleValue, value_parser, Arg, ArgAction, ColorChoice, Command, ValueEnum}; use crate::{ config::{ParsedSeconds, SortingAttr}, counter::MaxCountUInt, time::TimerKind, util, }; pub(crate) fn command() -> Command { fn option(name: &'static str) -> Arg { Arg::new(name).long(name) } fn flag(name: &'static str) -> Arg { option(name).action(ArgAction::SetTrue) } fn ignored_flag(name: &'static str) -> Arg { flag(name).hide(true) } // Custom arguments not supported by libtest: // - bytes-format // - sample-count // - sample-size // - timer // - sort // - sortr // TODO: `--format ` let mut cmd = Command::new("divan"); // Support `cargo-nextest` running us with `--list --format terse`. // // TODO: Add CI test to ensure this doesn't break. 
if util::is_cargo_nextest() { cmd = cmd.arg(option("format").value_parser(["terse"]).requires("list")); } cmd .arg( Arg::new("filter") .value_name("FILTER") .help("Only run benchmarks whose names match this pattern") .action(ArgAction::Append), ) .arg( flag("test") .help("Run benchmarks once to ensure they run successfully") .conflicts_with("list"), ) .arg(flag("list").help("Lists benchmarks").conflicts_with("test")) .arg( option("color") .value_name("WHEN") .help("Controls when to use colors") .value_parser(value_parser!(ColorChoice)) ) .arg( option("skip") .value_name("FILTER") .help("Skip benchmarks whose names match this pattern") .action(ArgAction::Append), ) .arg(flag("exact").help("Filter benchmarks by exact name rather than by pattern")) .arg(flag("ignored").help("Run only ignored benchmarks").conflicts_with("include-ignored")) .arg( flag("include-ignored") .help("Run ignored and not-ignored benchmarks") .conflicts_with("ignored"), ) .arg( option("sort") .env("DIVAN_SORT") .value_name("ATTRIBUTE") .help("Sort benchmarks in ascending order") .value_parser(value_parser!(SortingAttr)) ) .arg( option("sortr") .env("DIVAN_SORTR") .value_name("ATTRIBUTE") .help("Sort benchmarks in descending order") .value_parser(value_parser!(SortingAttr)) .overrides_with("sort"), ) .arg( option("timer") .env("DIVAN_TIMER") .value_name("os|tsc") .help("Set the timer used for measuring samples") .value_parser(value_parser!(TimerKind)), ) .arg( option("sample-count") .env("DIVAN_SAMPLE_COUNT") .value_name("N") .help("Set the number of sampling iterations") .value_parser(value_parser!(u32)), ) .arg( option("sample-size") .env("DIVAN_SAMPLE_SIZE") .value_name("N") .help("Set the number of iterations inside a single sample") .value_parser(value_parser!(u32)), ) .arg( option("threads") .env("DIVAN_THREADS") .value_name("N") .value_delimiter(',') .action(ArgAction::Append) .help("Run across multiple threads to measure contention on atomics and locks") .value_parser(value_parser!(usize)), ) .arg( option("min-time") .env("DIVAN_MIN_TIME") .value_name("SECS") .help("Set the minimum seconds spent benchmarking a single function") .value_parser(value_parser!(ParsedSeconds)), ) .arg( option("max-time") .env("DIVAN_MAX_TIME") .value_name("SECS") .help("Set the maximum seconds spent benchmarking a single function, with priority over '--min-time'") .value_parser(value_parser!(ParsedSeconds)), ) .arg( option("skip-ext-time") .env("DIVAN_SKIP_EXT_TIME") .value_name("true|false") .help("When '--min-time' or '--max-time' is set, skip time external to benchmarked functions") .value_parser(value_parser!(bool)) .num_args(0..=1), ) .arg( option("items-count") .env("DIVAN_ITEMS_COUNT") .value_name("N") .help("Set every benchmark to have a throughput of N items") .value_parser(value_parser!(MaxCountUInt)), ) .arg( option("bytes-count") .env("DIVAN_BYTES_COUNT") .value_name("N") .help("Set every benchmark to have a throughput of N bytes") .value_parser(value_parser!(MaxCountUInt)), ) .arg( option("bytes-format") .env("DIVAN_BYTES_FORMAT") .help("Set the numerical base for bytes in output") .value_name("decimal|binary") .value_parser(value_parser!(crate::counter::PrivBytesFormat)) ) .arg( option("chars-count") .env("DIVAN_CHARS_COUNT") .value_name("N") .help("Set every benchmark to have a throughput of N string scalars") .value_parser(value_parser!(MaxCountUInt)), ) .arg( option("cycles-count") .env("DIVAN_CYCLES_COUNT") .value_name("N") .help("Set every benchmark to have a throughput of N cycles, displayed as Hertz") 
.value_parser(value_parser!(MaxCountUInt)), ) // ignored: .args([ignored_flag("bench"), ignored_flag("nocapture"), ignored_flag("show-output")]) } impl ValueEnum for TimerKind { fn value_variants<'a>() -> &'a [Self] { &[Self::Os, Self::Tsc] } fn to_possible_value(&self) -> Option { let name = match self { Self::Os => "os", Self::Tsc => "tsc", }; Some(PossibleValue::new(name)) } } impl ValueEnum for SortingAttr { fn value_variants<'a>() -> &'a [Self] { &[Self::Kind, Self::Name, Self::Location] } fn to_possible_value(&self) -> Option { let name = match self { Self::Kind => "kind", Self::Name => "name", Self::Location => "location", }; Some(PossibleValue::new(name)) } } divan-0.1.21/src/compile_fail.rs000064400000000000000000000020621046102023000145660ustar 00000000000000//! Private compile failure tests. //! //! # Repeated Options //! //! Options repeated in `#[divan::bench]` should cause a compile error, even if //! they use raw identifiers. The initial implementation allowed raw identifiers //! to slip through because `syn::Ident` does not consider them to be equal to //! the normal form without the `r#` prefix. //! //! We don't include `r#crate` here because it's not a valid identifier. //! //! ```compile_fail //! #[divan::bench(name = "x", r#name = "x")] //! fn bench() {} //! ``` //! //! ```compile_fail //! #[divan::bench(sample_count = 1, r#sample_count = 1)] //! fn bench() {} //! ``` //! //! ```compile_fail //! #[divan::bench(sample_size = 1, r#sample_size = 1)] //! fn bench() {} //! ``` //! //! # Type Checking //! //! The following won't produce any benchmarks because `types = []`. However, we //! still want to ensure that values in `consts = [...]` match the generic //! const's type of `i32`. //! //! ```compile_fail //! #[divan::bench(types = [], consts = ['a', 'b', 'c'])] //! fn bench() {} //! ``` divan-0.1.21/src/config/filter.rs000064400000000000000000000121361046102023000147000ustar 00000000000000use regex::Regex; use crate::util::split_vec::SplitVec; /// Filters which benchmark/group to run based on its path. pub(crate) enum Filter { Regex(Regex), Exact(String), } impl Filter { fn is_match(&self, s: &str) -> bool { match self { Self::Regex(regex) => regex.is_match(s), Self::Exact(exact) => exact == s, } } } /// Collection of inclusive and exclusive filters. /// /// Inclusive filters indicate that a benchmark/group path should be run without /// running other benchmarks (unless also included). /// /// Exclusive filters make all matching candidate benchmarks be skipped (even if /// explicitly included). As a result, they have priority over inclusive /// filters. #[derive(Default)] pub(crate) struct FilterSet { /// Stores exclusive filters followed by inclusive filters. filters: SplitVec, } impl FilterSet { #[inline] pub fn reserve_exact(&mut self, additional: usize) { self.filters.reserve_exact(additional); } #[inline] pub fn include(&mut self, filter: Filter) { self.insert_filter(filter, true); } #[inline] pub fn exclude(&mut self, filter: Filter) { self.insert_filter(filter, false); } fn insert_filter(&mut self, filter: Filter, inclusive: bool) { self.filters.insert(filter, inclusive); } /// Returns `true` if a benchmark/group path matches these filters, and thus /// the entry should be included. /// /// Negative filters are prioritized over inclusive filters. pub fn is_match(&self, entry_path: &str) -> bool { let filters = self.filters.all(); let inclusive_start = self.filters.split_index(); // If any filter matches, return whether it was inclusive or negative. 
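        // (For example, with one exclusive filter followed by one inclusive
        // filter, `inclusive_start` is 1: a match at index 0 returns `false`
        // and a match at index 1 returns `true`.)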
// Negative filters are placed before inclusive filters because they have // priority. if let Some(index) = filters.iter().position(|f| f.is_match(entry_path)) { return index >= inclusive_start; } // Otherwise succeed only if there are no inclusive filters. filters.len() == inclusive_start } } #[cfg(test)] mod tests { use super::*; /// Empty filter sets should match all strings. #[test] fn empty() { let filters = FilterSet::default(); assert!(filters.is_match("abc")); assert!(filters.is_match("123")); } mod single { use super::*; #[test] fn inclusive_exact() { let mut filters = FilterSet::default(); filters.insert_filter(Filter::Exact("abc".into()), true); assert!(filters.is_match("abc")); assert!(!filters.is_match("ab")); assert!(!filters.is_match("abcd")); } #[test] fn exclusive_exact() { let mut filters = FilterSet::default(); filters.insert_filter(Filter::Exact("abc".into()), false); assert!(!filters.is_match("abc")); assert!(filters.is_match("ab")); assert!(filters.is_match("abcd")); } #[test] fn inclusive_regex() { let mut filters = FilterSet::default(); let regex = Regex::new("abc.*123").unwrap(); filters.insert_filter(Filter::Regex(regex), true); assert!(!filters.is_match("abc")); assert!(filters.is_match("abc123")); assert!(filters.is_match("abc::123")); } #[test] fn exclusive_regex() { let mut filters = FilterSet::default(); let regex = Regex::new("abc.*123").unwrap(); filters.insert_filter(Filter::Regex(regex), false); assert!(filters.is_match("abc")); assert!(!filters.is_match("abc123")); assert!(!filters.is_match("abc::123")); } } /// Multiple inclusive filters should not be restrictive, whereas negative /// filters are increasingly restrictive. mod multi { use super::*; #[test] fn exact() { let mut filters = FilterSet::default(); filters.insert_filter(Filter::Exact("abc".into()), true); filters.insert_filter(Filter::Exact("123".into()), true); assert!(filters.is_match("abc")); assert!(filters.is_match("123")); assert!(!filters.is_match("xyz")); } } /// Negative filters override inclusive filters. mod overridden { use super::*; #[test] fn exact() { let mut filters = FilterSet::default(); filters.insert_filter(Filter::Exact("abc".into()), true); filters.insert_filter(Filter::Exact("abc".into()), false); assert!(!filters.is_match("abc")); } #[test] fn regex() { let mut filters = FilterSet::default(); let regex = Regex::new("abc.*123").unwrap(); filters.insert_filter(Filter::Regex(regex.clone()), true); filters.insert_filter(Filter::Regex(regex), false); assert!(!filters.is_match("abc::123")); assert!(!filters.is_match("123::abc")); } } } divan-0.1.21/src/config/mod.rs000064400000000000000000000114551046102023000141750ustar 00000000000000use std::{cmp::Ordering, error::Error, str::FromStr, time::Duration}; use crate::util::sort::natural_cmp; pub mod filter; /// `Duration` wrapper for parsing seconds from the CLI. #[derive(Clone, Copy)] pub(crate) struct ParsedSeconds(pub Duration); impl FromStr for ParsedSeconds { type Err = Box; fn from_str(s: &str) -> Result { Ok(Self(Duration::try_from_secs_f64(f64::from_str(s)?)?)) } } /// The primary action to perform. #[derive(Clone, Copy, Default)] pub(crate) enum Action { /// Run benchmark loops. #[default] Bench, /// Run benchmarked functions once to ensure they run successfully. Test, /// List benchmarks. List, /// List benchmarks in the style of `cargo test --list --format terse`. /// /// This only applies when running under `cargo-nextest` (`NEXTEST=1`). 
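    ///
    /// Each entry is emitted on its own line as `path::to::bench: benchmark`.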
ListTerse, } #[allow(dead_code)] impl Action { #[inline] pub fn is_bench(&self) -> bool { matches!(self, Self::Bench) } #[inline] pub fn is_test(&self) -> bool { matches!(self, Self::Test) } #[inline] pub fn is_list(&self) -> bool { matches!(self, Self::List) } #[inline] pub fn is_list_terse(&self) -> bool { matches!(self, Self::ListTerse) } } /// How to treat benchmarks based on whether they're marked as `#[ignore]`. #[derive(Copy, Clone, Default)] pub(crate) enum RunIgnored { /// Skip ignored. #[default] No, /// `--include-ignored`. Yes, /// `--ignored`. Only, } impl RunIgnored { pub fn run_ignored(self) -> bool { matches!(self, Self::Yes | Self::Only) } pub fn run_non_ignored(self) -> bool { matches!(self, Self::Yes | Self::No) } pub fn should_run(self, ignored: bool) -> bool { if ignored { self.run_ignored() } else { self.run_non_ignored() } } } /// The attribute to sort benchmarks by. #[derive(Clone, Copy, Default)] pub(crate) enum SortingAttr { /// Sort by kind, then by name and location. #[default] Kind, /// Sort by name, then by location and kind. Name, /// Sort by location, then by kind and name. Location, } impl SortingAttr { /// Returns an array containing `self` along with other attributes that /// should break ties if attributes are equal. pub fn with_tie_breakers(self) -> [Self; 3] { use SortingAttr::*; match self { Kind => [self, Name, Location], Name => [self, Location, Kind], Location => [self, Kind, Name], } } /// Compares benchmark runtime argument names. /// /// This takes `&&str` to handle `SortingAttr::Location` since the strings /// are considered to be within the same `&[&str]`. pub fn cmp_bench_arg_names(self, a: &&str, b: &&str) -> Ordering { for attr in self.with_tie_breakers() { let ordering = match attr { SortingAttr::Kind => Ordering::Equal, SortingAttr::Name => 'ordering: { // Compare as integers. match (a.parse::(), a.parse::()) { (Ok(a_u128), Ok(b_u128)) => break 'ordering a_u128.cmp(&b_u128), (Ok(_), Err(_)) => { if b.parse::().is_ok() { // a > b, because b is negative. break 'ordering Ordering::Greater; } } (Err(_), Ok(_)) => { if a.parse::().is_ok() { // a < b, because a is negative. break 'ordering Ordering::Less; } } (Err(_), Err(_)) => { if let (Ok(a_i128), Ok(b_i128)) = (a.parse::(), a.parse::()) { break 'ordering a_i128.cmp(&b_i128); } } } // Compare as floats. if let (Ok(a), Ok(b)) = (a.parse::(), b.parse::()) { if let Some(ordering) = a.partial_cmp(&b) { break 'ordering ordering; } } natural_cmp(a, b) } SortingAttr::Location => { let a: *const &str = a; let b: *const &str = b; a.cmp(&b) } }; if ordering != Ordering::Equal { return ordering; } } Ordering::Equal } } divan-0.1.21/src/counter/any_counter.rs000064400000000000000000000146301046102023000161540ustar 00000000000000use std::any::TypeId; use crate::{ counter::{ BytesCount, BytesFormat, CharsCount, CyclesCount, IntoCounter, ItemsCount, MaxCountUInt, }, time::FineDuration, util::{fmt::DisplayThroughput, ty::TypeCast}, }; /// Type-erased `Counter`. /// /// This does not implement `Copy` because in the future it will contain /// user-defined counters. 
#[derive(Clone)] pub(crate) struct AnyCounter { pub kind: KnownCounterKind, count: MaxCountUInt, } impl AnyCounter { #[inline] pub(crate) fn new(counter: C) -> Self { let counter = counter.into_counter(); if let Some(bytes) = counter.cast_ref::() { Self::bytes(bytes.count) } else if let Some(chars) = counter.cast_ref::() { Self::chars(chars.count) } else if let Some(cycles) = counter.cast_ref::() { Self::cycles(cycles.count) } else if let Some(items) = counter.cast_ref::() { Self::items(items.count) } else { unreachable!() } } #[inline] pub(crate) fn known(kind: KnownCounterKind, count: MaxCountUInt) -> Self { Self { kind, count } } #[inline] pub(crate) fn bytes(count: MaxCountUInt) -> Self { Self::known(KnownCounterKind::Bytes, count) } #[inline] pub(crate) fn chars(count: MaxCountUInt) -> Self { Self::known(KnownCounterKind::Chars, count) } #[inline] pub(crate) fn cycles(count: MaxCountUInt) -> Self { Self::known(KnownCounterKind::Cycles, count) } #[inline] pub(crate) fn items(count: MaxCountUInt) -> Self { Self::known(KnownCounterKind::Items, count) } pub(crate) fn display_throughput( &self, duration: FineDuration, bytes_format: BytesFormat, ) -> DisplayThroughput { DisplayThroughput { counter: self, picos: duration.picos as f64, bytes_format } } #[inline] pub(crate) fn count(&self) -> MaxCountUInt { self.count } #[inline] pub(crate) fn known_kind(&self) -> KnownCounterKind { self.kind } } /// Kind of `Counter` defined by this crate. #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub(crate) enum KnownCounterKind { Bytes, Chars, Cycles, Items, } impl KnownCounterKind { pub const COUNT: usize = 4; pub const ALL: [Self; Self::COUNT] = [Self::Bytes, Self::Chars, Self::Cycles, Self::Items]; /// The maximum width for columns displaying counters. pub const MAX_COMMON_COLUMN_WIDTH: usize = "1.111 Kitem/s".len(); #[inline] pub fn of() -> Self { let id = TypeId::of::(); if id == TypeId::of::() { Self::Bytes } else if id == TypeId::of::() { Self::Chars } else if id == TypeId::of::() { Self::Cycles } else if id == TypeId::of::() { Self::Items } else { unreachable!() } } } #[cfg(test)] mod tests { use super::*; #[test] fn known_counter_kind() { macro_rules! 
test { ($t:ident, $k:ident) => { assert_eq!(KnownCounterKind::of::<$t>(), KnownCounterKind::$k); }; } test!(BytesCount, Bytes); test!(CharsCount, Chars); test!(CyclesCount, Cycles); test!(ItemsCount, Items); } mod display_throughput { use super::*; #[test] fn bytes() { #[track_caller] fn test( bytes: MaxCountUInt, picos: u128, expected_binary: &str, expected_decimal: &str, ) { for (bytes_format, expected) in [ (BytesFormat::Binary, expected_binary), (BytesFormat::Decimal, expected_decimal), ] { assert_eq!( AnyCounter::bytes(bytes) .display_throughput(FineDuration { picos }, bytes_format) .to_string(), expected ); } } #[track_caller] fn test_all(bytes: MaxCountUInt, picos: u128, expected: &str) { test(bytes, picos, expected, expected); } test_all(1, 0, "inf B/s"); test_all(MaxCountUInt::MAX, 0, "inf B/s"); test_all(0, 0, "0 B/s"); test_all(0, 1, "0 B/s"); test_all(0, u128::MAX, "0 B/s"); } #[test] fn chars() { #[track_caller] fn test(chars: MaxCountUInt, picos: u128, expected: &str) { assert_eq!( AnyCounter::chars(chars) .display_throughput(FineDuration { picos }, BytesFormat::default()) .to_string(), expected ); } test(1, 0, "inf char/s"); test(MaxCountUInt::MAX, 0, "inf char/s"); test(0, 0, "0 char/s"); test(0, 1, "0 char/s"); test(0, u128::MAX, "0 char/s"); } #[test] fn cycles() { #[track_caller] fn test(cycles: MaxCountUInt, picos: u128, expected: &str) { assert_eq!( AnyCounter::cycles(cycles) .display_throughput(FineDuration { picos }, BytesFormat::default()) .to_string(), expected ); } test(1, 0, "inf Hz"); test(MaxCountUInt::MAX, 0, "inf Hz"); test(0, 0, "0 Hz"); test(0, 1, "0 Hz"); test(0, u128::MAX, "0 Hz"); } #[test] fn items() { #[track_caller] fn test(items: MaxCountUInt, picos: u128, expected: &str) { assert_eq!( AnyCounter::items(items) .display_throughput(FineDuration { picos }, BytesFormat::default()) .to_string(), expected ); } test(1, 0, "inf item/s"); test(MaxCountUInt::MAX, 0, "inf item/s"); test(0, 0, "0 item/s"); test(0, 1, "0 item/s"); test(0, u128::MAX, "0 item/s"); } } } divan-0.1.21/src/counter/collection.rs000064400000000000000000000110361046102023000157560ustar 00000000000000use crate::counter::{AnyCounter, IntoCounter, KnownCounterKind, MaxCountUInt}; /// Multi-map from counters to their counts and input-based initializer. #[derive(Default)] pub(crate) struct CounterCollection { info: [KnownCounterInfo; KnownCounterKind::COUNT], } #[derive(Default)] struct KnownCounterInfo { // TODO: Inlinable vector. counts: Vec, /// `BencherConfig::with_inputs` can only be called once, so the input type /// cannot change. 
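    ///
    /// The stored closure is type-erased: it receives the input as a
    /// `*const ()` and casts it back to the concrete input type internally,
    /// so callers must pass the same `I` used in `set_input_counter`.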
count_input: Option MaxCountUInt + Sync>>, } impl CounterCollection { #[inline] fn info(&self, counter_kind: KnownCounterKind) -> &KnownCounterInfo { &self.info[counter_kind as usize] } #[inline] fn info_mut(&mut self, counter_kind: KnownCounterKind) -> &mut KnownCounterInfo { &mut self.info[counter_kind as usize] } #[inline] pub(crate) fn counts(&self, counter_kind: KnownCounterKind) -> &[MaxCountUInt] { &self.info(counter_kind).counts } pub(crate) fn mean_count(&self, counter_kind: KnownCounterKind) -> MaxCountUInt { let counts = self.counts(counter_kind); let sum: u128 = counts.iter().map(|&c| c as u128).sum(); (sum / counts.len() as u128) as MaxCountUInt } #[inline] pub(crate) fn uses_input_counts(&self, counter_kind: KnownCounterKind) -> bool { self.info(counter_kind).count_input.is_some() } pub(crate) fn set_counter(&mut self, counter: AnyCounter) { let new_count = counter.count(); let info = self.info_mut(counter.known_kind()); if let Some(old_count) = info.counts.first_mut() { *old_count = new_count; } else { info.counts.push(new_count); } } pub(crate) fn push_counter(&mut self, counter: AnyCounter) { self.info_mut(counter.known_kind()).counts.push(counter.count()); } /// Set the input-based count generator function for a counter. pub(crate) fn set_input_counter(&mut self, make_counter: F) where F: Fn(&I) -> C + Sync + 'static, C: IntoCounter, { let info = self.info_mut(KnownCounterKind::of::()); // Ignore previously-set counts. info.counts.clear(); info.count_input = Some(Box::new(move |input: *const ()| { // SAFETY: Callers to `get_input_count` guarantee that the same `&I` // is passed. let counter = unsafe { make_counter(&*input.cast::()) }; AnyCounter::new(counter).count() })); } /// Calls the user-provided closure to get the counter count for a given /// input. /// /// # Safety /// /// The `I` type must be the same as that used by `set_input_counter`. pub(crate) unsafe fn get_input_count( &self, counter_kind: KnownCounterKind, input: &I, ) -> Option { let from_input = self.info(counter_kind).count_input.as_ref()?; // SAFETY: The caller ensures that this is called on the same input type // used for calling `set_input_counter`. Some(unsafe { from_input(input as *const I as *const ()) }) } /// Removes counts that came from input. pub(crate) fn clear_input_counts(&mut self) { for info in &mut self.info { if info.count_input.is_some() { info.counts.clear(); } } } } /// A set of known and (future) custom counters. #[derive(Clone, Debug, Default)] pub struct CounterSet { counts: [Option; KnownCounterKind::COUNT], } impl CounterSet { pub fn with(mut self, counter: impl IntoCounter) -> Self { self.insert(counter); self } pub fn insert(&mut self, counter: impl IntoCounter) -> &mut Self { let counter = AnyCounter::new(counter); self.counts[counter.known_kind() as usize] = Some(counter.count()); self } pub(crate) fn get(&self, counter_kind: KnownCounterKind) -> Option { self.counts[counter_kind as usize] } /// Overwrites `other` with values set in `self`. pub(crate) fn overwrite(&self, other: &Self) -> Self { Self { counts: KnownCounterKind::ALL.map(|kind| self.get(kind).or(other.get(kind))) } } pub(crate) fn to_collection(&self) -> CounterCollection { CounterCollection { info: KnownCounterKind::ALL.map(|kind| KnownCounterInfo { counts: self.get(kind).into_iter().collect(), count_input: None, }), } } } divan-0.1.21/src/counter/into_counter.rs000064400000000000000000000015311046102023000163320ustar 00000000000000use crate::counter::Counter; /// Conversion into a [`Counter`]. 
/// /// # Examples /// /// This trait is implemented for unsigned integers over /// [`ItemsCount`](crate::counter::ItemsCount): /// /// ``` /// #[divan::bench] /// fn sort_values(bencher: divan::Bencher) { /// # type T = String; /// let mut values: Vec = // ... /// # Vec::new(); /// bencher /// .counter(values.len()) /// .bench_local(|| { /// divan::black_box(&mut values).sort(); /// }); /// } /// ``` pub trait IntoCounter { /// Which kind of counter are we turning this into? type Counter: Counter; /// Converts into a [`Counter`]. fn into_counter(self) -> Self::Counter; } impl IntoCounter for C { type Counter = C; #[inline] fn into_counter(self) -> Self::Counter { self } } divan-0.1.21/src/counter/mod.rs000064400000000000000000000174551046102023000144150ustar 00000000000000//! Count values processed in each iteration to measure throughput. //! //! # Examples //! //! The following example measures throughput of converting //! [`&[i32]`](prim@slice) into [`Vec`](Vec) by providing [`BytesCount`] //! via [`Bencher::counter`](crate::Bencher::counter): //! //! ``` //! use divan::counter::BytesCount; //! //! #[divan::bench] //! fn slice_into_vec(bencher: divan::Bencher) { //! let ints: &[i32] = &[ //! // ... //! ]; //! //! let bytes = BytesCount::of_slice(ints); //! //! bencher //! .counter(bytes) //! .bench(|| -> Vec { //! divan::black_box(ints).into() //! }); //! } //! ``` use std::any::Any; mod any_counter; mod collection; mod into_counter; mod sealed; mod uint; pub(crate) use self::{ any_counter::{AnyCounter, KnownCounterKind}, collection::{CounterCollection, CounterSet}, sealed::Sealed, uint::{AsCountUInt, CountUInt, MaxCountUInt}, }; pub use into_counter::IntoCounter; /// Counts the number of values processed in each iteration of a benchmarked /// function. /// /// This is used via: /// - [`#[divan::bench(counters = ...)]`](macro@crate::bench#counters) /// - [`#[divan::bench_group(counters = ...)]`](macro@crate::bench_group#counters) /// - [`Bencher::counter`](crate::Bencher::counter) /// - [`Bencher::input_counter`](crate::Bencher::input_counter) #[doc(alias = "throughput")] pub trait Counter: Sized + Any + Sealed {} /// Process N bytes. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct BytesCount { count: MaxCountUInt, } /// Process N [`char`s](char). /// /// This is beneficial when comparing benchmarks between ASCII and Unicode /// implementations, since the number of code points is a common baseline /// reference. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct CharsCount { count: MaxCountUInt, } /// Process N cycles, displayed as Hertz. /// /// This value is user-provided and does not necessarily correspond to the CPU's /// cycle frequency, so it may represent cycles of anything appropriate for the /// benchmarking context. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct CyclesCount { count: MaxCountUInt, } /// Process N items. 
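///
/// # Examples
///
/// ```
/// use divan::counter::ItemsCount;
///
/// // Both count 1000 items per iteration (a minimal sketch).
/// assert_eq!(ItemsCount::new(1000_u32), ItemsCount::of_iter(0..1000));
/// ```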
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct ItemsCount { count: MaxCountUInt, } impl Sealed for BytesCount {} impl Sealed for CharsCount {} impl Sealed for CyclesCount {} impl Sealed for ItemsCount {} impl Counter for BytesCount {} impl Counter for CharsCount {} impl Counter for CyclesCount {} impl Counter for ItemsCount {} impl From for BytesCount { #[inline] fn from(count: C) -> Self { Self::new(count.as_max_uint()) } } impl From for CharsCount { #[inline] fn from(count: C) -> Self { Self::new(count.as_max_uint()) } } impl From for CyclesCount { #[inline] fn from(count: C) -> Self { Self::new(count.as_max_uint()) } } impl From for ItemsCount { #[inline] fn from(count: C) -> Self { Self::new(count.as_max_uint()) } } impl BytesCount { /// Count N bytes. #[inline] pub fn new(count: N) -> Self { Self { count: count.into_max_uint() } } /// Counts the size of a type with [`size_of`]. #[inline] #[doc(alias = "size_of")] pub const fn of() -> Self { Self { count: size_of::() as MaxCountUInt } } /// Counts the size of multiple instances of a type with [`size_of`]. #[inline] #[doc(alias = "size_of")] pub const fn of_many(n: usize) -> Self { match (size_of::() as MaxCountUInt).checked_mul(n as MaxCountUInt) { Some(count) => Self { count }, None => panic!("overflow"), } } /// Counts the size of a value with [`size_of_val`]. #[inline] #[doc(alias = "size_of_val")] pub fn of_val(val: &T) -> Self { // TODO: Make const, https://github.com/rust-lang/rust/issues/46571 Self { count: size_of_val(val) as MaxCountUInt } } /// Counts the bytes of [`Iterator::Item`s](Iterator::Item). #[inline] pub fn of_iter(iter: I) -> Self where I: IntoIterator, { Self::of_many::(iter.into_iter().count()) } /// Counts the bytes of a [`&str`]. /// /// This is like [`BytesCount::of_val`] with the convenience of behaving as /// expected for [`&String`](String) and other types that convert to /// [`&str`]. /// /// [`&str`]: prim@str #[inline] pub fn of_str>(s: &S) -> Self { Self::of_val(s.as_ref()) } /// Counts the bytes of a [slice](prim@slice). /// /// This is like [`BytesCount::of_val`] with the convenience of behaving as /// expected for [`&Vec`](Vec) and other types that convert to /// [`&[T]`](prim@slice). #[inline] pub fn of_slice>(s: &S) -> Self { Self::of_val(s.as_ref()) } } macro_rules! type_bytes { ($ty:ident) => { /// Counts the bytes of multiple #[doc = concat!("[`", stringify!($ty), "`s](", stringify!($ty), ").")] #[inline] pub const fn $ty(n: usize) -> Self { Self::of_many::<$ty>(n) } }; } /// Count bytes of multiple values. impl BytesCount { type_bytes!(f32); type_bytes!(f64); type_bytes!(i8); type_bytes!(u8); type_bytes!(i16); type_bytes!(u16); type_bytes!(i32); type_bytes!(u32); type_bytes!(i64); type_bytes!(u64); type_bytes!(i128); type_bytes!(u128); type_bytes!(isize); type_bytes!(usize); } impl CharsCount { /// Count N [`char`s](char). #[inline] pub fn new(count: N) -> Self { Self { count: count.into_max_uint() } } /// Counts the [`char`s](prim@char) of a [`&str`](prim@str). #[inline] pub fn of_str>(s: &S) -> Self { Self::new(s.as_ref().chars().count()) } } impl CyclesCount { /// Count N cycles. #[inline] pub fn new(count: N) -> Self { Self { count: count.into_max_uint() } } } impl ItemsCount { /// Count N items. #[inline] pub fn new(count: N) -> Self { Self { count: count.into_max_uint() } } /// Counts [`Iterator::Item`s](Iterator::Item). 
#[inline] pub fn of_iter(iter: I) -> Self where I: IntoIterator, { Self::new(iter.into_iter().count()) } } /// The numerical base for [`BytesCount`] in benchmark outputs. /// /// See [`Divan::bytes_format`](crate::Divan::bytes_format) for more info. #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] #[non_exhaustive] pub enum BytesFormat { /// Powers of 1000, starting with KB (kilobyte). This is the default. #[default] Decimal, /// Powers of 1024, starting with KiB (kibibyte). Binary, } /// Private `BytesFormat` that prevents leaking trait implementations we don't /// want to publicly commit to. #[derive(Clone, Copy)] pub(crate) struct PrivBytesFormat(pub BytesFormat); impl clap::ValueEnum for PrivBytesFormat { fn value_variants<'a>() -> &'a [Self] { &[Self(BytesFormat::Decimal), Self(BytesFormat::Binary)] } fn to_possible_value(&self) -> Option { let name = match self.0 { BytesFormat::Decimal => "decimal", BytesFormat::Binary => "binary", }; Some(clap::builder::PossibleValue::new(name)) } } #[cfg(test)] mod tests { use super::*; mod bytes_count { use super::*; #[test] fn of_iter() { assert_eq!(BytesCount::of_iter::([1, 2, 3]), BytesCount::of_slice(&[1, 2, 3])); } } } divan-0.1.21/src/counter/sealed.rs000064400000000000000000000002741046102023000150620ustar 00000000000000/// Prevents `Counter` from being implemented externally. /// /// Items exist on this trait rather than `Counter` so that they are impossible /// to access externally. pub trait Sealed {} divan-0.1.21/src/counter/uint.rs000064400000000000000000000031321046102023000146000ustar 00000000000000use std::any::Any; use crate::counter::{IntoCounter, ItemsCount}; /// The largest unsigned integer usable by counters provided by this crate. /// /// If `usize > u64`, this is a type alias to `usize`. Otherwise, it is a type /// alias to `u64`. pub type MaxCountUInt = condtype::num::Usize64; /// `u8`-`u64` and `usize`. /// /// We deliberately do not implement this trait for `u128` to make it /// impossible† to overflow `u128` when summing counts for averaging. /// /// †When `usize` is larger than `u64`, it becomes possible to overflow `u128`. /// In this case, Divan assumes pub trait CountUInt: Copy + Any { fn into_max_uint(self) -> MaxCountUInt; } /// A type like `CountUInt` but with more options. pub trait AsCountUInt { fn as_max_uint(&self) -> MaxCountUInt; } impl AsCountUInt for &T { #[inline] fn as_max_uint(&self) -> MaxCountUInt { T::as_max_uint(self) } } macro_rules! impl_uint { ($($i:ty),+) => { $(impl CountUInt for $i { #[inline] fn into_max_uint(self) -> MaxCountUInt { self as _ } })+ $(impl AsCountUInt for $i { #[inline] fn as_max_uint(&self) -> MaxCountUInt { *self as _ } })+ $(impl IntoCounter for $i { type Counter = ItemsCount; #[inline] fn into_counter(self) -> ItemsCount { ItemsCount::new(self) } })+ }; } // These types must be losslessly convertible to `MaxCountUInt`. 
impl_uint!(u8, u16, u32, u64, usize); divan-0.1.21/src/divan.rs000064400000000000000000000626171046102023000132600ustar 00000000000000#![allow(clippy::too_many_arguments)] use std::{borrow::Cow, cell::RefCell, fmt, num::NonZeroUsize, time::Duration}; use clap::ColorChoice; use regex::Regex; use crate::{ benchmark::BenchOptions, config::{ filter::{Filter, FilterSet}, Action, ParsedSeconds, RunIgnored, SortingAttr, }, counter::{ BytesCount, BytesFormat, CharsCount, CyclesCount, IntoCounter, ItemsCount, MaxCountUInt, PrivBytesFormat, }, entry::{AnyBenchEntry, BenchEntryRunner, EntryTree}, thread_pool::BENCH_POOL, time::{Timer, TimerKind}, tree_painter::{TreeColumn, TreePainter}, util::{self, defer, IntoRegex}, Bencher, }; /// The benchmark runner. #[derive(Default)] pub struct Divan { action: Action, timer: TimerKind, reverse_sort: bool, sorting_attr: SortingAttr, color: ColorChoice, bytes_format: BytesFormat, filters: FilterSet, run_ignored: RunIgnored, bench_options: BenchOptions<'static>, } /// Immutable context shared between entry runs. pub(crate) struct SharedContext { /// The specific action being performed. pub action: Action, /// The timer used to measure samples. pub timer: Timer, } impl fmt::Debug for Divan { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Divan").finish_non_exhaustive() } } impl Divan { /// Perform the configured action. /// /// By default, this will be [`Divan::run_benches`]. pub fn main(&self) { self.run_action(self.action); } /// Benchmark registered functions. pub fn run_benches(&self) { self.run_action(Action::Bench); } /// Test registered functions as if the `--test` flag was used. /// /// Unlike [`Divan::run_benches`], this runs each benchmarked function only /// once. pub fn test_benches(&self) { self.run_action(Action::Test); } /// Print registered functions as if the `--list` flag was used. pub fn list_benches(&self) { self.run_action(Action::Test); } /// Returns `true` if an entry at the given path should be considered for /// running. /// /// This does not take into account `entry.ignored` because that is handled /// separately. fn filter(&self, entry_path: &str) -> bool { self.filters.is_match(entry_path) } pub(crate) fn should_ignore(&self, ignored: bool) -> bool { !self.run_ignored.should_run(ignored) } pub(crate) fn run_action(&self, action: Action) { let _drop_threads = defer(|| BENCH_POOL.drop_threads()); let mut tree: Vec = if cfg!(miri) { // Miri does not work with our linker tricks. Vec::new() } else { let group_entries = &crate::entry::GROUP_ENTRIES; let generic_bench_entries = group_entries .iter() .flat_map(|group| group.generic_benches_iter().map(AnyBenchEntry::GenericBench)); let bench_entries = crate::entry::BENCH_ENTRIES .iter() .map(AnyBenchEntry::Bench) .chain(generic_bench_entries); let mut tree = EntryTree::from_benches(bench_entries); for group in group_entries.iter() { EntryTree::insert_group(&mut tree, group); } tree }; // Filter after inserting groups so that we can properly use groups' // display names. EntryTree::retain(&mut tree, |entry_path| self.filter(entry_path)); // Quick exit without doing unnecessary work. if tree.is_empty() { return; } // When run under `cargo-nextest`, it provides `--list --format terse`. // We don't currently accept this action under any other circumstances. if action.is_list_terse() { self.run_tree_list(&tree, ""); return; } // Sorting is after filtering to compare fewer elements. 
EntryTree::sort_by_attr(&mut tree, self.sorting_attr, self.reverse_sort); let timer = match self.timer { TimerKind::Os => Timer::Os, TimerKind::Tsc => { match Timer::get_tsc() { Ok(tsc) => tsc, Err(error) => { eprintln!("warning: CPU timestamp counter is unavailable ({error}), defaulting to OS"); Timer::Os } } } }; if action.is_bench() { eprintln!("Timer precision: {}", timer.precision()); } let shared_context = SharedContext { action, timer }; let column_widths = if action.is_bench() { TreeColumn::ALL.map(|column| { if column.is_last() { // The last column doesn't use padding. 0 } else { EntryTree::common_column_width(&tree, column) } }) } else { [0; TreeColumn::COUNT] }; let tree_painter = RefCell::new(TreePainter::new(EntryTree::max_name_span(&tree, 0), column_widths)); self.run_tree(action, &tree, &shared_context, None, &tree_painter); } /// Emits the entries in `tree` for the purpose of `--list --format terse`. /// /// This only happens when running under `cargo-nextest` (`NEXTEST=1`). fn run_tree_list(&self, tree: &[EntryTree], parent_path: &str) { let mut full_path = String::with_capacity(parent_path.len()); for child in tree { let ignore = child.bench_options().and_then(|options| options.ignore).unwrap_or_default(); if self.should_ignore(ignore) { continue; } full_path.clear(); if !parent_path.is_empty() { full_path.push_str(parent_path); full_path.push_str("::"); } full_path.push_str(child.display_name()); match child { EntryTree::Leaf { args: None, .. } => println!("{full_path}: benchmark"), EntryTree::Leaf { args: Some(args), .. } => { for arg in args { println!("{full_path}::{arg}: benchmark") } } EntryTree::Parent { children, .. } => self.run_tree_list(children, &full_path), } } } fn run_tree( &self, action: Action, tree: &[EntryTree], shared_context: &SharedContext, parent_options: Option<&BenchOptions>, tree_painter: &RefCell, ) { for (i, child) in tree.iter().enumerate() { let is_last = i == tree.len() - 1; let name = child.display_name(); let child_options = child.bench_options(); // Overwrite `parent_options` with `child_options` if applicable. let options: BenchOptions; let options: Option<&BenchOptions> = match (parent_options, child_options) { (None, None) => None, (Some(options), None) | (None, Some(options)) => Some(options), (Some(parent_options), Some(child_options)) => { options = child_options.overwrite(parent_options); Some(&options) } }; match child { EntryTree::Leaf { entry, args } => self.run_bench_entry( action, *entry, args.as_deref(), shared_context, options, tree_painter, is_last, ), EntryTree::Parent { children, .. } => { tree_painter.borrow_mut().start_parent(name, is_last); self.run_tree(action, children, shared_context, options, tree_painter); tree_painter.borrow_mut().finish_parent(); } } } } fn run_bench_entry( &self, action: Action, bench_entry: AnyBenchEntry, bench_arg_names: Option<&[&&str]>, shared_context: &SharedContext, entry_options: Option<&BenchOptions>, tree_painter: &RefCell, is_last_entry: bool, ) { use crate::benchmark::BenchContext; let entry_display_name = bench_entry.display_name(); // User runtime options override all other options. let options: BenchOptions; let options: &BenchOptions = match entry_options { None => &self.bench_options, Some(entry_options) => { options = self.bench_options.overwrite(entry_options); &options } }; if self.should_ignore(options.ignore.unwrap_or_default()) { tree_painter.borrow_mut().ignore_leaf(entry_display_name, is_last_entry); return; } // Paint empty leaf when simply listing. 
if action.is_list() { let mut tree_painter = tree_painter.borrow_mut(); tree_painter.start_leaf(entry_display_name, is_last_entry); tree_painter.finish_empty_leaf(); return; } let mut thread_counts: Vec = options .threads .as_deref() .unwrap_or_default() .iter() .map(|&n| match NonZeroUsize::new(n) { Some(n) => n, None => crate::util::known_parallelism(), }) .collect(); thread_counts.sort_unstable(); thread_counts.dedup(); let thread_counts: &[NonZeroUsize] = if thread_counts.is_empty() { &[NonZeroUsize::MIN] } else { &thread_counts }; // Whether we should emit child branches for thread counts. let has_thread_branches = thread_counts.len() > 1; let run_bench = |bench_display_name: &str, is_last_bench: bool, with_bencher: &dyn Fn(Bencher)| { if has_thread_branches { tree_painter.borrow_mut().start_parent(bench_display_name, is_last_bench); } else { tree_painter.borrow_mut().start_leaf(bench_display_name, is_last_bench); } for (i, &thread_count) in thread_counts.iter().enumerate() { let is_last_thread_count = if has_thread_branches { i == thread_counts.len() - 1 } else { is_last_bench }; if has_thread_branches { tree_painter .borrow_mut() .start_leaf(&format!("t={thread_count}"), is_last_thread_count); } let mut bench_context = BenchContext::new(shared_context, options, thread_count); with_bencher(Bencher::new(&mut bench_context)); if !bench_context.did_run { eprintln!( "warning: No benchmark function registered for '{bench_display_name}'" ); } let should_compute_stats = bench_context.did_run && shared_context.action.is_bench(); if should_compute_stats { let stats = bench_context.compute_stats(); tree_painter.borrow_mut().finish_leaf( is_last_thread_count, &stats, self.bytes_format, ); } else { tree_painter.borrow_mut().finish_empty_leaf(); } } if has_thread_branches { tree_painter.borrow_mut().finish_parent(); } }; match bench_entry.bench_runner() { BenchEntryRunner::Plain(bench) => run_bench(entry_display_name, is_last_entry, bench), BenchEntryRunner::Args(bench_runner) => { tree_painter.borrow_mut().start_parent(entry_display_name, is_last_entry); let bench_runner = bench_runner(); let orig_arg_names = bench_runner.arg_names(); let bench_arg_names = bench_arg_names.unwrap_or_default(); for (i, &arg_name) in bench_arg_names.iter().enumerate() { let is_last_arg = i == bench_arg_names.len() - 1; let arg_index = util::slice_ptr_index(orig_arg_names, arg_name); run_bench(arg_name, is_last_arg, &|bencher| { bench_runner.bench(bencher, arg_index); }); } tree_painter.borrow_mut().finish_parent(); } } } } /// Configuration options. impl Divan { /// Creates an instance with options set by parsing CLI arguments. pub fn from_args() -> Self { Self::default().config_with_args() } /// Sets options by parsing CLI arguments. /// /// This may override any previously-set options. #[must_use] pub fn config_with_args(mut self) -> Self { let mut command = crate::cli::command(); let mut matches = command.get_matches_mut(); let is_exact = matches.get_flag("exact"); // Insert filters. { let mut parse_filter = |filter: String| -> Filter { if is_exact { Filter::Exact(filter) } else { Filter::Regex(Regex::new(&filter).unwrap_or_else(|error| { let kind = clap::error::ErrorKind::ValueValidation; command.error(kind, error).exit(); })) } }; let inclusive_filters = matches.remove_many::("filter"); let exclusive_filters = matches.remove_many::("skip"); // Reduce allocation size and reallocation count. 
self.filters.reserve_exact({ let inclusive_count = inclusive_filters.as_ref().map(|f| f.len()).unwrap_or_default(); let exclusive_count = exclusive_filters.as_ref().map(|f| f.len()).unwrap_or_default(); inclusive_count + exclusive_count }); if let Some(inclusive_filters) = inclusive_filters { for filter in inclusive_filters { self.filters.include(parse_filter(filter)); } } if let Some(exclusive_filters) = exclusive_filters { for filter in exclusive_filters { self.filters.exclude(parse_filter(filter)); } } } self.action = if matches.get_flag("list") { // We support `--list --format terse` only under `cargo-nextest`. let is_terse = matches .try_get_one::("format") .ok() .flatten() .map(|format| format == "terse") .unwrap_or_default(); if is_terse { Action::ListTerse } else { Action::List } } else if matches.get_flag("test") || !matches.get_flag("bench") { // Either of: // `cargo bench -- --test` // `cargo test --benches` Action::Test } else { Action::Bench }; if let Some(&color) = matches.get_one("color") { self.color = color; } if matches.get_flag("ignored") { self.run_ignored = RunIgnored::Only; } else if matches.get_flag("include-ignored") { self.run_ignored = RunIgnored::Yes; } if let Some(&timer) = matches.get_one("timer") { self.timer = timer; } if let Some(&sorting_attr) = matches.get_one("sortr") { self.reverse_sort = true; self.sorting_attr = sorting_attr; } else if let Some(&sorting_attr) = matches.get_one("sort") { self.reverse_sort = false; self.sorting_attr = sorting_attr; } if let Some(&sample_count) = matches.get_one("sample-count") { self.bench_options.sample_count = Some(sample_count); } if let Some(&sample_size) = matches.get_one("sample-size") { self.bench_options.sample_size = Some(sample_size); } if let Some(thread_counts) = matches.get_many::("threads") { let mut threads: Vec = thread_counts.copied().collect(); threads.sort_unstable(); threads.dedup(); self.bench_options.threads = Some(Cow::Owned(threads)); } if let Some(&ParsedSeconds(min_time)) = matches.get_one("min-time") { self.bench_options.min_time = Some(min_time); } if let Some(&ParsedSeconds(max_time)) = matches.get_one("max-time") { self.bench_options.max_time = Some(max_time); } if let Some(mut skip_ext_time) = matches.get_many::("skip-ext-time") { // If the option is present without a value, then it's `true`. self.bench_options.skip_ext_time = Some(matches!(skip_ext_time.next(), Some(true) | None)); } if let Some(&count) = matches.get_one::("items-count") { self.counter_mut(ItemsCount::new(count)); } if let Some(&count) = matches.get_one::("bytes-count") { self.counter_mut(BytesCount::new(count)); } if let Some(&PrivBytesFormat(bytes_format)) = matches.get_one("bytes-format") { self.bytes_format = bytes_format; } if let Some(&count) = matches.get_one::("chars-count") { self.counter_mut(CharsCount::new(count)); } if let Some(&count) = matches.get_one::("cycles-count") { self.counter_mut(CyclesCount::new(count)); } self } /// Sets whether output should be colored. /// /// This option is equivalent to the `--color` CLI argument, where [`None`] /// here means "auto". #[must_use] pub fn color(mut self, yes: impl Into>) -> Self { self.color = match yes.into() { None => ColorChoice::Auto, Some(true) => ColorChoice::Always, Some(false) => ColorChoice::Never, }; self } /// Also run benchmarks marked [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). /// /// This option is equivalent to the `--include-ignored` CLI argument. 
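    ///
    /// # Examples
    ///
    /// ```
    /// # use divan::Divan;
    /// // Minimal sketch: run `#[ignore]`d benchmarks in addition to the rest.
    /// let divan = Divan::default().run_ignored();
    /// ```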
#[must_use] pub fn run_ignored(mut self) -> Self { self.run_ignored = RunIgnored::Yes; self } /// Only run benchmarks marked [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). /// /// This option is equivalent to the `--ignored` CLI argument. #[must_use] pub fn run_only_ignored(mut self) -> Self { self.run_ignored = RunIgnored::Only; self } /// Skips benchmarks that match `filter` as a regular expression pattern. /// /// This option is equivalent to the `--skip filter` CLI argument, without /// `--exact`. /// /// # Examples /// /// This method is commonly used with a [`&str`](prim@str) or [`String`]: /// /// ``` /// # use divan::Divan; /// let filter = "(add|sub)"; /// let divan = Divan::default().skip_regex(filter); /// ``` /// /// A pre-built [`Regex`] can also be provided: /// /// ``` /// # use divan::Divan; /// let filter = regex::Regex::new("(add|sub)").unwrap(); /// let divan = Divan::default().skip_regex(filter); /// ``` /// /// Calling this repeatedly will add multiple skip filters: /// /// ``` /// # use divan::Divan; /// let divan = Divan::default() /// .skip_regex("(add|sub)") /// .skip_regex("collections.*default"); /// ``` /// /// # Panics /// /// Panics if `filter` is a string and [`Regex::new`] fails. #[must_use] #[track_caller] pub fn skip_regex(mut self, filter: impl IntoRegex) -> Self { self.filters.exclude(Filter::Regex(filter.into_regex())); self } /// Skips benchmarks that exactly match `filter`. /// /// This option is equivalent to the `--skip filter --exact` CLI arguments. /// /// # Examples /// /// This method is commonly used with a [`&str`](prim@str) or [`String`]: /// /// ``` /// # use divan::Divan; /// let filter = "arithmetic::add"; /// let divan = Divan::default().skip_exact(filter); /// ``` /// /// Calling this repeatedly will add multiple skip filters: /// /// ``` /// # use divan::Divan; /// let divan = Divan::default() /// .skip_exact("arithmetic::add") /// .skip_exact("collections::vec::default"); /// ``` #[must_use] pub fn skip_exact(mut self, filter: impl Into) -> Self { self.filters.exclude(Filter::Exact(filter.into())); self } /// Sets the number of sampling iterations. /// /// This option is equivalent to the `--sample-count` CLI argument. /// /// If a benchmark enables [`threads`](macro@crate::bench#threads), sample /// count becomes a multiple of the number of threads. This is because each /// thread operates over the same sample size to ensure there are always N /// competing threads doing the same amount of work. #[inline] pub fn sample_count(mut self, count: u32) -> Self { self.bench_options.sample_count = Some(count); self } /// Sets the number of iterations inside a single sample. /// /// This option is equivalent to the `--sample-size` CLI argument. #[inline] pub fn sample_size(mut self, count: u32) -> Self { self.bench_options.sample_size = Some(count); self } /// Run across multiple threads. /// /// This enables you to measure contention on [atomics and /// locks](std::sync). A value of 0 indicates [available /// parallelism](std::thread::available_parallelism). /// /// This option is equivalent to the `--threads` CLI argument or /// `DIVAN_THREADS` environment variable. #[inline] pub fn threads(mut self, threads: T) -> Self where T: IntoIterator, { self.bench_options.threads = { let mut threads: Vec = threads.into_iter().collect(); threads.sort_unstable(); threads.dedup(); Some(Cow::Owned(threads)) }; self } /// Sets the time floor for benchmarking a function. 
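    ///
    /// If `max_time` is also set and is smaller, `max_time` takes priority.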
/// /// This option is equivalent to the `--min-time` CLI argument. #[inline] pub fn min_time(mut self, time: Duration) -> Self { self.bench_options.min_time = Some(time); self } /// Sets the time ceiling for benchmarking a function. /// /// This option is equivalent to the `--max-time` CLI argument. #[inline] pub fn max_time(mut self, time: Duration) -> Self { self.bench_options.max_time = Some(time); self } /// When accounting for `min_time` or `max_time`, skip time external to /// benchmarked functions. /// /// This option is equivalent to the `--skip-ext-time` CLI argument. #[inline] pub fn skip_ext_time(mut self, skip: bool) -> Self { self.bench_options.skip_ext_time = Some(skip); self } } /// Use [`Counter`s](crate::counter::Counter) to get throughput across all /// benchmarks. impl Divan { #[inline] fn counter_mut(&mut self, counter: C) -> &mut Self { self.bench_options.counters.insert(counter); self } /// Counts the number of values processed. #[inline] pub fn counter(mut self, counter: C) -> Self { self.counter_mut(counter); self } /// Sets the number of items processed. /// /// This option is equivalent to the `--items-count` CLI argument or /// `DIVAN_ITEMS_COUNT` environment variable. #[inline] pub fn items_count>(self, count: C) -> Self { self.counter(count.into()) } /// Sets the number of bytes processed. /// /// This option is equivalent to the `--bytes-count` CLI argument or /// `DIVAN_BYTES_COUNT` environment variable. #[inline] pub fn bytes_count>(self, count: C) -> Self { self.counter(count.into()) } /// Determines how [`BytesCount`] is scaled in benchmark outputs. /// /// This option is equivalent to the `--bytes-format` CLI argument or /// `DIVAN_BYTES_FORMAT` environment variable. #[inline] pub fn bytes_format(mut self, format: BytesFormat) -> Self { self.bytes_format = format; self } /// Sets the number of bytes processed. /// /// This option is equivalent to the `--chars-count` CLI argument or /// `DIVAN_CHARS_COUNT` environment variable. #[inline] pub fn chars_count>(self, count: C) -> Self { self.counter(count.into()) } /// Sets the number of cycles processed, displayed as Hertz. /// /// This option is equivalent to the `--cycles-count` CLI argument or /// `DIVAN_CYCLES_COUNT` environment variable. #[inline] pub fn cycles_count>(self, count: C) -> Self { self.counter(count.into()) } } divan-0.1.21/src/entry/generic.rs000064400000000000000000000135701046102023000147260ustar 00000000000000use std::{ any::{Any, TypeId}, cmp::Ordering, mem::ManuallyDrop, sync::OnceLock, }; use crate::{ entry::{BenchEntryRunner, GroupEntry}, util::sort::natural_cmp, }; /// Compile-time entry for a generic benchmark function, generated by /// `#[divan::bench]`. /// /// Unlike `BenchEntry`, this is for a specific generic type or `const`. /// /// Although this type contains trivially-`Copy` data, it *should not* implement /// `Clone` because the memory address of each instance is used to determine the /// relative order in `GroupEntry.generic_benches` when sorting benchmarks by /// location. pub struct GenericBenchEntry { /// The associated group, for entry metadata. pub group: &'static GroupEntry, /// The benchmarking function. pub bench: BenchEntryRunner, /// A generic type. pub ty: Option, /// A `const` value and associated data. 
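    ///
    /// This is `Some` only for `const` instantiations
    /// (`#[divan::bench(consts = ...)]`); type-only instantiations rely on
    /// `ty` alone.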
pub const_value: Option, } impl GenericBenchEntry { pub(crate) fn raw_name(&self) -> &str { match (&self.ty, &self.const_value) { (_, Some(const_value)) => const_value.name(), (Some(ty), None) => ty.raw_name(), (None, None) => unreachable!(), } } pub(crate) fn display_name(&self) -> &str { match (&self.ty, &self.const_value) { (_, Some(const_value)) => const_value.name(), (Some(ty), None) => ty.display_name(), (None, None) => unreachable!(), } } pub(crate) fn path_components(&self) -> impl Iterator { let module_path = self.group.meta.module_path_components(); // Generic benchmarks consider their group's raw name to be the path // component after the module path. let group_component = self.group.meta.raw_name; // If this is a generic const benchmark with generic types, the generic // types are considered to be the parent of the const values. let type_component = if self.const_value.is_some() { // FIXME: Switch back to `raw_name` once we have a way to insert // this `display_name` into `EntryTree::Parent`. The current // approach allows different types with the same name to become the // same `EntryTree::Parent`. self.ty.as_ref().map(|ty| ty.display_name()) } else { None }; module_path.chain(Some(group_component)).chain(type_component) } } /// Generic type instantiation. pub struct EntryType { /// [`std::any::type_name`]. get_type_name: fn() -> &'static str, /// [`std::any::TypeId::of`]. #[allow(dead_code)] get_type_id: fn() -> TypeId, } impl EntryType { /// Creates an instance for the given type. pub const fn new() -> Self { Self { get_type_name: std::any::type_name::, get_type_id: TypeId::of:: } } pub(crate) fn raw_name(&self) -> &'static str { (self.get_type_name)() } pub(crate) fn display_name(&self) -> &'static str { let mut type_name = self.raw_name(); // Remove module components in type name. while let Some((prev, next)) = type_name.split_once("::") { // Do not go past generic type boundary. if prev.contains('<') { break; } type_name = next; } type_name } } /// A reference to a `const` as a `&'static T`. pub struct EntryConst { /// `&'static T`. value: *const (), /// [`PartialOrd::partial_cmp`]. partial_cmp: unsafe fn(*const (), *const ()) -> Option, /// [`ToString::to_string`]. to_string: unsafe fn(*const ()) -> String, /// Cached `to_string` result. cached_string: ManuallyDrop>, } // SAFETY: `T: Send + Sync`. unsafe impl Send for EntryConst {} unsafe impl Sync for EntryConst {} impl EntryConst { /// Creates entry data for a `const` values. pub const fn new(value: &'static T) -> Self where T: PartialOrd + ToString + Send + Sync, { unsafe fn partial_cmp(a: *const (), b: *const ()) -> Option { T::partial_cmp(&*a.cast(), &*b.cast()) } unsafe fn to_string(value: *const ()) -> String { T::to_string(&*value.cast()) } Self { value: value as *const T as *const (), partial_cmp: partial_cmp::, to_string: to_string::, cached_string: ManuallyDrop::new(OnceLock::new()), } } /// Returns [`PartialOrd::partial_cmp`] ordering if `<` or `>, falling back /// to comparing [`ToString::to_string`] otherwise. pub(crate) fn cmp_name(&self, other: &Self) -> Ordering { // SAFETY: If both constants have the same `partial_cmp` function // pointer, they are safely comparable. In the context of how this // method is used, it is because the constants are of the same type. // // We don't need a type ID check because constants that are compared to // each other all come from the same code generation unit, so their // `partial_cmp` function pointers will never differ. 
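        //
        // For example, two constants from the same
        // `#[divan::bench(consts = [..])]` expansion are of the same type, so
        // both store the same monomorphization of the `partial_cmp` wrapper
        // defined in `new`, and the pointers compare equal.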
#[allow(unpredictable_function_pointer_comparisons)] if self.partial_cmp == other.partial_cmp { if let Some(ordering) = unsafe { (self.partial_cmp)(self.value, other.value) } { if !ordering.is_eq() { return ordering; } } } // Fallback to name comparison. natural_cmp(self.name(), other.name()) } /// [`ToString::to_string`]. #[inline] pub(crate) fn name(&self) -> &str { self.cached_string.get_or_init(|| { // SAFETY: The function is guaranteed to call `T::to_string`. let string = unsafe { (self.to_string)(self.value) }; Box::leak(string.into_boxed_str()) }) } } divan-0.1.21/src/entry/list.rs000064400000000000000000000046361046102023000142700ustar 00000000000000use std::{ ptr, sync::atomic::{AtomicPtr, Ordering as AtomicOrdering}, }; /// Linked list of entries. /// /// This is implemented in a thread-safe way despite the fact that constructors /// are run single-threaded. pub struct EntryList { entry: Option<&'static T>, next: AtomicPtr, } impl EntryList { pub(crate) const fn root() -> Self { Self { entry: None, next: AtomicPtr::new(ptr::null_mut()) } } /// Dereferences the `next` pointer. #[inline] fn next(&self) -> Option<&Self> { // SAFETY: `next` is only assigned by `push`, which always receives a // 'static lifetime. unsafe { self.next.load(AtomicOrdering::Relaxed).as_ref() } } } // Externally used by macros or tests. #[allow(missing_docs)] impl EntryList { #[inline] pub const fn new(entry: &'static T) -> Self { Self { entry: Some(entry), next: AtomicPtr::new(ptr::null_mut()) } } /// Creates an iterator over entries in `self`. #[inline] pub fn iter(&self) -> impl Iterator { let mut list = Some(self); std::iter::from_fn(move || -> Option> { let current = list?; list = current.next(); Some(current.entry.as_ref().copied()) }) .flatten() } /// Inserts `other` to the front of the list. /// /// # Safety /// /// This function must be safe to call before `main`. #[inline] pub fn push(&'static self, other: &'static Self) { let mut old_next = self.next.load(AtomicOrdering::Relaxed); loop { // Each publicly-created instance has `list.next` be null, so we can // simply store `self.next` there. other.next.store(old_next, AtomicOrdering::Release); // SAFETY: The content of `other` can already be seen, so we don't // need to strongly order reads into it. let other = other as *const Self as *mut Self; match self.next.compare_exchange_weak( old_next, other, AtomicOrdering::AcqRel, AtomicOrdering::Acquire, ) { // Successfully wrote our thread's value to the list. Ok(_) => return, // Lost the race, store winner's value in `other.next`. Err(new) => old_next = new, } } } } divan-0.1.21/src/entry/meta.rs000064400000000000000000000021661046102023000142370ustar 00000000000000use std::sync::LazyLock; use crate::benchmark::BenchOptions; /// Metadata common to `#[divan::bench]` and `#[divan::bench_group]`. pub struct EntryMeta { /// The entry's display name. pub display_name: &'static str, /// The entry's original name. /// /// This is used to find a `GroupEntry` for a `BenchEntry`. pub raw_name: &'static str, /// The entry's raw `module_path!()`. pub module_path: &'static str, /// Where the entry was defined. pub location: EntryLocation, /// Configures the benchmarker via attribute options. pub bench_options: Option>>, } /// Where an entry is located. 
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] #[allow(missing_docs)] pub struct EntryLocation { pub file: &'static str, pub line: u32, pub col: u32, } impl EntryMeta { #[inline] pub(crate) fn bench_options(&self) -> Option<&BenchOptions> { self.bench_options.as_deref() } #[inline] pub(crate) fn module_path_components<'a>(&self) -> impl Iterator { self.module_path.split("::") } } divan-0.1.21/src/entry/mod.rs000064400000000000000000000067751046102023000141020ustar 00000000000000use std::ptr::NonNull; use crate::{benchmark::BenchArgsRunner, Bencher}; mod generic; mod list; mod meta; mod tree; pub use self::{ generic::{EntryConst, EntryType, GenericBenchEntry}, list::EntryList, meta::{EntryLocation, EntryMeta}, }; pub(crate) use tree::EntryTree; /// Benchmark entries generated by `#[divan::bench]`. /// /// Note: generic-type benchmark entries are instead stored in `GROUP_ENTRIES` /// in `generic_benches`. pub static BENCH_ENTRIES: EntryList = EntryList::root(); /// Group entries generated by `#[divan::bench_group]`. pub static GROUP_ENTRIES: EntryList = EntryList::root(); /// Determines how the benchmark entry is run. #[derive(Clone, Copy)] pub enum BenchEntryRunner { /// Benchmark without arguments. Plain(fn(Bencher)), /// Benchmark with runtime arguments. Args(fn() -> BenchArgsRunner), } /// Compile-time entry for a benchmark, generated by `#[divan::bench]`. pub struct BenchEntry { /// Entry metadata. pub meta: EntryMeta, /// The benchmarking function. pub bench: BenchEntryRunner, } /// Compile-time entry for a benchmark group, generated by /// `#[divan::bench_group]` or a generic-type `#[divan::bench]`. pub struct GroupEntry { /// Entry metadata. pub meta: EntryMeta, /// Generic `#[divan::bench]` entries. /// /// This is two-dimensional to make code generation simpler. The outer /// dimension corresponds to types and the inner dimension corresponds to /// constants. pub generic_benches: Option<&'static [&'static [GenericBenchEntry]]>, } impl GroupEntry { pub(crate) fn generic_benches_iter(&self) -> impl Iterator { self.generic_benches.unwrap_or_default().iter().flat_map(|benches| benches.iter()) } } /// `BenchEntry` or `GenericBenchEntry`. #[derive(Clone, Copy)] pub(crate) enum AnyBenchEntry<'a> { Bench(&'a BenchEntry), GenericBench(&'a GenericBenchEntry), } impl<'a> AnyBenchEntry<'a> { /// Returns a pointer to use as the identity of the entry. #[inline] pub fn entry_addr(self) -> NonNull<()> { match self { Self::Bench(entry) => NonNull::from(entry).cast(), Self::GenericBench(entry) => NonNull::from(entry).cast(), } } /// Returns this entry's benchmark runner. #[inline] pub fn bench_runner(self) -> &'a BenchEntryRunner { match self { Self::Bench(BenchEntry { bench, .. }) | Self::GenericBench(GenericBenchEntry { bench, .. }) => bench, } } /// Returns this entry's argument names. 
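    ///
    /// This is `Some` only for benchmarks registered with runtime arguments
    /// (`BenchEntryRunner::Args`); plain benchmarks yield `None`.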
#[inline] pub fn arg_names(self) -> Option<&'static [&'static str]> { match self.bench_runner() { BenchEntryRunner::Args(bench_runner) => { let bench_runner = bench_runner(); Some(bench_runner.arg_names()) } _ => None, } } #[inline] pub fn meta(self) -> &'a EntryMeta { match self { Self::Bench(entry) => &entry.meta, Self::GenericBench(entry) => &entry.group.meta, } } #[inline] pub fn raw_name(self) -> &'a str { match self { Self::Bench(entry) => entry.meta.raw_name, Self::GenericBench(entry) => entry.raw_name(), } } #[inline] pub fn display_name(self) -> &'a str { match self { Self::Bench(entry) => entry.meta.display_name, Self::GenericBench(entry) => entry.display_name(), } } } divan-0.1.21/src/entry/tree.rs000064400000000000000000000340161046102023000142470ustar 00000000000000use std::{cmp::Ordering, ptr::NonNull}; use crate::{ benchmark::{BenchOptions, DEFAULT_SAMPLE_COUNT}, config::SortingAttr, counter::KnownCounterKind, entry::{AnyBenchEntry, EntryLocation, EntryMeta, GenericBenchEntry, GroupEntry}, tree_painter::TreeColumn, util::sort::natural_cmp, }; /// `BenchEntry` tree organized by path components. pub(crate) enum EntryTree<'a> { /// Benchmark group; parent to leaves and other parents. Parent { raw_name: &'a str, group: Option<&'a GroupEntry>, children: Vec }, /// Benchmark entry leaf. Leaf { /// The benchmark entry being run. entry: AnyBenchEntry<'a>, /// The names of arguments to run. args: Option>, }, } impl<'a> EntryTree<'a> { /// Constructs a tree from an iterator of benchmark entries in the order /// they're produced. pub fn from_benches(benches: I) -> Vec where I: IntoIterator>, { let mut result = Vec::::new(); for bench in benches { let mut insert_entry = |path_iter| { Self::insert_entry(&mut result, bench, path_iter); }; match bench { AnyBenchEntry::Bench(bench) => { insert_entry(&mut bench.meta.module_path_components()); } AnyBenchEntry::GenericBench(bench) => { insert_entry(&mut bench.path_components()); } } } result } /// Returns the maximum span for a name in `tree`. /// /// This is the number of terminal columns used for labeling benchmark names /// prior to emitting stats columns. pub fn max_name_span(tree: &[Self], depth: usize) -> usize { // The number of terminal columns used per-depth for box drawing // characters. For example, "│ ╰─ " is 6 for depth 2. const DEPTH_COLS: usize = 3; tree.iter() .map(|node| { let node_name_span = { let prefix_len = depth * DEPTH_COLS; let name_len = node.display_name().chars().count(); prefix_len + name_len }; // The maximum span of any descendent. let children_max_span = Self::max_name_span(node.children(), depth + 1); // The maximum span of any runtime argument. let args_max_span = node .arg_names() .unwrap_or_default() .iter() .map(|arg| { let prefix_len = (depth + 1) * DEPTH_COLS; let name_len = arg.chars().count(); prefix_len + name_len }) .max() .unwrap_or_default(); node_name_span.max(children_max_span).max(args_max_span) }) .max() .unwrap_or_default() } /// Returns the likely span for a given column. pub fn common_column_width(tree: &[Self], column: TreeColumn) -> usize { // Time and throughput info. if column.is_time_stat() { return KnownCounterKind::MAX_COMMON_COLUMN_WIDTH; } tree.iter() .map(|tree| { let Some(options) = tree.bench_options() else { return 0; }; let width = match column { TreeColumn::Samples => { let sample_count = options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT); 1 + sample_count.checked_ilog10().unwrap_or_default() as usize } // Iters is the last column, so it does not need pad width. 
// All other columns are time stats handled previously. _ => 0, }; width.max(Self::common_column_width(tree.children(), column)) }) .max() .unwrap_or_default() } /// Inserts the benchmark group into a tree. /// /// Groups are inserted after tree construction because it prevents having /// parents without terminating leaves. Groups that do not match an existing /// parent are not inserted. pub fn insert_group(mut tree: &mut [Self], group: &'a GroupEntry) { // Update `tree` to be the innermost set of subtrees whose parents match // `group.module_path`. 'component: for component in group.meta.module_path_components() { for subtree in tree { match subtree { EntryTree::Parent { raw_name, children, .. } if component == *raw_name => { tree = children; continue 'component; } _ => {} } } // No matches for this component in any subtrees. return; } // Find the matching tree to insert the group into. for subtree in tree { match subtree { EntryTree::Parent { raw_name, group: slot, .. } if group.meta.raw_name == *raw_name => { *slot = Some(group); return; } _ => {} } } } /// Removes entries from the tree whose paths do not match the filter. pub fn retain(tree: &mut Vec, mut filter: impl FnMut(&str) -> bool) { fn retain( tree: &mut Vec, parent_path: &str, filter: &mut impl FnMut(&str) -> bool, ) { tree.retain_mut(|subtree| { let subtree_path: String; let subtree_path: &str = if parent_path.is_empty() { subtree.display_name() } else { subtree_path = format!("{parent_path}::{}", subtree.display_name()); &subtree_path }; match subtree { EntryTree::Parent { children, .. } => { retain(children, subtree_path, filter); // If no children exist, filter out this parent. !children.is_empty() } EntryTree::Leaf { args: None, .. } => filter(subtree_path), EntryTree::Leaf { args: Some(args), .. } => { args.retain(|arg| filter(&format!("{subtree_path}::{arg}"))); // If no arguments exist, filter out this leaf. !args.is_empty() } } }); } retain(tree, "", &mut filter); } /// Sorts the tree by the given ordering. pub fn sort_by_attr(tree: &mut [Self], attr: SortingAttr, reverse: bool) { let apply_reverse = |ordering: Ordering| if reverse { ordering.reverse() } else { ordering }; tree.sort_unstable_by(|a, b| apply_reverse(a.cmp_by_attr(b, attr))); tree.iter_mut().for_each(|tree| { match tree { // Sort benchmark arguments. EntryTree::Leaf { args, .. } => { if let Some(args) = args { args.sort_by(|&a, &b| apply_reverse(attr.cmp_bench_arg_names(a, b))); } } // Sort children. EntryTree::Parent { children, .. } => { Self::sort_by_attr(children, attr, reverse); } } }); } fn cmp_by_attr(&self, other: &Self, attr: SortingAttr) -> Ordering { // We take advantage of the fact that entries have stable addresses, // unlike `EntryTree`. let entry_addr_ordering = match (self.entry_addr(), other.entry_addr()) { (Some(a), Some(b)) => Some(a.cmp(&b)), _ => None, }; // If entries have the same address, then all attributes will be equal. if matches!(entry_addr_ordering, Some(Ordering::Equal)) { return Ordering::Equal; } for attr in attr.with_tie_breakers() { let ordering = match attr { SortingAttr::Kind => self.kind().cmp(&other.kind()), SortingAttr::Name => self.cmp_display_name(other), SortingAttr::Location => { let location_ordering = self.location().cmp(&other.location()); // Use the entry's address to break location ties. // // This makes generic benchmarks use the same order as their // types and constants. 
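                    //
                    // For example, `#[divan::bench(types = [u32, u64])]`
                    // yields two `GenericBenchEntry` values at one source
                    // location, so this tie-break keeps them in the declared
                    // `[u32, u64]` order.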
if location_ordering.is_eq() { entry_addr_ordering.unwrap_or(Ordering::Equal) } else { location_ordering } } }; if ordering.is_ne() { return ordering; } } Ordering::Equal } /// Helper for constructing a tree. /// /// This uses recursion because the iterative approach runs into limitations /// with mutable borrows. fn insert_entry( tree: &mut Vec, entry: AnyBenchEntry<'a>, rem_modules: &mut dyn Iterator, ) { let Some(current_module) = rem_modules.next() else { tree.push(Self::Leaf { entry, args: entry.arg_names().map(|args| args.iter().collect()), }); return; }; let Some(children) = Self::get_children(tree, current_module) else { tree.push(Self::from_path(entry, current_module, rem_modules)); return; }; Self::insert_entry(children, entry, rem_modules); } /// Constructs a sequence of branches from a module path. fn from_path( entry: AnyBenchEntry<'a>, current_module: &'a str, rem_modules: &mut dyn Iterator, ) -> Self { let child = if let Some(next_module) = rem_modules.next() { Self::from_path(entry, next_module, rem_modules) } else { Self::Leaf { entry, args: entry.arg_names().map(|args| args.iter().collect()) } }; Self::Parent { raw_name: current_module, group: None, children: vec![child] } } /// Finds the `Parent.children` for the corresponding module in `tree`. fn get_children<'t>(tree: &'t mut [Self], module: &str) -> Option<&'t mut Vec> { tree.iter_mut().find_map(|tree| match tree { Self::Parent { raw_name, children, group: _ } if *raw_name == module => Some(children), _ => None, }) } /// Returns an integer denoting the enum variant. /// /// This is used instead of `std::mem::Discriminant` because it does not /// implement `Ord`. pub fn kind(&self) -> i32 { // Leaves should appear before parents. match self { Self::Leaf { .. } => 0, Self::Parent { .. } => 1, } } /// Returns a pointer to use as the identity of the entry. pub fn entry_addr(&self) -> Option> { match self { Self::Leaf { entry, .. } => Some(entry.entry_addr()), Self::Parent { group, .. } => { group.map(|entry: &GroupEntry| NonNull::from(entry).cast()) } } } pub fn meta(&self) -> Option<&'a EntryMeta> { match self { Self::Parent { group, .. } => Some(&(*group)?.meta), Self::Leaf { entry, .. } => Some(entry.meta()), } } pub fn bench_options(&self) -> Option<&'a BenchOptions> { self.meta()?.bench_options() } pub fn raw_name(&self) -> &'a str { match self { Self::Parent { group: Some(group), .. } => group.meta.raw_name, Self::Parent { raw_name, .. } => raw_name, Self::Leaf { entry, .. } => entry.raw_name(), } } pub fn display_name(&self) -> &'a str { if let Self::Leaf { entry, .. } = self { entry.display_name() } else if let Some(common) = self.meta() { common.display_name } else { let raw_name = self.raw_name(); raw_name.strip_prefix("r#").unwrap_or(raw_name) } } /// Returns the location of this entry, group, or the children's earliest /// location. fn location(&self) -> Option<&'a EntryLocation> { if let Some(common) = self.meta() { Some(&common.location) } else { self.children().iter().flat_map(Self::location).min() } } /// Compares display names naturally, taking into account integers. /// /// There is special consideration for the `PartialOrd` implementation of /// constants, so that `EntryConst` can sort integers and floats by value /// instead of lexicographically. fn cmp_display_name(&self, other: &Self) -> Ordering { match (self, other) { ( Self::Leaf { entry: AnyBenchEntry::GenericBench(GenericBenchEntry { const_value: Some(this), .. }), .. 
}, Self::Leaf { entry: AnyBenchEntry::GenericBench(GenericBenchEntry { const_value: Some(other), .. }), .. }, ) => this.cmp_name(other), _ => natural_cmp(self.display_name(), other.display_name()), } } fn children(&self) -> &[Self] { match self { Self::Leaf { .. } => &[], Self::Parent { children, .. } => children, } } fn arg_names(&self) -> Option<&[&'static &'static str]> { match self { Self::Leaf { args, .. } => args.as_deref(), Self::Parent { .. } => None, } } } divan-0.1.21/src/lib.rs000064400000000000000000001034071046102023000127160ustar 00000000000000//! [bench_attr]: macro@bench //! [bench_attr_examples]: macro@bench#examples //! [bench_attr_threads]: macro@bench#threads #![doc = include_str!("../README.md")] #![warn(missing_docs)] #![allow( unknown_lints, unused_unsafe, clippy::needless_doctest_main, clippy::needless_lifetimes, clippy::new_without_default, clippy::type_complexity, clippy::missing_transmute_annotations )] // Used by generated code. Not public API and thus not subject to SemVer. #[doc(hidden)] #[path = "private.rs"] pub mod __private; #[macro_use] mod util; mod alloc; mod benchmark; // NOTE: "bench" would be imported into the prelude. mod cli; mod compile_fail; mod config; mod divan; mod entry; mod stats; mod thread_pool; mod time; mod tree_painter; pub mod counter; /// `use divan::prelude::*;` to import common items. pub mod prelude { #[doc(no_inline)] pub use crate::{bench, bench_group, black_box, black_box_drop, AllocProfiler, Bencher, Divan}; } /// Prevents compiler optimizations on a value. /// /// `black_box` should only be used on [inputs](#benchmark-inputs) and /// [outputs](#benchmark-outputs) of benchmarks. Newcomers to benchmarking may /// be tempted to also use `black_box` within the implementation, but doing so /// will overly pessimize the measured code without any benefit. /// /// ## Benchmark Inputs /// /// When benchmarking, it's good practice to ensure measurements are accurate by /// preventing the compiler from optimizing based on assumptions about benchmark /// inputs. /// /// The compiler can optimize code for indices it knows about, such as by /// removing bounds checks or unrolling loops. If real-world use of your code /// would not know indices up front, consider preventing optimizations on them /// in benchmarks: /// /// ``` /// use divan::black_box; /// /// const INDEX: usize = // ... /// # 0; /// const SLICE: &[u8] = // ... /// # &[]; /// /// #[divan::bench] /// fn bench() { /// # fn work(_: T) {} /// work(&SLICE[black_box(INDEX)..]); /// } /// ``` /// /// The compiler may also optimize for the data itself, which can also be /// avoided with `black_box`: /// /// ``` /// # use divan::black_box; /// # const INDEX: usize = 0; /// # const SLICE: &[u8] = &[]; /// #[divan::bench] /// fn bench() { /// # fn work(_: T) {} /// work(black_box(&SLICE[black_box(INDEX)..])); /// } /// ``` /// /// ## Benchmark Outputs /// /// When benchmarking, it's best to ensure that all of the code is actually /// being run. If the compiler knows an output is unused, it may remove the code /// that generated the output. This optimization can make benchmarks appear much /// faster than they really are. 
/// /// At the end of a benchmark, we can force the compiler to treat outputs as if /// they were actually used: /// /// ``` /// # use divan::black_box; /// #[divan::bench] /// fn bench() { /// # let value = 1; /// black_box(value.to_string()); /// } /// ``` /// /// To make the code clearer to readers that the output is discarded, this code /// could instead call [`black_box_drop`]. /// /// Alternatively, the output can be returned from the benchmark: /// /// ``` /// #[divan::bench] /// fn bench() -> String { /// # let value = 1; /// value.to_string() /// } /// ``` /// /// Returning the output will `black_box` it and also avoid measuring the time /// to [drop](Drop) the output, which in this case is the time to deallocate a /// [`String`]. Read more about this in the [`#[divan::bench]` /// docs](macro@bench#drop). /// /// --- /// ///

/// <details>
/// <summary>Standard Library Documentation</summary>
/// #[doc(inline)] pub use std::hint::black_box; #[doc(inline)] pub use crate::{alloc::AllocProfiler, benchmark::Bencher, divan::Divan}; /// Runs all registered benchmarks. /// /// # Examples /// /// ``` /// #[divan::bench] /// fn add() -> i32 { /// // ... /// # 0 /// } /// /// fn main() { /// // Run `add` benchmark: /// divan::main(); /// } /// ``` /// /// See [`#[divan::bench]`](macro@bench) for more examples. pub fn main() { Divan::from_args().main(); } /// [`black_box`] + [`drop`] convenience function. /// /// # Examples /// /// This is useful when benchmarking a lazy [`Iterator`] to completion with /// [`for_each`](Iterator::for_each): /// /// ``` /// #[divan::bench] /// fn parse_iter() { /// let input: &str = // ... /// # ""; /// /// # struct Parser; /// # impl Parser { /// # fn new(_: &str) -> Parser { Parser } /// # fn for_each(self, _: fn(&'static str)) {} /// # } /// Parser::new(input) /// .for_each(divan::black_box_drop); /// } /// ``` #[inline] pub fn black_box_drop(dummy: T) { _ = black_box(dummy); } /// Registers a benchmarking function. /// /// # Examples /// /// The quickest way to get started is to benchmark the function as-is: /// /// ``` /// use divan::black_box; /// /// #[divan::bench] /// fn add() -> i32 { /// black_box(1) + black_box(42) /// } /// /// fn main() { /// // Run `add` benchmark: /// divan::main(); /// } /// ``` /// /// If benchmarks need to setup context before running, they can take a /// [`Bencher`] and use [`Bencher::bench`]: /// /// ``` /// use divan::{Bencher, black_box}; /// /// #[divan::bench] /// fn copy_from_slice(bencher: Bencher) { /// let src = (0..100).collect::>(); /// let mut dst = vec![0; src.len()]; /// /// bencher.bench_local(move || { /// black_box(&mut dst).copy_from_slice(black_box(&src)); /// }); /// } /// ``` /// /// Applying this attribute multiple times to the same item will cause a compile /// error: /// /// ```compile_fail /// #[divan::bench] /// #[divan::bench] /// fn bench() { /// // ... /// } /// ``` /// /// # Drop /// /// When a benchmarked function returns a value, it will not be [dropped][Drop] /// until after the current sample loop is finished. This allows for more /// precise timing measurements. /// /// Note that there is an inherent memory cost to defer drop, including /// allocations inside not-yet-dropped values. Also, if the benchmark /// [panics](macro@std::panic), the values will never be dropped. /// /// The following example benchmarks will only measure [`String`] construction /// time, but not deallocation time: /// /// ``` /// use divan::{Bencher, black_box}; /// /// #[divan::bench] /// fn freestanding() -> String { /// black_box("hello").to_uppercase() /// } /// /// #[divan::bench] /// fn contextual(bencher: Bencher) { /// // Setup: /// let s: String = // ... /// # String::new(); /// /// bencher.bench(|| -> String { /// black_box(&s).to_lowercase() /// }); /// } /// ``` /// /// If the returned value *does not* need to be dropped, there is no memory /// cost. Because of this, the following example benchmarks are equivalent: /// /// ``` /// #[divan::bench] /// fn with_return() -> i32 { /// let n: i32 = // ... /// # 0; /// n /// } /// /// #[divan::bench] /// fn without_return() { /// let n: i32 = // ... 
/// # 0; /// divan::black_box(n); /// } /// ``` /// /// # Options /// /// - [`name`] /// - [`crate`] /// - [`args`] /// - [`consts`] /// - [`types`] /// - [`sample_count`] /// - [`sample_size`] /// - [`threads`] /// - [`counters`] /// - [`bytes_count`] /// - [`chars_count`] /// - [`items_count`] /// - [`min_time`] /// - [`max_time`] /// - [`skip_ext_time`] /// - [`ignore`] /// /// ## `name` /// [`name`]: #name /// /// By default, the benchmark uses the function's name. It can be overridden via /// the [`name`] option: /// /// ``` /// #[divan::bench(name = "my_add")] /// fn add() -> i32 { /// // Will appear as "crate_name::my_add". /// # 0 /// } /// ``` /// /// ## `crate` /// [`crate`]: #crate /// /// The path to the specific `divan` crate instance used by this macro's /// generated code can be specified via the [`crate`] option. This is applicable /// when using `divan` via a macro from your own crate. /// /// ``` /// extern crate divan as sofa; /// /// #[::sofa::bench(crate = ::sofa)] /// fn add() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// ## `args` /// [`args`]: #args /// /// Function arguments can be provided to benchmark the function over multiple /// cases. This is used for comparing across parameters like collection lengths /// and [`enum`](https://doc.rust-lang.org/std/keyword.enum.html) variants. If /// you are not comparing cases and just need to pass a value into the /// benchmark, instead consider passing local values into the [`Bencher::bench`] /// closure or use [`Bencher::with_inputs`] for many distinct values. /// /// The following example benchmarks converting a [`Range`](std::ops::Range) to /// [`Vec`] over different lengths: /// /// ``` /// #[divan::bench(args = [1000, LEN, len()])] /// fn init_vec(len: usize) -> Vec { /// (0..len).collect() /// } /// /// const LEN: usize = // ... /// # 0; /// /// fn len() -> usize { /// // ... /// # 0 /// } /// ``` /// /// The list of arguments can be shared across multiple benchmarks through an /// external [`Iterator`]: /// /// ``` /// const LENS: &[usize] = // ... /// # &[]; /// /// #[divan::bench(args = LENS)] /// fn bench_vec1(len: usize) -> Vec { /// // ... /// # vec![] /// } /// /// #[divan::bench(args = LENS)] /// fn bench_vec2(len: usize) -> Vec { /// // ... /// # vec![] /// } /// ``` /// /// Unlike the [`consts`] option, any argument type is supported if it /// implements [`Any`], [`Copy`], [`Send`], [`Sync`], and [`ToString`] (or /// [`Debug`](std::fmt::Debug)): /// /// ``` /// #[derive(Clone, Copy, Debug)] /// enum Arg { /// A, B /// } /// /// #[divan::bench(args = [Arg::A, Arg::B])] /// fn bench_args(arg: Arg) { /// // ... /// } /// ``` /// /// The argument type does not need to implement [`Copy`] if it is used through /// a reference: /// /// ``` /// #[derive(Debug)] /// enum Arg { /// A, B /// } /// /// #[divan::bench(args = [Arg::A, Arg::B])] /// fn bench_args(arg: &Arg) { /// // ... /// } /// ``` /// /// For convenience, common string types are coerced to [`&str`](primitive@str): /// /// ``` /// fn strings() -> impl Iterator { /// // ... /// # [].into_iter() /// } /// /// #[divan::bench(args = strings())] /// fn bench_strings(s: &str) { /// // ... /// } /// ``` /// /// Arguments can also be used with [`Bencher`]. 
This allows for generating /// inputs based on [`args`] values or providing throughput information via /// [`Counter`s](crate::counter::Counter): /// /// ``` /// # fn new_value(v: T) -> T { v } /// # fn do_work(_: T) {} /// use divan::Bencher; /// /// #[divan::bench(args = [1, 2, 3])] /// fn bench(bencher: Bencher, len: usize) { /// let value = new_value(len); /// /// bencher /// .counter(len) /// .bench(|| { /// do_work(value); /// }); /// } /// ``` /// /// ## `consts` /// [`consts`]: #consts /// /// Divan supports benchmarking functions with [`const` /// generics](https://doc.rust-lang.org/reference/items/generics.html#const-generics) /// via the [`consts`] option. /// /// The following example benchmarks initialization of [`[i32; N]`](prim@array) /// for values of `N` provided by a [literal](https://doc.rust-lang.org/reference/expressions/literal-expr.html), /// [`const` item](https://doc.rust-lang.org/reference/items/constant-items.html), /// and [`const fn`](https://doc.rust-lang.org/reference/const_eval.html#const-functions): /// /// ``` /// #[divan::bench(consts = [1000, LEN, len()])] /// fn init_array() -> [i32; N] { /// let mut result = [0; N]; /// /// for i in 0..N { /// result[i] = divan::black_box(i as i32); /// } /// /// result /// } /// /// const LEN: usize = // ... /// # 0; /// /// const fn len() -> usize { /// // ... /// # 0 /// } /// ``` /// /// The list of constants can be shared across multiple benchmarks through an /// external [array](prim@array) or [slice](prim@slice): /// /// ``` /// const SIZES: &[usize] = &[1, 2, 5, 10]; /// /// #[divan::bench(consts = SIZES)] /// fn bench_array1() -> [i32; N] { /// // ... /// # [0; N] /// } /// /// #[divan::bench(consts = SIZES)] /// fn bench_array2() -> [i32; N] { /// // ... /// # [0; N] /// } /// ``` /// /// External constants are limited to lengths 1 through 20, because of /// implementation details. This limit does not apply if the list is provided /// directly like in the first example. /// /// ```compile_fail /// const SIZES: [usize; 21] = [ /// // ... /// # 0; 21 /// ]; /// /// #[divan::bench(consts = SIZES)] /// fn bench_array() -> [i32; N] { /// // ... /// # [0; N] /// } /// ``` /// /// ## `types` /// [`types`]: #types /// /// Divan supports benchmarking generic functions over a list of types via the /// [`types`] option. /// /// The following example benchmarks the [`From<&str>`](From) implementations /// for [`&str`](prim@str) and [`String`]: /// /// ``` /// #[divan::bench(types = [&str, String])] /// fn from_str<'a, T>() -> T /// where /// T: From<&'a str>, /// { /// divan::black_box("hello world").into() /// } /// ``` /// /// The [`types`] and [`args`] options can be combined to benchmark _T_ × _A_ /// scenarios. The following example benchmarks the [`FromIterator`] /// implementations for [`Vec`], [`BTreeSet`], and [`HashSet`]: /// /// ``` /// use std::collections::{BTreeSet, HashSet}; /// /// #[divan::bench( /// types = [Vec, BTreeSet, HashSet], /// args = [0, 2, 4, 16, 256, 4096], /// )] /// fn from_range(n: i32) -> T /// where /// T: FromIterator, /// { /// (0..n).collect() /// } /// ``` /// /// [`BTreeSet`]: std::collections::BTreeSet /// [`HashSet`]: std::collections::HashSet /// /// ## `sample_count` /// [`sample_count`]: #sample_count /// /// The number of statistical sample recordings can be set to a predetermined /// [`u32`] value via the [`sample_count`] option. This may be overridden at /// runtime using either the `DIVAN_SAMPLE_COUNT` environment variable or /// `--sample-count` CLI argument. 
/// /// ``` /// #[divan::bench(sample_count = 1000)] /// fn add() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// If the [`threads`] option is enabled, sample count becomes a multiple of the /// number of threads. This is because each thread operates over the same sample /// size to ensure there are always N competing threads doing the same amount of /// work. /// /// ## `sample_size` /// [`sample_size`]: #sample_size /// /// The number iterations within each statistics sample can be set to a /// predetermined [`u32`] value via the [`sample_size`] option. This may be /// overridden at runtime using either the `DIVAN_SAMPLE_SIZE` environment /// variable or `--sample-size` CLI argument. /// /// ``` /// #[divan::bench(sample_size = 1000)] /// fn add() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// ## `threads` /// [`threads`]: #threads /// /// Benchmarked functions can be run across multiple threads via the [`threads`] /// option. This enables you to measure contention on [atomics and /// locks][std::sync]. The default thread count is the [available parallelism]. /// /// ``` /// use std::sync::Arc; /// /// #[divan::bench(threads)] /// fn arc_clone(bencher: divan::Bencher) { /// let arc = Arc::new(42); /// /// bencher.bench(|| arc.clone()); /// } /// ``` /// /// The [`threads`] option can be set to any of: /// - [`bool`] for [available parallelism] (true) or no parallelism. /// - [`usize`] for a specific number of threads. 0 means use [available /// parallelism] and 1 means no parallelism. /// - [`IntoIterator`] over [`usize`] for multiple thread counts, such as: /// - [`Range`](std::ops::Range) /// - [`[usize; N]`](prim@array) /// - [`&[usize]`](prim@slice) /// /// ``` /// #[divan::bench(threads = false)] /// fn single() { /// // ... /// } /// /// #[divan::bench(threads = 10)] /// fn specific() { /// // ... /// } /// /// #[divan::bench(threads = 0..=8)] /// fn range() { /// // Note: Includes 0 for available parallelism. /// } /// /// #[divan::bench(threads = [0, 1, 4, 8, 16])] /// fn selection() { /// // ... /// } /// ``` /// /// ## `counters` /// [`counters`]: #counters /// /// The [`Counter`s](crate::counter::Counter) of each iteration can be set via /// the [`counters`] option. The following example emits info for the number of /// bytes and number of ints processed when benchmarking [slice sorting](slice::sort): /// /// ``` /// use divan::{Bencher, counter::{BytesCount, ItemsCount}}; /// /// const INTS: &[i32] = &[ /// // ... /// ]; /// /// #[divan::bench(counters = [ /// BytesCount::of_slice(INTS), /// ItemsCount::new(INTS.len()), /// ])] /// fn sort(bencher: Bencher) { /// bencher /// .with_inputs(|| INTS.to_vec()) /// .bench_refs(|ints| ints.sort()); /// } /// ``` /// /// For convenience, singular `counter` allows a single /// [`Counter`](crate::counter::Counter) to be set. The following example emits /// info for the number of bytes processed when benchmarking /// [`char`-counting](std::str::Chars::count): /// /// ``` /// use divan::counter::BytesCount; /// /// const STR: &str = "..."; /// /// #[divan::bench(counter = BytesCount::of_str(STR))] /// fn char_count() -> usize { /// divan::black_box(STR).chars().count() /// } /// ``` /// /// See: /// - [`#[divan::bench_group(counters = ...)]`](macro@bench_group#counters) /// - [`Bencher::counter`] /// - [`Bencher::input_counter`] /// /// ### `bytes_count` /// [`bytes_count`]: #bytes_count /// /// Convenience shorthand for /// [counter](#counters) = [BytesCount](counter::BytesCount)::from(n). 
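///
/// For example, `#[divan::bench(bytes_count = 1024)]` reports throughput as
/// though each iteration processed 1024 bytes.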
/// /// ### `chars_count` /// [`chars_count`]: #chars_count /// /// Convenience shorthand for /// [counter](#counters) = [CharsCount](counter::CharsCount)::from(n). /// /// ### `items_count` /// [`items_count`]: #items_count /// /// Convenience shorthand for /// [counter](#counters) = [ItemsCount](counter::ItemsCount)::from(n). /// /// ## `min_time` /// [`min_time`]: #min_time /// /// The minimum time spent benchmarking each function can be set to a /// predetermined [`Duration`] via the [`min_time`] option. This may be /// overridden at runtime using either the `DIVAN_MIN_TIME` environment variable /// or `--min-time` CLI argument. /// /// Unless [`skip_ext_time`] is set, this includes time external to the /// benchmarked function, such as time spent generating inputs and running /// [`Drop`]. /// /// ``` /// use std::time::Duration; /// /// #[divan::bench(min_time = Duration::from_secs(3))] /// fn add() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// For convenience, [`min_time`] can also be set with seconds as [`u64`] or /// [`f64`]. Invalid values will cause a panic at runtime. /// /// ``` /// #[divan::bench(min_time = 2)] /// fn int_secs() -> i32 { /// // ... /// # 0 /// } /// /// #[divan::bench(min_time = 1.5)] /// fn float_secs() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// ## `max_time` /// [`max_time`]: #max_time /// /// The maximum time spent benchmarking each function can be set to a /// predetermined [`Duration`] via the [`max_time`] option. This may be /// overridden at runtime using either the `DIVAN_MAX_TIME` environment variable /// or `--max-time` CLI argument. /// /// Unless [`skip_ext_time`] is set, this includes time external to the /// benchmarked function, such as time spent generating inputs and running /// [`Drop`]. /// /// If `min_time > max_time`, then [`max_time`] has priority and [`min_time`] /// will not be reached. /// /// ``` /// use std::time::Duration; /// /// #[divan::bench(max_time = Duration::from_secs(5))] /// fn add() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// For convenience, like [`min_time`], [`max_time`] can also be set with /// seconds as [`u64`] or [`f64`]. Invalid values will cause a panic at runtime. /// /// ``` /// #[divan::bench(max_time = 8)] /// fn int_secs() -> i32 { /// // ... /// # 0 /// } /// /// #[divan::bench(max_time = 9.5)] /// fn float_secs() -> i32 { /// // ... /// # 0 /// } /// ``` /// /// ## `skip_ext_time` /// [`skip_ext_time`]: #skip_ext_time /// /// By default, [`min_time`] and [`max_time`] include time external to the /// benchmarked function, such as time spent generating inputs and running /// [`Drop`]. Enabling the [`skip_ext_time`] option will instead make those /// options only consider time spent within the benchmarked function. This may /// be overridden at runtime using either the `DIVAN_SKIP_EXT_TIME` environment /// variable or `--skip-ext-time` CLI argument. /// /// In the following example, [`max_time`] only considers time spent running /// `measured_function`: /// /// ``` /// # fn generate_input() {} /// # fn measured_function(_: ()) {} /// #[divan::bench(max_time = 5, skip_ext_time)] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| generate_input()) /// .bench_values(|input| measured_function(input)); /// } /// ``` /// /// This option can be set to an explicit [`bool`] value to override parent /// values: /// /// ``` /// #[divan::bench(max_time = 5, skip_ext_time = false)] /// fn bench(bencher: divan::Bencher) { /// // ... 
/// } /// ``` /// /// ## `ignore` /// [`ignore`]: #ignore /// /// Like [`#[test]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-test-attribute), /// `#[divan::bench]` functions can use [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute): /// /// ``` /// #[divan::bench] /// #[ignore] /// fn todo() { /// unimplemented!(); /// } /// # divan::main(); /// ``` /// /// This option can also instead be set within the `#[divan::bench]` attribute: /// /// ``` /// #[divan::bench(ignore)] /// fn todo() { /// unimplemented!(); /// } /// # divan::main(); /// ``` /// /// Like [`skip_ext_time`], this option can be set to an explicit [`bool`] value /// to override parent values: /// /// ``` /// #[divan::bench(ignore = false)] /// fn bench() { /// // ... /// } /// ``` /// /// This can be used to ignore benchmarks based on a runtime condition. The /// following example benchmark will be ignored if an [environment /// variable](std::env::var) is not set to "true": /// /// ``` /// #[divan::bench( /// ignore = std::env::var("BENCH_EXPENSIVE").as_deref() != Ok("true") /// )] /// fn expensive_bench() { /// // ... /// } /// ``` /// /// [`Any`]: std::any::Any /// [`Duration`]: std::time::Duration /// [available parallelism]: std::thread::available_parallelism pub use divan_macros::bench; /// Registers a benchmarking group. /// /// # Examples /// /// This is used for setting [options] shared across /// [`#[divan::bench]`](macro@bench) functions in the same module: /// /// ``` /// #[divan::bench_group( /// sample_count = 100, /// sample_size = 500, /// )] /// mod math { /// use divan::black_box; /// /// #[divan::bench] /// fn add() -> i32 { /// black_box(1) + black_box(42) /// } /// /// #[divan::bench] /// fn div() -> i32 { /// black_box(1) / black_box(42) /// } /// } /// /// fn main() { /// // Run `math::add` and `math::div` benchmarks: /// divan::main(); /// } /// ``` /// /// Benchmarking [options] set on parent groups cascade into child groups and /// their benchmarks: /// /// ``` /// #[divan::bench_group( /// sample_count = 100, /// sample_size = 500, /// )] /// mod parent { /// #[divan::bench_group(sample_size = 1)] /// mod child1 { /// #[divan::bench] /// fn bench() { /// // Will be sampled 100 times with 1 iteration per sample. /// } /// } /// /// #[divan::bench_group(sample_count = 42)] /// mod child2 { /// #[divan::bench] /// fn bench() { /// // Will be sampled 42 times with 500 iterations per sample. /// } /// } /// /// mod child3 { /// #[divan::bench(sample_count = 1)] /// fn bench() { /// // Will be sampled 1 time with 500 iterations per sample. /// } /// } /// } /// ``` /// /// Applying this attribute multiple times to the same item will cause a compile /// error: /// /// ```compile_fail /// #[divan::bench_group] /// #[divan::bench_group] /// mod math { /// // ... /// } /// ``` /// /// # Options /// [options]: #options /// /// - [`name`] /// - [`crate`] /// - [`sample_count`] /// - [`sample_size`] /// - [`threads`] /// - [`counters`] /// - [`bytes_count`] /// - [`chars_count`] /// - [`items_count`] /// - [`min_time`] /// - [`max_time`] /// - [`skip_ext_time`] /// - [`ignore`] /// /// ## `name` /// [`name`]: #name /// /// By default, the benchmark group uses the module's name. It can be overridden /// via the `name` option: /// /// ``` /// #[divan::bench_group(name = "my_math")] /// mod math { /// #[divan::bench(name = "my_add")] /// fn add() -> i32 { /// // Will appear as "crate_name::my_math::my_add". 
/// # 0 /// } /// } /// ``` /// /// ## `crate` /// [`crate`]: #crate /// /// The path to the specific `divan` crate instance used by this macro's /// generated code can be specified via the [`crate`] option. This is applicable /// when using `divan` via a macro from your own crate. /// /// ``` /// extern crate divan as sofa; /// /// #[::sofa::bench_group(crate = ::sofa)] /// mod math { /// #[::sofa::bench(crate = ::sofa)] /// fn add() -> i32 { /// // ... /// # 0 /// } /// } /// ``` /// /// ## `sample_count` /// [`sample_count`]: #sample_count /// /// The number of statistical sample recordings can be set to a predetermined /// [`u32`] value via the [`sample_count`] option. This may be overridden at /// runtime using either the `DIVAN_SAMPLE_COUNT` environment variable or /// `--sample-count` CLI argument. /// /// ``` /// #[divan::bench_group(sample_count = 1000)] /// mod math { /// #[divan::bench] /// fn add() -> i32 { /// // ... /// # 0 /// } /// } /// ``` /// /// If the [`threads`] option is enabled, sample count becomes a multiple of the /// number of threads. This is because each thread operates over the same sample /// size to ensure there are always N competing threads doing the same amount of /// work. /// /// ## `sample_size` /// [`sample_size`]: #sample_size /// /// The number iterations within each statistical sample can be set to a /// predetermined [`u32`] value via the [`sample_size`] option. This may be /// overridden at runtime using either the `DIVAN_SAMPLE_SIZE` environment /// variable or `--sample-size` CLI argument. /// /// ``` /// #[divan::bench_group(sample_size = 1000)] /// mod math { /// #[divan::bench] /// fn add() -> i32 { /// // ... /// # 0 /// } /// } /// ``` /// /// ## `threads` /// [`threads`]: #threads /// /// See [`#[divan::bench(threads = ...)]`](macro@bench#threads). /// /// ## `counters` /// [`counters`]: #counters /// /// The [`Counter`s](crate::counter::Counter) of each iteration of benchmarked /// functions in a group can be set via the [`counters`] option. The following /// example emits info for the number of bytes and number of ints processed when /// benchmarking [slice sorting](slice::sort): /// /// ``` /// use divan::{Bencher, counter::{BytesCount, ItemsCount}}; /// /// const INTS: &[i32] = &[ /// // ... /// ]; /// /// #[divan::bench_group(counters = [ /// BytesCount::of_slice(INTS), /// ItemsCount::new(INTS.len()), /// ])] /// mod sort { /// use super::*; /// /// #[divan::bench] /// fn default(bencher: Bencher) { /// bencher /// .with_inputs(|| INTS.to_vec()) /// .bench_refs(|ints| ints.sort()); /// } /// /// #[divan::bench] /// fn unstable(bencher: Bencher) { /// bencher /// .with_inputs(|| INTS.to_vec()) /// .bench_refs(|ints| ints.sort_unstable()); /// } /// } /// # fn main() {} /// ``` /// /// For convenience, singular `counter` allows a single /// [`Counter`](crate::counter::Counter) to be set. 
The following example emits /// info for the number of bytes processed when benchmarking /// [`char`-counting](std::str::Chars::count) and /// [`char`-collecting](std::str::Chars::collect): /// /// ``` /// use divan::counter::BytesCount; /// /// const STR: &str = "..."; /// /// #[divan::bench_group(counter = BytesCount::of_str(STR))] /// mod chars { /// use super::STR; /// /// #[divan::bench] /// fn count() -> usize { /// divan::black_box(STR).chars().count() /// } /// /// #[divan::bench] /// fn collect() -> String { /// divan::black_box(STR).chars().collect() /// } /// } /// # fn main() {} /// ``` /// /// See: /// - [`#[divan::bench(counters = ...)]`](macro@bench#counters) /// - [`Bencher::counter`] /// - [`Bencher::input_counter`] /// /// ### `bytes_count` /// [`bytes_count`]: #bytes_count /// /// Convenience shorthand for /// [counter](#counters) = [BytesCount](counter::BytesCount)::from(n). /// /// ### `chars_count` /// [`chars_count`]: #chars_count /// /// Convenience shorthand for /// [counter](#counters) = [CharsCount](counter::CharsCount)::from(n). /// /// ### `cycles_count` /// [`cycles_count`]: #cycles_count /// /// Convenience shorthand for /// [counter](#counters) = [CyclesCount](counter::CyclesCount)::from(n). /// /// ### `items_count` /// [`items_count`]: #items_count /// /// Convenience shorthand for /// [counter](#counters) = [ItemsCount](counter::ItemsCount)::from(n). /// /// ## `min_time` /// [`min_time`]: #min_time /// /// The minimum time spent benchmarking each function can be set to a /// predetermined [`Duration`] via the [`min_time`] option. This may be /// overridden at runtime using either the `DIVAN_MIN_TIME` environment variable /// or `--min-time` CLI argument. /// /// Unless [`skip_ext_time`] is set, this includes time external to benchmarked /// functions, such as time spent generating inputs and running [`Drop`]. /// /// ``` /// use std::time::Duration; /// /// #[divan::bench_group(min_time = Duration::from_secs(3))] /// mod math { /// #[divan::bench] /// fn add() -> i32 { /// // ... /// # 0 /// } /// } /// ``` /// /// For convenience, [`min_time`] can also be set with seconds as [`u64`] or /// [`f64`]. Invalid values will cause a panic at runtime. /// /// ``` /// #[divan::bench_group(min_time = 2)] /// mod int_secs { /// // ... /// } /// /// #[divan::bench_group(min_time = 1.5)] /// mod float_secs { /// // ... /// } /// ``` /// /// ## `max_time` /// [`max_time`]: #max_time /// /// The maximum time spent benchmarking each function can be set to a /// predetermined [`Duration`] via the [`max_time`] option. This may be /// overridden at runtime using either the `DIVAN_MAX_TIME` environment variable /// or `--max-time` CLI argument. /// /// Unless [`skip_ext_time`] is set, this includes time external to benchmarked /// functions, such as time spent generating inputs and running [`Drop`]. /// /// If `min_time > max_time`, then [`max_time`] has priority and [`min_time`] /// will not be reached. /// /// ``` /// use std::time::Duration; /// /// #[divan::bench_group(max_time = Duration::from_secs(5))] /// mod math { /// #[divan::bench] /// fn add() -> i32 { /// // ... /// # 0 /// } /// } /// ``` /// /// For convenience, like [`min_time`], [`max_time`] can also be set with /// seconds as [`u64`] or [`f64`]. Invalid values will cause a panic at runtime. /// /// ``` /// #[divan::bench_group(max_time = 8)] /// mod int_secs { /// // ... /// } /// /// #[divan::bench_group(max_time = 9.5)] /// mod float_secs { /// // ... 
/// } /// ``` /// /// ## `skip_ext_time` /// [`skip_ext_time`]: #skip_ext_time /// /// By default, [`min_time`] and [`max_time`] include time external to /// benchmarked functions, such as time spent generating inputs and running /// [`Drop`]. Enabling the [`skip_ext_time`] option will instead make those /// options only consider time spent within benchmarked functions. This may be /// overridden at runtime using either the `DIVAN_SKIP_EXT_TIME` environment /// variable or `--skip-ext-time` CLI argument. /// /// In the following example, [`max_time`] only considers time spent running /// `measured_function`: /// /// ``` /// #[divan::bench_group(skip_ext_time)] /// mod group { /// # fn generate_input() {} /// # fn measured_function(_: ()) {} /// #[divan::bench(max_time = 5)] /// fn bench(bencher: divan::Bencher) { /// bencher /// .with_inputs(|| generate_input()) /// .bench_values(|input| measured_function(input)); /// } /// } /// ``` /// /// This option can be set to an explicit [`bool`] value to override parent /// values: /// /// ``` /// #[divan::bench_group(skip_ext_time = false)] /// mod group { /// // ... /// } /// ``` /// /// ## `ignore` /// [`ignore`]: #ignore /// /// Like [`#[test]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-test-attribute) /// and [`#[divan::bench]`](macro@bench), `#[divan::bench_group]` functions can /// use [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute): /// /// ``` /// #[divan::bench_group] /// #[ignore] /// mod math { /// #[divan::bench] /// fn todo() { /// unimplemented!(); /// } /// } /// # divan::main(); /// ``` /// /// This option can also instead be set within the `#[divan::bench_group]` /// attribute: /// /// ``` /// #[divan::bench_group(ignore)] /// mod math { /// #[divan::bench] /// fn todo() { /// unimplemented!(); /// } /// } /// # divan::main(); /// ``` /// /// Like [`skip_ext_time`], this option can be set to an explicit [`bool`] value /// to override parent values: /// /// ``` /// #[divan::bench_group(ignore = false)] /// mod group { /// // ... /// } /// ``` /// /// This can be used to ignore benchmarks based on a runtime condition. The /// following example benchmark group will be ignored if an [environment /// variable](std::env::var) is not set to "true": /// /// ``` /// #[divan::bench_group( /// ignore = std::env::var("BENCH_EXPENSIVE").as_deref() != Ok("true") /// )] /// mod expensive_benches { /// // ... /// } /// ``` /// /// [`Duration`]: std::time::Duration pub use divan_macros::bench_group; divan-0.1.21/src/private.rs000064400000000000000000000130131046102023000136130ustar 00000000000000use std::{ borrow::{Borrow, Cow}, fmt::Debug, }; pub use crate::{ benchmark::{BenchArgs, BenchOptions}, entry::{ BenchEntry, BenchEntryRunner, EntryConst, EntryList, EntryLocation, EntryMeta, EntryType, GenericBenchEntry, GroupEntry, BENCH_ENTRIES, GROUP_ENTRIES, }, time::IntoDuration, }; /// Helper to convert values to strings via `ToString` or fallback to `Debug`. /// /// This works by having a `Debug`-based `ToString::to_string` method that will /// be chosen if the wrapped type implements `Debug` *but not* `ToString`. If /// the wrapped type implements `ToString`, then the inherent /// `ToStringHelper::to_string` method will be chosen instead. 
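///
/// For example, method resolution prefers the inherent method when the wrapped
/// type is `ToString`, and falls back to the `Debug`-based trait impl
/// otherwise (an illustrative doctest; `__private` is not public API):
///
/// ```
/// # use divan::__private::ToStringHelper;
/// assert_eq!(ToStringHelper(&42u32).to_string(), "42"); // `u32: ToString`
/// assert_eq!(ToStringHelper(&["a", "b"]).to_string(), r#"["a", "b"]"#); // `Debug` fallback
/// ```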
pub struct ToStringHelper<'a, T: 'static>(pub &'a T);
#[allow(clippy::to_string_trait_impl)] impl<T: Debug> ToString for ToStringHelper<'_, T> { #[inline] fn to_string(&self) -> String { format!("{:?}", self.0) } }
impl<T: ToString> ToStringHelper<'_, T> { #[allow(clippy::inherent_to_string)] #[inline] pub fn to_string(&self) -> String { self.0.to_string() } }
/// Used by `#[divan::bench(args = ...)]` to enable polymorphism. pub trait Arg<T> { fn get(this: Self) -> T; }
impl<T> Arg<T> for T { #[inline] fn get(this: Self) -> T { this } }
impl<'a, T: ?Sized> Arg<&'a T> for &'a Cow<'a, T> where T: ToOwned, { #[inline] fn get(this: Self) -> &'a T { this } }
impl<'a> Arg<&'a str> for &'a String { #[inline] fn get(this: Self) -> &'a str { this } }
impl<T: Copy> Arg<T> for &T { #[inline] fn get(this: Self) -> T { *this } }
impl<T: Copy> Arg<T> for &&T { #[inline] fn get(this: Self) -> T { **this } }
impl<T: Copy> Arg<T> for &&&T { #[inline] fn get(this: Self) -> T { ***this } }
/// Used by `#[divan::bench(threads = ...)]` to leak thread counts for easy /// global usage in [`BenchOptions::threads`]. /// /// This enables the `threads` option to be polymorphic over: /// - `usize` /// - `bool` /// - `true` is 0 /// - `false` is 1 /// - Iterators: /// - `[usize; N]` /// - `&[usize; N]` /// - `&[usize]` /// /// # Orphan Rules Hack /// /// Normally we can't implement a trait over both `usize` and `I: IntoIterator` /// because the compiler has no guarantee that `usize` will never implement /// `IntoIterator`. Ideally we would handle this with specialization, but that's /// not stable. /// /// The solution here is to make `IntoThreads` generic to implement technically /// different traits for `usize` and `IntoIterator` because of different `IMP` /// values. We then call verbatim `IntoThreads::into_threads(val)` and have the /// compiler infer the generic parameter for the single `IntoThreads` /// implementation. /// /// It's fair to assume that scalar primitives will never implement /// `IntoIterator`, so this hack shouldn't break in the future 🤠. pub trait IntoThreads<const IMP: usize> { fn into_threads(self) -> Cow<'static, [usize]>; }
impl IntoThreads<0> for usize { #[inline] fn into_threads(self) -> Cow<'static, [usize]> { let counts = match self { 0 => &[0], 1 => &[1], 2 => &[2], _ => return Cow::Owned(vec![self]), }; Cow::Borrowed(counts) } }
impl IntoThreads<0> for bool { #[inline] fn into_threads(self) -> Cow<'static, [usize]> { let counts = if self { // Available parallelism. &[0] } else { // No parallelism. &[1] }; Cow::Borrowed(counts) } }
impl<I> IntoThreads<1> for I where I: IntoIterator, I::Item: Borrow<usize>, { #[inline] fn into_threads(self) -> Cow<'static, [usize]> { let mut options: Vec<usize> = self.into_iter().map(|i| *i.borrow()).collect(); options.sort_unstable(); options.dedup(); Cow::Owned(options) } }
/// Used by `#[divan::bench(counters = [...])]`. #[inline] pub fn new_counter_set() -> crate::counter::CounterSet { Default::default() }
/// Used by `#[divan::bench]` to truncate arrays for generic `const` benchmarks. pub const fn shrink_array<T, const IN: usize, const OUT: usize>( array: [T; IN], ) -> Option<[T; OUT]> { use std::mem::ManuallyDrop; #[repr(C)] union Transmute<F, I> { from: ManuallyDrop<F>, into: ManuallyDrop<I>, } let from = ManuallyDrop::new(array); if OUT <= IN { Some(unsafe { ManuallyDrop::into_inner(Transmute { from }.into) }) } else { None } }
#[cfg(test)] mod tests { use super::*; #[test] fn into_threads() { macro_rules!
test { ($value:expr, $expected:expr) => { assert_eq!(IntoThreads::into_threads($value).as_ref(), $expected); }; } test!(true, &[0]); test!(false, &[1]); test!(0, &[0]); test!(1, &[1]); test!(42, &[42]); test!([0; 0], &[]); test!([0], &[0]); test!([0, 0], &[0]); test!([0, 2, 3, 1], &[0, 1, 2, 3]); test!([0, 0, 2, 3, 2, 1, 3], &[0, 1, 2, 3]); }
#[test] fn shrink_array() { let values = [1, 2, 3, 4, 5]; let equal: Option<[i32; 5]> = super::shrink_array(values); assert_eq!(equal, Some(values)); let smaller: Option<[i32; 3]> = super::shrink_array(values); assert_eq!(smaller, Some([1, 2, 3])); let larger: Option<[i32; 100]> = super::shrink_array(values); assert_eq!(larger, None); } }
divan-0.1.21/src/stats/mod.rs000064400000000000000000000032411046102023000140600ustar 00000000000000//! Measurement statistics.
use crate::{ alloc::{AllocOpMap, AllocTally}, counter::{KnownCounterKind, MaxCountUInt}, time::FineDuration, };
mod sample;
pub(crate) use sample::*;
/// Statistics from samples. pub(crate) struct Stats { /// Total number of samples taken. pub sample_count: u32, /// Total number of iterations (currently `sample_count * sample_size`). pub iter_count: u64, /// Timing statistics. pub time: StatsSet<FineDuration>, /// Maximum allocated bytes and maximum number of allocations associated /// with the corresponding samples for `time`. pub max_alloc: AllocTally<StatsSet<f64>>, /// Allocation statistics associated with the corresponding samples for /// `time`. pub alloc_tallies: AllocOpMap<AllocTally<StatsSet<f64>>>, /// `Counter` counts associated with the corresponding samples for `time`. pub counts: [Option<StatsSet<MaxCountUInt>>; KnownCounterKind::COUNT], }
impl Stats { pub fn get_counts(&self, counter_kind: KnownCounterKind) -> Option<&StatsSet<MaxCountUInt>> { self.counts[counter_kind as usize].as_ref() } }
#[derive(Debug)] pub(crate) struct StatsSet<T> { /// Associated with minimum amount of time taken by an iteration. pub fastest: T, /// Associated with maximum amount of time taken by an iteration. pub slowest: T, /// Associated with midpoint time taken by an iteration. pub median: T, /// Associated with average time taken by all iterations. pub mean: T, }
impl StatsSet<f64> { pub fn is_zero(&self) -> bool { self.fastest == 0.0 && self.slowest == 0.0 && self.median == 0.0 && self.mean == 0.0 } }
divan-0.1.21/src/stats/sample.rs000064400000000000000000000043431046102023000145660ustar 00000000000000use std::collections::HashMap;
use crate::{ alloc::ThreadAllocInfo, counter::KnownCounterKind, time::{FineDuration, Timer, Timestamp}, };
/// Timing measurement. pub(crate) struct TimeSample { /// The time this sample took to run. /// /// This is gotten from [`RawSample`] with: /// `end.duration_since(start, timer).clamp_to(timer.precision())`. pub duration: FineDuration, }
/// Unprocessed measurement. /// /// This cannot be serialized because [`Timestamp`] is an implementation detail /// for both the `Instant` and TSC timers. pub(crate) struct RawSample { pub start: Timestamp, pub end: Timestamp, pub timer: Timer, pub alloc_info: ThreadAllocInfo, pub counter_totals: [u128; KnownCounterKind::COUNT], }
impl RawSample { /// Simply computes `end - start` without clamping to precision. #[inline] pub fn duration(&self) -> FineDuration { self.end.duration_since(self.start, self.timer) } }
/// Sample collection. #[derive(Default)] pub(crate) struct SampleCollection { /// The number of iterations within each sample. pub sample_size: u32, /// Collected timings. pub time_samples: Vec<TimeSample>, /// Allocation information associated with `time_samples` by index.
pub alloc_info_by_sample: HashMap<u32, ThreadAllocInfo>, }
impl SampleCollection { /// Discards all recorded data. #[inline] pub fn clear(&mut self) { self.time_samples.clear(); self.alloc_info_by_sample.clear(); } /// Computes the total number of iterations across all samples. /// /// We use `u64` in case sample count and sizes are huge. #[inline] pub fn iter_count(&self) -> u64 { self.sample_size as u64 * self.time_samples.len() as u64 } /// Computes the total time across all samples. #[inline] pub fn total_duration(&self) -> FineDuration { FineDuration { picos: self.time_samples.iter().map(|s| s.duration.picos).sum() } } /// Returns all samples sorted by duration. #[inline] pub fn sorted_samples(&self) -> Vec<&TimeSample> { let mut result: Vec<&TimeSample> = self.time_samples.iter().collect(); result.sort_unstable_by_key(|s| s.duration); result } }
divan-0.1.21/src/thread_pool.rs000064400000000000000000000306071046102023000144510ustar 00000000000000use std::{ num::NonZeroUsize, panic::AssertUnwindSafe, ptr::NonNull, sync::{ atomic::{AtomicUsize, Ordering}, mpsc, Mutex, PoisonError, }, thread::Thread, };
use crate::util::{defer, sync::SyncWrap};
/// Single shared thread pool for running benchmarks on. pub(crate) static BENCH_POOL: ThreadPool = ThreadPool::new();
/// Reusable threads for broadcasting tasks. /// /// This thread pool runs only a single task at a time, since only one benchmark /// should run at a time. Invoking `broadcast` from two threads will cause one /// thread to wait for the other to finish. /// /// # How It Works /// /// Upon calling `broadcast`: /// /// 1. The main thread creates a `Task`, which is a pointer to a `TaskShared` /// pinned on the stack. `TaskShared` stores the function to run, along with /// other fields for coordinating threads. /// /// 2. New threads are spawned if the requested amount is not available. Each /// receives tasks over an associated channel. /// /// 3. The main thread sends the `Task` over the channels to the requested /// amount of threads. Upon receiving the task, each auxiliary thread will /// execute it and then decrement the task's reference count. /// /// 4. The main thread executes the `Task` like auxiliary threads. It then waits /// until the reference count is 0 before returning. pub(crate) struct ThreadPool { threads: Mutex<Vec<mpsc::SyncSender<Task>>>, }
impl ThreadPool { const fn new() -> Self { Self { threads: Mutex::new(Vec::new()) } } /// Performs the given task and pushes the results into a `vec`. #[inline] pub fn par_extend<T, F>(&self, vec: &mut Vec<Option<T>>, aux_threads: usize, task: F) where F: Sync + Fn(usize) -> T, T: Sync + Send, { unsafe { let old_len = vec.len(); let additional = aux_threads + 1; vec.reserve_exact(additional); vec.spare_capacity_mut().iter_mut().for_each(|val| { val.write(None); }); vec.set_len(old_len + additional); let ptr = SyncWrap::new(vec.as_mut_ptr().add(old_len)); self.broadcast(aux_threads, move |index| { ptr.add(index).write(Some(task(index))); }); } } /// Performs the given task across the current thread and auxiliary worker /// threads. /// /// This function returns once all threads complete the task. #[inline] pub fn broadcast<F>(&self, aux_threads: usize, task: F) where F: Sync + Fn(usize), { // SAFETY: The `TaskShared` instance is guaranteed to be accessible to // all threads until this function returns, because this thread waits // until `TaskShared.ref_count` is 0 before continuing.
unsafe { let task = TaskShared::new(aux_threads, task); let task = Task { shared: NonNull::from(&task).cast() }; self.broadcast_task(aux_threads, task); } } /// Type-erased monomorphized implementation for `broadcast`. unsafe fn broadcast_task(&self, aux_threads: usize, task: Task) { // Send task to auxiliary threads. if aux_threads > 0 { let threads = &mut *self.threads.lock().unwrap_or_else(PoisonError::into_inner); // Spawn more threads if necessary. if let Some(additional) = NonZeroUsize::new(aux_threads.saturating_sub(threads.len())) { spawn(additional, threads); } for thread in &threads[..aux_threads] { thread.send(task).unwrap(); } } // Run the task on the main thread. let main_result = std::panic::catch_unwind(AssertUnwindSafe(|| task.run(0))); // Wait for other threads to finish writing their results. // // SAFETY: The acquire memory ordering ensures that all writes performed // by the task on other threads will become visible to this thread after // returning from `broadcast`. while task.shared.as_ref().ref_count.load(Ordering::Acquire) > 0 { std::thread::park(); } // Don't drop our result until other threads finish, in case the panic // error's drop handler itself also panics. drop(main_result); } pub fn drop_threads(&self) { *self.threads.lock().unwrap_or_else(PoisonError::into_inner) = Default::default(); } #[cfg(test)] fn aux_thread_count(&self) -> usize { self.threads.lock().unwrap_or_else(PoisonError::into_inner).len() } }
/// Type-erased function and metadata. #[derive(Clone, Copy)] struct Task { shared: NonNull<TaskShared<()>>, }
unsafe impl Send for Task {}
unsafe impl Sync for Task {}
impl Task { /// Runs this task on behalf of `thread_id`. /// /// # Safety /// /// The caller must ensure: /// /// - This task has not outlived the `TaskShared` it came from, or else /// there will be a use-after-free. /// /// - `thread_id` is within the number of `broadcast` threads requested, so /// that it can be used to index input or output buffers. #[inline] unsafe fn run(&self, thread_id: usize) { let shared_ptr = self.shared.as_ptr(); let shared = &*shared_ptr; (shared.task_fn_ptr)(shared_ptr.cast(), thread_id); } }
/// Data stored on the main thread that gets shared with auxiliary threads. /// /// # Memory Layout /// /// Since the benchmark may have thrashed the cache, this type's fields are /// ordered by usage order. This type is also placed on its own cache line. #[repr(C)] struct TaskShared<F> { /// Once an auxiliary thread sets `ref_count` to 0, it should notify the /// main thread to wake up. main_thread: Thread, /// The number of auxiliary threads executing the task. /// /// Once this is 0, the main thread can read any results the task produced. ref_count: AtomicUsize, /// Performs `*result = Some(task_fn(thread))`. task_fn_ptr: unsafe fn(task: *const TaskShared<()>, thread: usize), /// Stores the closure state of the provided task. /// /// This must be stored as the last field so that all other fields are in /// the same place regardless of this field's type. task_fn: F, }
impl<F> TaskShared<F> { #[inline] fn new(aux_threads: usize, task_fn: F) -> Self where F: Sync + Fn(usize), { unsafe fn call<F>(task: *const TaskShared<()>, thread: usize) where F: Fn(usize), { let task_fn = &(*task.cast::<TaskShared<F>>()).task_fn; task_fn(thread); } Self { main_thread: std::thread::current(), ref_count: AtomicUsize::new(aux_threads), task_fn_ptr: call::<F>, task_fn, } } }
/// Spawns N additional threads and appends their channels to the list. /// /// Threads are given names in the form of `divan-$INDEX`.
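/// A rough sketch of a call site (hypothetical; in practice this is only
/// reached from `broadcast_task` when the pool has too few workers):
///
/// ```ignore
/// // With an empty pool, this spawns workers named "divan-1" and "divan-2";
/// // thread ID 0 is reserved for the main thread.
/// spawn(NonZeroUsize::new(2).unwrap(), &mut threads);
/// ```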
#[cold] fn spawn(additional: NonZeroUsize, threads: &mut Vec<mpsc::SyncSender<Task>>) { let next_thread_id = threads.len() + 1; threads.extend((next_thread_id..(next_thread_id + additional.get())).map(|thread_id| { // Create single-task channel. Unless another benchmark is running, the // current thread will be immediately unblocked after the auxiliary // thread accepts the task. // // This uses a rendezvous channel (capacity 0) instead of other standard // library channels because it reduces memory usage by many kilobytes. let (sender, receiver) = mpsc::sync_channel::<Task>(0); let work = move || { // Abort the process if the caught panic error itself panics when // dropped. let panic_guard = defer(|| std::process::abort()); while let Ok(task) = receiver.recv() { // Run the task on this auxiliary thread. // // SAFETY: The task is valid until `ref_count == 0`. let result = std::panic::catch_unwind(AssertUnwindSafe(|| unsafe { task.run(thread_id) })); // Decrement `ref_count` to notify the main thread // that we finished our work. // // SAFETY: This release operation makes writes within the task // become visible to the main thread. unsafe { // Clone the main thread's handle for unparking because the // `TaskShared` will be invalidated when `ref_count` is 0. let main_thread = task.shared.as_ref().main_thread.clone(); if task.shared.as_ref().ref_count.fetch_sub(1, Ordering::Release) == 1 { main_thread.unpark(); } } // Don't drop our result until after notifying the main thread, // in case the panic error's drop handler itself also panics. drop(result); } std::mem::forget(panic_guard); }; std::thread::Builder::new() .name(format!("divan-{thread_id}")) .spawn(work) .expect("failed to spawn thread"); sender })); }
#[cfg(test)] mod tests { use super::*; /// Make every thread write its ID to a buffer and then check that the /// buffer contains all IDs. #[test] fn extend() { static TEST_POOL: ThreadPool = ThreadPool::new(); fn test(aux_threads: usize, final_aux_threads: usize) { let total_threads = aux_threads + 1; let mut results = Vec::new(); let expected = (0..total_threads).map(Some).collect::<Vec<_>>(); TEST_POOL.par_extend(&mut results, aux_threads, |index| index); assert_eq!(results, expected); assert_eq!(TEST_POOL.aux_thread_count(), final_aux_threads); } test(0, 0); test(1, 1); test(2, 2); test(3, 3); test(4, 4); test(8, 8); // Decreasing auxiliary threads on later calls should still leave // previously spawned threads running. test(4, 8); test(0, 8); // Silence Miri about leaking threads.
TEST_POOL.drop_threads(); } } #[cfg(feature = "internal_benches")] mod benches { use super::*; fn aux_thread_counts() -> impl Iterator { let mut available_parallelism = std::thread::available_parallelism().ok().map(|n| n.get()); let range = 0..=16; if let Some(n) = available_parallelism { if range.contains(&n) { available_parallelism = None; } } range.chain(available_parallelism) } /// Benchmarks repeatedly using `ThreadPool` for the same number of threads /// on every run. #[crate::bench(crate = crate, args = aux_thread_counts())] fn broadcast(bencher: crate::Bencher, aux_threads: usize) { let pool = ThreadPool::new(); let benched = move || pool.broadcast(aux_threads, crate::black_box_drop); // Warmup to spawn threads. benched(); bencher.bench(benched); } /// Benchmarks using `ThreadPool` once. #[crate::bench(crate = crate, args = aux_thread_counts(), sample_size = 1)] fn broadcast_once(bencher: crate::Bencher, aux_threads: usize) { bencher .with_inputs(ThreadPool::new) .bench_refs(|pool| pool.broadcast(aux_threads, crate::black_box_drop)); } } divan-0.1.21/src/time/fence.rs000064400000000000000000000022221046102023000141570ustar 00000000000000use std::sync::atomic; /// Prevents other operations from affecting timing measurements. #[inline(always)] pub fn full_fence() { asm_fence(); atomic::fence(atomic::Ordering::SeqCst); } /// Prevents the compiler from reordering operations. #[inline(always)] pub fn compiler_fence() { asm_fence(); atomic::compiler_fence(atomic::Ordering::SeqCst); } /// Stronger compiler fence on [platforms with stable `asm!`](https://doc.rust-lang.org/nightly/reference/inline-assembly.html). /// /// This prevents LLVM from removing loops or hoisting logic out of the /// benchmark loop. #[inline(always)] fn asm_fence() { // Miri does not support inline assembly. if cfg!(miri) { return; } #[cfg(any( target_arch = "x86", target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64", target_arch = "riscv32", target_arch = "riscv64", target_arch = "loongarch64", ))] // SAFETY: The inline assembly is a no-op. unsafe { // Preserve flags because we don't want to pessimize user logic. std::arch::asm!("", options(nostack, preserves_flags)); } } divan-0.1.21/src/time/fine_duration.rs000064400000000000000000000306111046102023000157300ustar 00000000000000use std::{fmt, ops, time::Duration}; use crate::util; /// [Picosecond](https://en.wikipedia.org/wiki/Picosecond)-precise [`Duration`]. #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub(crate) struct FineDuration { pub picos: u128, } impl From for FineDuration { #[inline] fn from(duration: Duration) -> Self { Self { picos: duration .as_nanos() .checked_mul(1_000) .unwrap_or_else(|| panic!("{duration:?} is too large to fit in `FineDuration`")), } } } impl fmt::Display for FineDuration { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let sig_figs = f.precision().unwrap_or(4); let picos = self.picos; let mut scale = TimeScale::from_picos(picos); // Prefer formatting picoseconds as nanoseconds if we can. This makes // picoseconds easier to read because they are almost always alongside // nanosecond-scale values. if scale == TimeScale::PicoSec && sig_figs > 3 { scale = TimeScale::NanoSec; } let multiple: u128 = { let sig_figs = u32::try_from(sig_figs).unwrap_or(u32::MAX); 10_u128.saturating_pow(sig_figs) }; // TODO: Format without heap allocation. 
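// Worked example (matching the `micro_sec` test below): picos = 1_234_567 with
// sig_figs = 4 gives multiple = 10_000 and a microsecond scale, so
// val = ((1_234_567 * 10_000) / 1_000_000) as f64 / 10_000.0 = 1.2345,
// which renders as "1.234 µs".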
let mut str: String = match picos::DAY.checked_mul(multiple) { Some(int_day) if picos >= int_day => { // Format using integer representation to not lose precision. (picos / picos::DAY).to_string() } _ => { // Format using floating point representation. // Multiply to allow `sig_figs` digits of fractional precision. let val = (((picos * multiple) / scale.picos()) as f64) / multiple as f64; util::fmt::format_f64(val, sig_figs) } }; str.push(' '); str.push_str(scale.suffix()); // Fill up to specified width. if let Some(fill_len) = f.width().and_then(|width| width.checked_sub(str.len())) { match f.align() { None | Some(fmt::Alignment::Left) => { str.extend(std::iter::repeat(f.fill()).take(fill_len)); } _ => return Err(fmt::Error), } } f.write_str(&str) } } impl fmt::Debug for FineDuration { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(self, f) } } impl ops::Add for FineDuration { type Output = Self; #[inline] fn add(self, other: Self) -> Self { Self { picos: self.picos + other.picos } } } impl ops::AddAssign for FineDuration { #[inline] fn add_assign(&mut self, other: Self) { self.picos += other.picos } } impl> ops::Div for FineDuration { type Output = Self; #[inline] fn div(self, count: I) -> Self { Self { picos: self.picos / count.into() } } } impl FineDuration { pub const ZERO: Self = Self { picos: 0 }; pub const MAX: Self = Self { picos: u128::MAX }; #[inline] pub fn is_zero(&self) -> bool { self.picos == 0 } /// Round up to `other` if `self` is zero. #[inline] pub fn clamp_to(self, other: Self) -> Self { if self.is_zero() { other } else { self } } /// Returns the smaller non-zero value. #[inline] pub fn clamp_to_min(self, other: Self) -> Self { if self.is_zero() { other } else if other.is_zero() { self } else { self.min(other) } } } mod picos { pub const NANOS: u128 = 1_000; pub const MICROS: u128 = 1_000 * NANOS; pub const MILLIS: u128 = 1_000 * MICROS; pub const SEC: u128 = 1_000 * MILLIS; pub const MIN: u128 = 60 * SEC; pub const HOUR: u128 = 60 * MIN; pub const DAY: u128 = 24 * HOUR; } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum TimeScale { PicoSec, NanoSec, MicroSec, MilliSec, Sec, Min, Hour, Day, } impl TimeScale { #[cfg(test)] const ALL: &'static [Self] = &[ Self::PicoSec, Self::NanoSec, Self::MicroSec, Self::MilliSec, Self::Sec, Self::Min, Self::Hour, Self::Day, ]; /// Determines the scale of time for representing a number of picoseconds. fn from_picos(picos: u128) -> Self { use picos::*; if picos < NANOS { Self::PicoSec } else if picos < MICROS { Self::NanoSec } else if picos < MILLIS { Self::MicroSec } else if picos < SEC { Self::MilliSec } else if picos < MIN { Self::Sec } else if picos < HOUR { Self::Min } else if picos < DAY { Self::Hour } else { Self::Day } } /// Returns the number of picoseconds needed to reach this scale. fn picos(self) -> u128 { use picos::*; match self { Self::PicoSec => 1, Self::NanoSec => NANOS, Self::MicroSec => MICROS, Self::MilliSec => MILLIS, Self::Sec => SEC, Self::Min => MIN, Self::Hour => HOUR, Self::Day => DAY, } } /// Returns the unit suffix. 
fn suffix(self) -> &'static str { match self { Self::PicoSec => "ps", Self::NanoSec => "ns", Self::MicroSec => "µs", Self::MilliSec => "ms", Self::Sec => "s", Self::Min => "m", Self::Hour => "h", Self::Day => "d", } } } #[cfg(test)] mod tests { use super::*; #[test] fn clamp_to() { #[track_caller] fn test(a: u128, b: u128, expected: u128) { assert_eq!( FineDuration { picos: a }.clamp_to(FineDuration { picos: b }), FineDuration { picos: expected } ); } test(0, 0, 0); test(0, 1, 1); test(0, 2, 2); test(0, 3, 3); test(1, 0, 1); test(1, 1, 1); test(1, 2, 1); test(1, 3, 1); test(2, 0, 2); test(2, 1, 2); test(2, 2, 2); test(2, 3, 2); test(3, 0, 3); test(3, 1, 3); test(3, 2, 3); test(3, 3, 3); } #[test] fn clamp_to_min() { #[track_caller] fn test(a: u128, b: u128, expected: u128) { assert_eq!( FineDuration { picos: a }.clamp_to_min(FineDuration { picos: b }), FineDuration { picos: expected } ); } test(0, 0, 0); test(0, 1, 1); test(0, 2, 2); test(0, 3, 3); test(1, 0, 1); test(1, 1, 1); test(1, 2, 1); test(1, 3, 1); test(2, 0, 2); test(2, 1, 1); test(2, 2, 2); test(2, 3, 2); test(3, 0, 3); test(3, 1, 1); test(3, 2, 2); test(3, 3, 3); } #[allow(clippy::zero_prefixed_literal)] mod fmt { use super::*; #[track_caller] fn test(picos: u128, expected: &str) { let duration = FineDuration { picos }; assert_eq!(duration.to_string(), expected); assert_eq!(format!("{duration:.4}"), expected); assert_eq!(format!("{duration:<0}"), expected); } macro_rules! assert_fmt_eq { ($input:literal, $expected:literal) => { assert_eq!(format!($input), format!($expected)); }; } #[test] fn precision() { for &scale in TimeScale::ALL { let base_duration = FineDuration { picos: scale.picos() }; let incr_duration = FineDuration { picos: scale.picos() + 1 }; if scale == TimeScale::PicoSec { assert_eq!(format!("{base_duration:.0}"), "1 ps"); assert_eq!(format!("{incr_duration:.0}"), "2 ps"); } else { let base_string = base_duration.to_string(); assert_eq!(format!("{base_duration:.0}"), base_string); assert_eq!(format!("{incr_duration:.0}"), base_string); } } } #[test] fn fill() { for &scale in TimeScale::ALL { // Picoseconds are formatted as nanoseconds by default. 
if scale == TimeScale::PicoSec { continue; } let duration = FineDuration { picos: scale.picos() }; let suffix = scale.suffix(); let pad = " ".repeat(8 - suffix.len()); assert_fmt_eq!("{duration:<2}", "1 {suffix}"); assert_fmt_eq!("{duration:<10}", "1 {suffix}{pad}"); } } #[test] fn pico_sec() { test(000, "0 ns"); test(001, "0.001 ns"); test(010, "0.01 ns"); test(100, "0.1 ns"); test(102, "0.102 ns"); test(120, "0.12 ns"); test(123, "0.123 ns"); test(012, "0.012 ns"); } #[test] fn nano_sec() { test(001_000, "1 ns"); test(010_000, "10 ns"); test(100_000, "100 ns"); test(100_002, "100 ns"); test(100_020, "100 ns"); test(100_200, "100.2 ns"); test(102_000, "102 ns"); test(120_000, "120 ns"); test(001_002, "1.002 ns"); test(001_023, "1.023 ns"); test(001_234, "1.234 ns"); test(001_230, "1.23 ns"); test(001_200, "1.2 ns"); } #[test] fn micro_sec() { test(001_000_000, "1 µs"); test(010_000_000, "10 µs"); test(100_000_000, "100 µs"); test(100_000_002, "100 µs"); test(100_000_020, "100 µs"); test(100_000_200, "100 µs"); test(100_002_000, "100 µs"); test(100_020_000, "100 µs"); test(100_200_000, "100.2 µs"); test(102_000_000, "102 µs"); test(120_000_000, "120 µs"); test(012_000_000, "12 µs"); test(001_200_000, "1.2 µs"); test(001_020_000, "1.02 µs"); test(001_002_000, "1.002 µs"); test(001_000_200, "1 µs"); test(001_000_020, "1 µs"); test(001_000_002, "1 µs"); test(001_230_000, "1.23 µs"); test(001_234_000, "1.234 µs"); test(001_234_500, "1.234 µs"); test(001_234_560, "1.234 µs"); test(001_234_567, "1.234 µs"); } #[test] fn milli_sec() { test(001_000_000_000, "1 ms"); test(010_000_000_000, "10 ms"); test(100_000_000_000, "100 ms"); } #[test] fn sec() { test(picos::SEC, "1 s"); test(picos::SEC * 10, "10 s"); test(picos::SEC * 59, "59 s"); test(picos::MILLIS * 59_999, "59.99 s"); } #[test] fn min() { test(picos::MIN, "1 m"); test(picos::MIN * 10, "10 m"); test(picos::MIN * 59, "59 m"); test(picos::MILLIS * 3_599_000, "59.98 m"); test(picos::MILLIS * 3_599_999, "59.99 m"); test(picos::HOUR - 1, "59.99 m"); } #[test] fn hour() { test(picos::HOUR, "1 h"); test(picos::HOUR * 10, "10 h"); test(picos::HOUR * 23, "23 h"); test(picos::MILLIS * 86_300_000, "23.97 h"); test(picos::MILLIS * 86_399_999, "23.99 h"); test(picos::DAY - 1, "23.99 h"); } #[test] fn day() { test(picos::DAY, "1 d"); test(picos::DAY + picos::DAY / 10, "1.1 d"); test(picos::DAY + picos::DAY / 100, "1.01 d"); test(picos::DAY + picos::DAY / 1000, "1.001 d"); test(picos::DAY * 000010, "10 d"); test(picos::DAY * 000100, "100 d"); test(picos::DAY * 001000, "1000 d"); test(picos::DAY * 010000, "10000 d"); test(picos::DAY * 100000, "100000 d"); test(u128::MAX / 1000, "3938453320844195178 d"); test(u128::MAX, "3938453320844195178974 d"); } } } divan-0.1.21/src/time/mod.rs000064400000000000000000000013051046102023000136570ustar 00000000000000use std::time::Duration; pub mod fence; mod fine_duration; mod timer; mod timestamp; pub(crate) use fine_duration::*; pub(crate) use timer::*; pub(crate) use timestamp::*; /// Private-public trait for being polymorphic over `Duration`. pub trait IntoDuration { /// Converts into a `Duration`. 
fn into_duration(self) -> Duration; } impl IntoDuration for Duration { #[inline] fn into_duration(self) -> Duration { self } } impl IntoDuration for u64 { #[inline] fn into_duration(self) -> Duration { Duration::from_secs(self) } } impl IntoDuration for f64 { #[inline] fn into_duration(self) -> Duration { Duration::from_secs_f64(self) } } divan-0.1.21/src/time/timer.rs000064400000000000000000000274651046102023000142370ustar 00000000000000use std::{cmp::Ordering, num::NonZeroU64, sync::OnceLock}; use crate::{ alloc::{AllocOp, ThreadAllocInfo}, black_box, time::{FineDuration, TscTimestamp, TscUnavailable, UntaggedTimestamp}, }; /// Measures time. #[derive(Clone, Copy, Default)] pub(crate) enum Timer { /// Operating system timer. #[default] Os, /// CPU timestamp counter. Tsc { /// [`TscTimestamp::frequency`]. frequency: NonZeroU64, }, } impl Timer { const COUNT: usize = 2; /// Returns all available timers. #[cfg(test)] pub fn available() -> Vec { let mut timers = vec![Self::Os]; if let Ok(tsc) = Self::get_tsc() { timers.push(tsc); } timers } /// Attempts to get the CPU timestamp counter. #[inline] pub fn get_tsc() -> Result { Ok(Self::Tsc { frequency: TscTimestamp::frequency()? }) } #[inline] pub fn kind(self) -> TimerKind { match self { Self::Os => TimerKind::Os, Self::Tsc { .. } => TimerKind::Tsc, } } /// Returns the smallest non-zero duration that this timer can measure. /// /// The result is cached. pub fn precision(self) -> FineDuration { static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; let cached = &CACHED[self.kind() as usize]; *cached.get_or_init(|| self.measure_precision()) } fn measure_precision(self) -> FineDuration { let timer_kind = self.kind(); // Start with the worst possible minimum. let mut min_sample = FineDuration::MAX; let mut seen_count = 0; // If timing in immediate succession fails to produce a non-zero sample, // an artificial delay is added by looping. `usize` is intentionally // used to make looping cheap. let mut delay_len: usize = 0; loop { for _ in 0..100 { // Use `UntaggedTimestamp` to minimize overhead. let sample_start: UntaggedTimestamp; let sample_end: UntaggedTimestamp; if delay_len == 0 { // Immediate succession. sample_start = UntaggedTimestamp::start(timer_kind); sample_end = UntaggedTimestamp::end(timer_kind); } else { // Add delay. sample_start = UntaggedTimestamp::start(timer_kind); for n in 0..delay_len { crate::black_box(n); } sample_end = UntaggedTimestamp::end(timer_kind); } // SAFETY: These values are guaranteed to be the correct variant // because they were created from the same `timer_kind`. let [sample_start, sample_end] = unsafe { [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)] }; let sample = sample_end.duration_since(sample_start, self); // Discard sample if irrelevant. if sample.is_zero() { continue; } match sample.cmp(&min_sample) { Ordering::Greater => { // If we already delayed a lot, and not hit the seen // count threshold, then use current minimum. if delay_len > 100 { return min_sample; } } Ordering::Equal => { seen_count += 1; // If we've seen this min 100 times, we have high // confidence this is the smallest duration. if seen_count >= 100 { return min_sample; } } Ordering::Less => { min_sample = sample; seen_count = 0; } } } delay_len = delay_len.saturating_add(1); } } /// Returns the overheads added by the benchmarker. /// /// `min_time` and `max_time` do not consider this as benchmarking time. 
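/// A rough sketch of the deduction (mirroring `TimedOverhead::total_overhead`
/// below, with counts taken from the sample's `ThreadAllocInfo` tallies):
///
/// ```text
/// sample_loop * sample_size
///     + tally_alloc * alloc_count
///     + tally_dealloc * dealloc_count
///     + tally_realloc * (grow_count + shrink_count)
/// ```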
pub fn bench_overheads(self) -> &'static TimedOverhead { // Miri is slow, so don't waste time on this. if cfg!(miri) { return &TimedOverhead::ZERO; } static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; let cached = &CACHED[self.kind() as usize]; cached.get_or_init(|| TimedOverhead { sample_loop: self.sample_loop_overhead(), tally_alloc: self.measure_tally_alloc_overhead(), tally_dealloc: self.measure_tally_dealloc_overhead(), tally_realloc: self.measure_tally_realloc_overhead(), }) } /// Returns the per-iteration overhead of the benchmarking sample loop. fn sample_loop_overhead(self) -> FineDuration { // Miri is slow, so don't waste time on this. if cfg!(miri) { return FineDuration::default(); } static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; let cached = &CACHED[self.kind() as usize]; *cached.get_or_init(|| self.measure_sample_loop_overhead()) } /// Calculates the per-iteration overhead of the benchmarking sample loop. fn measure_sample_loop_overhead(self) -> FineDuration { let timer_kind = self.kind(); let sample_count: usize = 100; let sample_size: usize = 10_000; // The minimum non-zero sample. let mut min_sample = FineDuration::default(); for _ in 0..sample_count { let start = UntaggedTimestamp::start(timer_kind); for i in 0..sample_size { _ = crate::black_box(i); } let end = UntaggedTimestamp::end(timer_kind); // SAFETY: These values are guaranteed to be the correct variant because // they were created from the same `timer_kind`. let [start, end] = unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] }; let mut sample = end.duration_since(start, self); sample.picos /= sample_size as u128; min_sample = min_sample.clamp_to_min(sample); } min_sample } fn measure_tally_alloc_overhead(self) -> FineDuration { let size = black_box(0); self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_alloc(size)) } fn measure_tally_dealloc_overhead(self) -> FineDuration { let size = black_box(0); self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_dealloc(size)) } fn measure_tally_realloc_overhead(self) -> FineDuration { let new_size = black_box(0); let old_size = black_box(0); self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_realloc(old_size, new_size)) } // SAFETY: This function is not reentrant. Calling it within `operation` // would cause aliasing of `ThreadAllocInfo::current`. fn measure_alloc_info_overhead(self, operation: impl Fn(&mut ThreadAllocInfo)) -> FineDuration { // Initialize the current thread's alloc info. let alloc_info = ThreadAllocInfo::current(); let sample_count = 100; let sample_size = 50_000; let result = self.measure_min_time(sample_count, sample_size, || { if let Some(mut alloc_info) = ThreadAllocInfo::try_current() { // SAFETY: We have exclusive access. operation(unsafe { alloc_info.as_mut() }); } }); // Clear alloc info. if let Some(mut alloc_info) = alloc_info { // SAFETY: We have exclusive access. let alloc_info = unsafe { alloc_info.as_mut() }; alloc_info.clear(); } result } /// Calculates the smallest non-zero time to perform an operation. 
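/// Sketch of the per-iteration estimate computed below (names are the local
/// variables of this function):
///
/// ```text
/// sample ≈ (end - start) / sample_size - sample_loop_overhead
/// ```
///
/// The smallest non-zero such sample across `sample_count` rounds is returned.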
fn measure_min_time( self, sample_count: usize, sample_size: usize, operation: impl Fn(), ) -> FineDuration { let timer_kind = self.kind(); let loop_overhead = self.sample_loop_overhead(); let mut min_sample = FineDuration::default(); for _ in 0..sample_count { let start = UntaggedTimestamp::start(timer_kind); for _ in 0..sample_size { operation(); } let end = UntaggedTimestamp::end(timer_kind); // SAFETY: These values are guaranteed to be the correct variant // because they were created from the same `timer_kind`. let [start, end] = unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] }; let mut sample = end.duration_since(start, self); sample.picos /= sample_size as u128; // Remove benchmarking loop overhead. sample.picos = sample.picos.saturating_sub(loop_overhead.picos); min_sample = min_sample.clamp_to_min(sample); } min_sample } } /// [`Timer`] kind. #[derive(Clone, Copy, Default)] pub(crate) enum TimerKind { /// Operating system timer. #[default] Os, /// CPU timestamp counter. Tsc, } /// The measured overhead of various benchmarking operations. pub(crate) struct TimedOverhead { pub sample_loop: FineDuration, pub tally_alloc: FineDuration, pub tally_dealloc: FineDuration, pub tally_realloc: FineDuration, } impl TimedOverhead { pub const ZERO: Self = Self { sample_loop: FineDuration::ZERO, tally_alloc: FineDuration::ZERO, tally_dealloc: FineDuration::ZERO, tally_realloc: FineDuration::ZERO, }; pub fn total_overhead(&self, sample_size: u32, alloc_info: &ThreadAllocInfo) -> FineDuration { let sample_loop_overhead = self.sample_loop.picos.saturating_mul(sample_size as u128); let tally_alloc_overhead = self .tally_alloc .picos .saturating_mul(alloc_info.tallies.get(AllocOp::Alloc).count as u128); let tally_dealloc_overhead = self .tally_dealloc .picos .saturating_mul(alloc_info.tallies.get(AllocOp::Dealloc).count as u128); let tally_realloc_overhead = self.tally_realloc.picos.saturating_mul( alloc_info.tallies.get(AllocOp::Grow).count as u128 + alloc_info.tallies.get(AllocOp::Shrink).count as u128, ); FineDuration { picos: sample_loop_overhead .saturating_add(tally_alloc_overhead) .saturating_add(tally_dealloc_overhead) .saturating_add(tally_realloc_overhead), } } } #[cfg(feature = "internal_benches")] mod benches { use super::*; #[crate::bench(crate = crate)] fn get_tsc() -> Result { Timer::get_tsc() } mod measure { use super::*; #[crate::bench(crate = crate)] fn precision() -> FineDuration { Timer::Os.measure_precision() } #[crate::bench(crate = crate)] fn sample_loop_overhead() -> FineDuration { Timer::Os.measure_sample_loop_overhead() } #[crate::bench(crate = crate)] fn tally_alloc_overhead() -> FineDuration { Timer::Os.measure_tally_alloc_overhead() } #[crate::bench(crate = crate)] fn tally_dealloc_overhead() -> FineDuration { Timer::Os.measure_tally_dealloc_overhead() } #[crate::bench(crate = crate)] fn tally_realloc_overhead() -> FineDuration { Timer::Os.measure_tally_realloc_overhead() } } } divan-0.1.21/src/time/timestamp/mod.rs000064400000000000000000000047571046102023000157000ustar 00000000000000use std::time::Instant; use crate::time::{fence, FineDuration, Timer, TimerKind}; mod tsc; pub(crate) use tsc::*; /// A measurement timestamp. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub(crate) enum Timestamp { /// Time provided by the operating system. Os(Instant), /// [CPU timestamp counter](https://en.wikipedia.org/wiki/Time_Stamp_Counter). 
Tsc(TscTimestamp), } impl Timestamp { #[inline(always)] pub fn start(timer_kind: TimerKind) -> Self { fence::full_fence(); let value = match timer_kind { TimerKind::Os => Self::Os(Instant::now()), TimerKind::Tsc => Self::Tsc(TscTimestamp::start()), }; fence::compiler_fence(); value } pub fn duration_since(self, earlier: Self, timer: Timer) -> FineDuration { match (self, earlier, timer) { (Self::Os(this), Self::Os(earlier), Timer::Os) => this.duration_since(earlier).into(), (Self::Tsc(this), Self::Tsc(earlier), Timer::Tsc { frequency }) => { this.duration_since(earlier, frequency) } _ => unreachable!(), } } } /// A [`Timestamp`] where the variant is determined by an external source of /// truth. /// /// By making the variant tag external to this type, we produce more optimized /// code by: /// - Reusing the same condition variable /// - Reducing the size of the timestamp variables #[derive(Clone, Copy)] pub(crate) union UntaggedTimestamp { /// [`Timestamp::Os`]. pub os: Instant, /// [`Timestamp::Tsc`]. pub tsc: TscTimestamp, } impl UntaggedTimestamp { #[inline(always)] pub fn start(timer_kind: TimerKind) -> Self { fence::full_fence(); let value = match timer_kind { TimerKind::Os => Self { os: Instant::now() }, TimerKind::Tsc => Self { tsc: TscTimestamp::start() }, }; fence::compiler_fence(); value } #[inline(always)] pub fn end(timer_kind: TimerKind) -> Self { fence::compiler_fence(); let value = match timer_kind { TimerKind::Os => Self { os: Instant::now() }, TimerKind::Tsc => Self { tsc: TscTimestamp::end() }, }; fence::full_fence(); value } #[inline(always)] pub unsafe fn into_timestamp(self, timer_kind: TimerKind) -> Timestamp { match timer_kind { TimerKind::Os => Timestamp::Os(self.os), TimerKind::Tsc => Timestamp::Tsc(self.tsc), } } } divan-0.1.21/src/time/timestamp/tsc/aarch64.rs000064400000000000000000000021401046102023000171220ustar 00000000000000use std::arch::asm; use crate::time::TscUnavailable; /// Reads the [`cntfrq_el0`](https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CNTFRQ-EL0--Counter-timer-Frequency-register?lang=en) /// register. /// /// This value is set on system initialization and thus does not change between /// reads. #[inline] pub(crate) fn frequency() -> Result { unsafe { let frequency: u64; asm!( "mrs {}, cntfrq_el0", out(reg) frequency, options(nomem, nostack, preserves_flags, pure), ); Ok(frequency) } } /// Reads the [`cntvct_el0`](https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CNTVCT-EL0--Counter-timer-Virtual-Count-register?lang=en) /// register. #[inline(always)] pub(crate) fn timestamp() -> u64 { unsafe { let timestamp: u64; asm!( "mrs {}, cntvct_el0", out(reg) timestamp, // Leave off `nomem` because this should be a compiler fence. options(nostack, preserves_flags), ); timestamp } } divan-0.1.21/src/time/timestamp/tsc/mod.rs000064400000000000000000000061661046102023000164650ustar 00000000000000use std::{fmt, num::NonZeroU64}; use crate::time::FineDuration; #[cfg(target_arch = "aarch64")] #[path = "aarch64.rs"] mod arch; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[path = "x86.rs"] mod arch; /// [CPU timestamp counter](https://en.wikipedia.org/wiki/Time_Stamp_Counter). #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub(crate) struct TscTimestamp { pub value: u64, } impl TscTimestamp { /// Gets the timestamp frequency. /// /// On AArch64, this simply reads `cntfrq_el0`. On x86, this measures the /// TSC frequency. 
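/// For reference, `duration_since` below converts a tick delta to picoseconds
/// as `diff * 1_000_000_000_000 / frequency`; e.g. with a hypothetical 24 MHz
/// counter, a delta of 48 ticks corresponds to 2 µs.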
#[inline] #[allow(unreachable_code)] pub fn frequency() -> Result { // Miri does not support inline assembly. #[cfg(miri)] return Err(TscUnavailable::Unimplemented); #[cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))] return NonZeroU64::new(arch::frequency()?).ok_or(TscUnavailable::ZeroFrequency); Err(TscUnavailable::Unimplemented) } /// Reads the timestamp counter. #[inline(always)] pub fn start() -> Self { #[allow(unused)] let value = 0; #[cfg(target_arch = "aarch64")] let value = arch::timestamp(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] let value = arch::start_timestamp(); Self { value } } /// Reads the timestamp counter. #[inline(always)] pub fn end() -> Self { #[allow(unused)] let value = 0; #[cfg(target_arch = "aarch64")] let value = arch::timestamp(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] let value = arch::end_timestamp(); Self { value } } pub fn duration_since(self, earlier: Self, frequency: NonZeroU64) -> FineDuration { const PICOS: u128 = 1_000_000_000_000; let Some(diff) = self.value.checked_sub(earlier.value) else { return Default::default(); }; FineDuration { picos: (diff as u128 * PICOS) / frequency.get() as u128 } } } /// Reason for why the timestamp counter cannot be used. #[derive(Clone, Copy)] pub(crate) enum TscUnavailable { /// Not yet implemented for this platform. Unimplemented, /// Got a frequency of 0. ZeroFrequency, /// Missing the appropriate instructions. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] MissingInstructions, /// The timestamp counter is not guaranteed to be constant. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] VariableFrequency, } impl fmt::Display for TscUnavailable { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let reason = match self { Self::Unimplemented => "unimplemented", Self::ZeroFrequency => "zero TSC frequency", #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Self::MissingInstructions => "missing instructions", #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Self::VariableFrequency => "variable TSC frequency", }; f.write_str(reason) } } divan-0.1.21/src/time/timestamp/tsc/x86.rs000064400000000000000000000157551046102023000163370ustar 00000000000000#[cfg(target_arch = "x86")] use std::arch::x86; #[cfg(target_arch = "x86_64")] use std::arch::x86_64 as x86; use std::time::{Duration, Instant}; use crate::time::{fence, TscUnavailable}; #[inline(always)] pub(crate) fn start_timestamp() -> u64 { // Serialize previous operations before `rdtsc` to ensure they are not // inside the timed section. util::lfence(); let tsc = util::rdtsc(); // Serialize `rdtsc` before any measured code. util::lfence(); tsc } #[inline(always)] pub(crate) fn end_timestamp() -> u64 { // `rdtscp` is serialized after all previous operations. let tsc = util::rdtscp(); // Serialize `rdtscp` before any subsequent code. util::lfence(); tsc } pub(crate) fn frequency() -> Result { if !util::tsc_is_available() { return Err(TscUnavailable::MissingInstructions); } if !util::tsc_is_invariant() { return Err(TscUnavailable::VariableFrequency); } let nominal = nominal_frequency(); let measured = measure::measure_frequency(); // Use the nominal frequency if within 0.1% of the measured frequency. // // The nominal frequency is used for getting an exact value if the measured // frequency is slightly off. It is not blindly trusted because it may not // match the TSC frequency. 
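// Illustrative (hypothetical) numbers: with nominal = 2.5e9 ("2.50GHz") and
// measured = 2.498e9, measured * 0.999 ≈ 2.4955e9 < 2.5e9 and
// measured * 1.001 ≈ 2.5005e9 > 2.5e9, so the exact nominal value is used.
// A measured value of 2.49e9 would fail the upper bound and fall through to
// the measured frequency instead.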
if let Some(nominal) = nominal { if measured * 0.999 < nominal && nominal < measured * 1.001 { return Ok(nominal.round() as u64); } } Ok(measured.round() as u64) } /// Parses the CPU frequency in the brand name, e.g. "2.50GHz". fn nominal_frequency() -> Option { let name = util::cpu_name()?; let name = { let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len()); std::str::from_utf8(&name[..len]).ok()? }; #[rustfmt::skip] let frequencies = [ ("MHz", 1e6), ("GHz", 1e9), ("THz", 1e12), ]; for (unit, scale) in frequencies { let Some(unit_start) = name.find(unit) else { continue; }; let pre_unit = &name[..unit_start]; let num = match pre_unit.rsplit_once(' ') { Some((_, num)) => num, None => pre_unit, }; if let Ok(num) = num.parse::() { return Some(num * scale); }; } None } mod util { use super::*; #[inline(always)] pub fn rdtsc() -> u64 { fence::compiler_fence(); // SAFETY: Reading the TSC is memory safe. let tsc = unsafe { x86::_rdtsc() }; fence::compiler_fence(); tsc } #[inline(always)] pub fn rdtscp() -> u64 { fence::compiler_fence(); // SAFETY: Reading the TSC is memory safe. let tsc = unsafe { x86::__rdtscp(&mut 0) }; fence::compiler_fence(); tsc } #[inline(always)] pub fn lfence() { // SAFETY: A load fence is memory safe. unsafe { x86::_mm_lfence() } } #[inline] fn cpuid(leaf: u32) -> x86::CpuidResult { // SAFETY: `cpuid` is never unsafe to call. unsafe { x86::__cpuid(leaf) } } /// Invokes CPUID and converts its output registers to an ordered array. #[inline] fn cpuid_array(leaf: u32) -> [u32; 4] { let cpuid = cpuid(leaf); [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx] } /// Returns `true` if the given CPUID leaf is available. #[inline] fn cpuid_has_leaf(leaf: u32) -> bool { cpuid(0x8000_0000).eax >= leaf } /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp` /// instructions are available. #[inline] pub fn tsc_is_available() -> bool { let bits = cpuid(0x8000_0001).edx; let rdtsc = 1 << 4; let rdtscp = 1 << 27; bits & (rdtsc | rdtscp) != 0 } /// Returns `true` if CPUID indicates that the timestamp counter has a /// constant frequency. #[inline] pub fn tsc_is_invariant() -> bool { let leaf = 0x8000_0007; if !cpuid_has_leaf(leaf) { return false; } cpuid(leaf).edx & (1 << 8) != 0 } /// Returns the processor model name as a null-terminated ASCII string. pub fn cpu_name() -> Option<[u8; 48]> { if !cpuid_has_leaf(0x8000_0004) { return None; } #[rustfmt::skip] let result = [ cpuid_array(0x8000_0002), cpuid_array(0x8000_0003), cpuid_array(0x8000_0004), ]; // SAFETY: Converting from `u32` to bytes. Some(unsafe { std::mem::transmute(result) }) } } mod measure { use super::*; /// Returns the TSC frequency by measuring it. pub fn measure_frequency() -> f64 { const TRIES: usize = 8; // Start with delay of 1ms up to 256ms (2^TRIES). let mut delay_ms = 1; let mut prev_measure = f64::NEG_INFINITY; let mut measures = [0.0; TRIES]; for slot in &mut measures { let measure = measure_frequency_once(Duration::from_millis(delay_ms)); // This measurement is sufficiently accurate if within 0.1% of the // previous. if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 { return measure; } *slot = measure; prev_measure = measure; delay_ms *= 2; } // If no frequencies were within 0.1% of each other, find the frequency // with the smallest delta. 
let mut min_delta = f64::INFINITY; let mut result_index = 0; for i in 0..TRIES { for j in (i + 1)..TRIES { let delta = (measures[i] - measures[j]).abs(); if delta < min_delta { min_delta = delta; result_index = i; } } } measures[result_index] } fn measure_frequency_once(delay: Duration) -> f64 { let (start_tsc, start_instant) = tsc_instant_pair(); std::thread::sleep(delay); let (end_tsc, end_instant) = tsc_instant_pair(); let elapsed_tsc = end_tsc.saturating_sub(start_tsc); let elapsed_duration = end_instant.duration_since(start_instant); (elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9 } /// Returns a timestamp/instant pair that has a small latency between /// getting the two values. fn tsc_instant_pair() -> (u64, Instant) { let mut best_latency = Duration::MAX; let mut best_pair = (0, Instant::now()); // Make up to 100 attempts to get a low latency pair. for _ in 0..100 { let instant = Instant::now(); let tsc = util::rdtsc(); let latency = instant.elapsed(); let pair = (tsc, instant); if latency.is_zero() { return pair; } if latency < best_latency { best_latency = latency; best_pair = pair; } } best_pair } } divan-0.1.21/src/tree_painter.rs000064400000000000000000000366711046102023000146410ustar 00000000000000//! Happy little trees. use std::{io::Write, iter::repeat}; use crate::{ alloc::{AllocOp, AllocTally}, counter::{AnyCounter, BytesFormat, KnownCounterKind}, stats::{Stats, StatsSet}, util, }; const TREE_COL_BUF: usize = 2; /// Paints tree-style output using box-drawing characters. pub(crate) struct TreePainter { /// The maximum number of characters taken by a name and its prefix. Emitted /// information should be left-padded to start at this column. max_name_span: usize, column_widths: [usize; TreeColumn::COUNT], depth: usize, /// The current prefix to the name and content, e.g. /// │ │ for three levels of nesting with the second level /// being on the last node. current_prefix: String, /// Buffer for writing to before printing to stdout. write_buf: String, } impl TreePainter { pub fn new(max_name_span: usize, column_widths: [usize; TreeColumn::COUNT]) -> Self { Self { max_name_span, column_widths, depth: 0, current_prefix: String::new(), write_buf: String::new(), } } } impl TreePainter { /// Enter a parent node. pub fn start_parent(&mut self, name: &str, is_last: bool) { let is_top_level = self.depth == 0; let has_columns = self.has_columns(); let buf = &mut self.write_buf; buf.clear(); let branch = if is_top_level { "" } else if !is_last { "├─ " } else { "╰─ " }; buf.extend([self.current_prefix.as_str(), branch, name]); // Right-pad name if `has_columns` if has_columns { let max_span = self.max_name_span; let buf_len = buf.chars().count(); let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); buf.extend(repeat(' ').take(pad_len)); if buf_len > max_span { self.max_name_span = buf_len; } } // Write column headings. if has_columns && is_top_level { let names = TreeColumnData::from_fn(TreeColumn::name); names.write(buf, &mut self.column_widths); } // Write column spacers. if has_columns && !is_top_level { TreeColumnData([""; TreeColumn::COUNT]).write(buf, &mut self.column_widths); } println!("{buf}"); self.depth += 1; if !is_top_level { self.current_prefix.push_str(if !is_last { "│ " } else { " " }); } } /// Exit the current parent node. pub fn finish_parent(&mut self) { self.depth -= 1; // Improve legibility for multiple top-level parents. if self.depth == 0 { println!(); } // The prefix is extended by 3 `char`s at a time. 
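// Worked example (hypothetical state): a prefix of "│  │  " (two nested,
// non-last levels) is truncated to "│  ". Chars are counted rather than bytes
// because "│" is a multi-byte box-drawing character.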
let new_prefix_len = { let mut iter = self.current_prefix.chars(); _ = iter.by_ref().rev().nth(2); iter.as_str().len() }; self.current_prefix.truncate(new_prefix_len); } /// Indicate that the next child node was ignored. /// /// This semantically combines start/finish operations. pub fn ignore_leaf(&mut self, name: &str, is_last: bool) { let has_columns = self.has_columns(); let buf = &mut self.write_buf; buf.clear(); let branch = if !is_last { "├─ " } else { "╰─ " }; buf.extend([self.current_prefix.as_str(), branch, name]); right_pad_buffer(buf, &mut self.max_name_span); if has_columns { TreeColumnData::from_first("(ignored)").write(buf, &mut self.column_widths); } else { buf.push_str("(ignored)"); } println!("{buf}"); } /// Enter a leaf node. pub fn start_leaf(&mut self, name: &str, is_last: bool) { let has_columns = self.has_columns(); let buf = &mut self.write_buf; buf.clear(); let branch = if !is_last { "├─ " } else { "╰─ " }; buf.extend([self.current_prefix.as_str(), branch, name]); // Right-pad buffer if this leaf will have info displayed. if has_columns { let max_span = self.max_name_span; let buf_len = buf.chars().count(); let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); buf.extend(repeat(' ').take(pad_len)); if buf_len > max_span { self.max_name_span = buf_len; } } print!("{buf}"); _ = std::io::stdout().flush(); } /// Exit the current leaf node. pub fn finish_empty_leaf(&mut self) { println!(); } /// Exit the current leaf node, emitting statistics. pub fn finish_leaf(&mut self, is_last: bool, stats: &Stats, bytes_format: BytesFormat) { let prep_buffer = |buf: &mut String, max_span: &mut usize| { buf.clear(); buf.push_str(&self.current_prefix); if !is_last { buf.push('│'); } right_pad_buffer(buf, max_span); }; let buf = &mut self.write_buf; buf.clear(); // Serialize max alloc counts and sizes early so we can resize columns // early. let serialized_max_alloc_counts = if stats.max_alloc.size.is_zero() { None } else { Some(TreeColumn::ALL.map(|column| { let Some(&max_alloc_count) = column.get_stat(&stats.max_alloc.count) else { return String::new(); }; let prefix = if column.is_first() { " " } else { "" }; format!("{prefix}{}", util::fmt::format_f64(max_alloc_count, 4)) })) }; let serialized_max_alloc_sizes = if stats.max_alloc.size.is_zero() { None } else { Some(TreeColumn::ALL.map(|column| { let Some(&max_alloc_size) = column.get_stat(&stats.max_alloc.size) else { return String::new(); }; let prefix = if column.is_first() { " " } else { "" }; format!("{prefix}{}", util::fmt::format_bytes(max_alloc_size, 4, bytes_format)) })) }; // Serialize alloc tallies early so we can resize columns early. let serialized_alloc_tallies = AllocOp::ALL.map(|op| { let tally = stats.alloc_tallies.get(op); if tally.is_zero() { return None; } let column_tallies = TreeColumn::ALL.map(|column| { let prefix = if column.is_first() { " " } else { "" }; let tally = AllocTally { count: column.get_stat(&tally.count).copied()?, size: column.get_stat(&tally.size).copied()?, }; Some((prefix, tally)) }); Some(AllocTally { count: column_tallies.map(|tally| { if let Some((prefix, tally)) = tally { format!("{prefix}{}", util::fmt::format_f64(tally.count, 4)) } else { String::new() } }), size: column_tallies.map(|tally| { if let Some((prefix, tally)) = tally { format!("{prefix}{}", util::fmt::format_bytes(tally.size, 4, bytes_format)) } else { String::new() } }), }) }); // Serialize counter stats early so we can resize columns early. 
let serialized_counters = KnownCounterKind::ALL.map(|counter_kind| { let counter_stats = stats.get_counts(counter_kind); TreeColumn::ALL .map(|column| -> Option<String> { let count = *column.get_stat(counter_stats?)?; let time = *column.get_stat(&stats.time)?; Some( AnyCounter::known(counter_kind, count) .display_throughput(time, bytes_format) .to_string(), ) }) .map(Option::unwrap_or_default) });
// Set column widths based on serialized strings. for column in TreeColumn::time_stats() { let width = &mut self.column_widths[column as usize]; let mut update_width = |s: &str| { *width = (*width).max(s.chars().count()); }; for counter in &serialized_counters { update_width(&counter[column as usize]); } let serialized_max_alloc_counts = serialized_max_alloc_counts.iter().flatten(); let serialized_max_alloc_sizes = serialized_max_alloc_sizes.iter().flatten(); for s in serialized_max_alloc_counts.chain(serialized_max_alloc_sizes) { update_width(s); } for s in serialized_alloc_tallies .iter() .flatten() .flat_map(AllocTally::as_array) .map(|values| &values[column as usize]) { update_width(s); } }
// Write time stats with iter and sample counts. TreeColumnData::from_fn(|column| -> String { let stat: &dyn ToString = match column { TreeColumn::Fastest => &stats.time.fastest, TreeColumn::Slowest => &stats.time.slowest, TreeColumn::Median => &stats.time.median, TreeColumn::Mean => &stats.time.mean, TreeColumn::Samples => &stats.sample_count, TreeColumn::Iters => &stats.iter_count, }; stat.to_string() }) .as_ref::<str>() .write(buf, &mut self.column_widths);
println!("{buf}");
// Write counter stats. let counter_stats = serialized_counters.map(TreeColumnData);
for counter_kind in KnownCounterKind::ALL { let counter_stats = counter_stats[counter_kind as usize].as_ref::<str>(); // Skip empty rows. if counter_stats.0.iter().all(|s| s.is_empty()) { continue; } prep_buffer(buf, &mut self.max_name_span); counter_stats.write(buf, &mut self.column_widths); println!("{buf}"); }
// Write max allocated bytes. if serialized_max_alloc_counts.is_some() || serialized_max_alloc_sizes.is_some() { prep_buffer(buf, &mut self.max_name_span); TreeColumnData::from_first("max alloc:").write(buf, &mut self.column_widths); println!("{buf}"); for serialized in [serialized_max_alloc_counts.as_ref(), serialized_max_alloc_sizes.as_ref()] .into_iter() .flatten() { prep_buffer(buf, &mut self.max_name_span); TreeColumnData::from_fn(|column| serialized[column as usize].as_str()) .write(buf, &mut self.column_widths); println!("{buf}"); } }
// Write allocation tallies. for op in [AllocOp::Alloc, AllocOp::Dealloc, AllocOp::Grow, AllocOp::Shrink] { let Some(tallies) = &serialized_alloc_tallies[op as usize] else { continue; }; prep_buffer(buf, &mut self.max_name_span); TreeColumnData::from_first(op.prefix()).write(buf, &mut self.column_widths); println!("{buf}"); for value in tallies.as_array() { prep_buffer(buf, &mut self.max_name_span); TreeColumnData::from_fn(|column| value[column as usize].as_str()) .write(buf, &mut self.column_widths); println!("{buf}"); } } }
fn has_columns(&self) -> bool { !self.column_widths.iter().all(|&w| w == 0) } }
/// Columns of the table next to the tree.
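/// A sketch of the header row these columns produce (names from
/// [`TreeColumn::name`], separated as in [`TreeColumnData::write`]):
///
/// ```text
/// fastest │ slowest │ median │ mean │ samples │ iters
/// ```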
#[derive(Clone, Copy, PartialEq, Eq)] pub(crate) enum TreeColumn { Fastest, Slowest, Median, Mean, Samples, Iters, } impl TreeColumn { pub const COUNT: usize = 6; pub const ALL: [Self; Self::COUNT] = { use TreeColumn::*; [Fastest, Slowest, Median, Mean, Samples, Iters] }; #[inline] pub fn time_stats() -> impl Iterator<Item = Self> { use TreeColumn::*; [Fastest, Slowest, Median, Mean].into_iter() } #[inline] pub fn is_first(self) -> bool { let [first, ..] = Self::ALL; self == first } #[inline] pub fn is_last(self) -> bool { let [.., last] = Self::ALL; self == last } fn name(self) -> &'static str { match self { Self::Fastest => "fastest", Self::Slowest => "slowest", Self::Median => "median", Self::Mean => "mean", Self::Samples => "samples", Self::Iters => "iters", } } #[inline] pub fn is_time_stat(self) -> bool { use TreeColumn::*; matches!(self, Fastest | Slowest | Median | Mean) } #[inline] fn get_stat<T>(self, stats: &StatsSet<T>) -> Option<&T> { match self { Self::Fastest => Some(&stats.fastest), Self::Slowest => Some(&stats.slowest), Self::Median => Some(&stats.median), Self::Mean => Some(&stats.mean), Self::Samples | Self::Iters => None, } } } #[derive(Default)] struct TreeColumnData<T>([T; TreeColumn::COUNT]); impl<T> TreeColumnData<T> { #[inline] fn from_first(value: T) -> Self where Self: Default, { let mut data = Self::default(); data.0[0] = value; data } #[inline] fn from_fn<F>(f: F) -> Self where F: FnMut(TreeColumn) -> T, { Self(TreeColumn::ALL.map(f)) } } impl TreeColumnData<&str> { /// Writes the column data into the buffer. fn write(&self, buf: &mut String, column_widths: &mut [usize; TreeColumn::COUNT]) { for (column, value) in self.0.iter().enumerate() { let is_first = column == 0; let is_last = column == TreeColumn::COUNT - 1; let value_width = value.chars().count(); // Write separator. if !is_first { let mut sep = " │ "; // Prevent trailing spaces. if is_last && value_width == 0 { sep = &sep[..sep.len() - 1]; }; buf.push_str(sep); } buf.push_str(value); // Right-pad remaining width or update column width to new maximum. if !is_last { if let Some(rem_width) = column_widths[column].checked_sub(value_width) { buf.extend(repeat(' ').take(rem_width)); } else { column_widths[column] = value_width; } } } } } impl<T> TreeColumnData<T> { #[inline] fn as_ref<U: ?Sized>(&self) -> TreeColumnData<&U> where T: AsRef<U>, { TreeColumnData::from_fn(|column| self.0[column as usize].as_ref()) } } fn right_pad_buffer(buf: &mut String, max_span: &mut usize) { let buf_len = buf.chars().count(); let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); buf.extend(repeat(' ').take(pad_len)); if buf_len > *max_span { *max_span = buf_len; } } divan-0.1.21/src/util/fmt.rs000064400000000000000000000155071046102023000137160ustar 00000000000000use std::fmt; use crate::counter::{AnyCounter, BytesFormat, KnownCounterKind}; /// Formats an `f64` to the given number of significant figures. pub(crate) fn format_f64(val: f64, sig_figs: usize) -> String { let mut str = val.to_string(); if let Some(dot_index) = str.find('.') { let fract_digits = sig_figs.saturating_sub(dot_index); if fract_digits == 0 { str.truncate(dot_index); } else { let fract_start = dot_index + 1; let fract_end = fract_start + fract_digits; let fract_range = fract_start..fract_end; if let Some(fract_str) = str.get(fract_range) { // Get the offset from the end before all 0s.
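// For example: with `val = 12.305` and `sig_figs = 4`, the kept fraction is "30"; the scan below drops the trailing '0', yielding "12.3", and a fraction that is all zeros truncates back to the integer part.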
let pre_zero = fract_str.bytes().rev().enumerate().find_map(|(i, b)| { if b != b'0' { Some(i) } else { None } }); if let Some(pre_zero) = pre_zero { str.truncate(fract_end - pre_zero); } else { str.truncate(dot_index); } } } } str } pub(crate) fn format_bytes(val: f64, sig_figs: usize, bytes_format: BytesFormat) -> String { let (val, scale) = scale_value(val, bytes_format); let mut result = format_f64(val, sig_figs); result.push(' '); result.push_str(scale.suffix(ScaleFormat::Bytes(bytes_format))); result } pub(crate) struct DisplayThroughput<'a> { pub counter: &'a AnyCounter, pub picos: f64, pub bytes_format: BytesFormat, } impl fmt::Debug for DisplayThroughput<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(self, f) } } impl fmt::Display for DisplayThroughput<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let picos = self.picos; let count = self.counter.count(); let count_per_sec = if count == 0 { 0. } else { count as f64 * (1e12 / picos) }; let format = match self.counter.kind { KnownCounterKind::Bytes => ScaleFormat::BytesThroughput(self.bytes_format), KnownCounterKind::Chars => ScaleFormat::CharsThroughput, KnownCounterKind::Cycles => ScaleFormat::CyclesThroughput, KnownCounterKind::Items => ScaleFormat::ItemsThroughput, }; let (val, scale) = scale_value(count_per_sec, format.bytes_format()); let sig_figs = f.precision().unwrap_or(4); let mut str = format_f64(val, sig_figs); str.push(' '); str.push_str(scale.suffix(format)); // Fill up to specified width. if let Some(fill_len) = f.width().and_then(|width| width.checked_sub(str.len())) { match f.align() { None | Some(fmt::Alignment::Left) => { str.extend(std::iter::repeat(f.fill()).take(fill_len)); } _ => return Err(fmt::Error), } } f.write_str(&str) } } /// Converts a value to the appropriate scale. 
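/// For example, `scale_value(1_500_000.0, BytesFormat::Decimal)` returns `(1.5, Scale::Mega)`, and `scale_value(2048.0, BytesFormat::Binary)` returns `(2.0, Scale::Kilo)`.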
fn scale_value(value: f64, bytes_format: BytesFormat) -> (f64, Scale) { let starts = scale_starts(bytes_format); let scale = if value.is_infinite() || value < starts[1] { Scale::One } else if value < starts[2] { Scale::Kilo } else if value < starts[3] { Scale::Mega } else if value < starts[4] { Scale::Giga } else if value < starts[5] { Scale::Tera } else { Scale::Peta }; (value / starts[scale as usize], scale) } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum Scale { One, Kilo, Mega, Giga, Tera, Peta, } #[derive(Clone, Copy)] pub(crate) enum ScaleFormat { Bytes(BytesFormat), BytesThroughput(BytesFormat), CharsThroughput, CyclesThroughput, ItemsThroughput, } impl ScaleFormat { pub fn bytes_format(self) -> BytesFormat { match self { Self::Bytes(format) | Self::BytesThroughput(format) => format, Self::CharsThroughput | Self::CyclesThroughput | Self::ItemsThroughput => { BytesFormat::Decimal } } } } fn scale_starts(bytes_format: BytesFormat) -> &'static [f64; Scale::COUNT] { const STARTS: &[[f64; Scale::COUNT]; 2] = &[ [1., 1e3, 1e6, 1e9, 1e12, 1e15], [ 1., 1024., 1024u64.pow(2) as f64, 1024u64.pow(3) as f64, 1024u64.pow(4) as f64, 1024u64.pow(5) as f64, ], ]; &STARTS[bytes_format as usize] } impl Scale { const COUNT: usize = 6; pub fn suffix(self, format: ScaleFormat) -> &'static str { match format { ScaleFormat::Bytes(format) => { const SUFFIXES: &[[&str; Scale::COUNT]; 2] = &[ ["B", "KB", "MB", "GB", "TB", "PB"], ["B", "KiB", "MiB", "GiB", "TiB", "PiB"], ]; SUFFIXES[format as usize][self as usize] } ScaleFormat::BytesThroughput(format) => { const SUFFIXES: &[[&str; Scale::COUNT]; 2] = &[ ["B/s", "KB/s", "MB/s", "GB/s", "TB/s", "PB/s"], ["B/s", "KiB/s", "MiB/s", "GiB/s", "TiB/s", "PiB/s"], ]; SUFFIXES[format as usize][self as usize] } ScaleFormat::CharsThroughput => { const SUFFIXES: &[&str; Scale::COUNT] = &["char/s", "Kchar/s", "Mchar/s", "Gchar/s", "Tchar/s", "Pchar/s"]; SUFFIXES[self as usize] } ScaleFormat::CyclesThroughput => { const SUFFIXES: &[&str; Scale::COUNT] = &["Hz", "KHz", "MHz", "GHz", "THz", "PHz"]; SUFFIXES[self as usize] } ScaleFormat::ItemsThroughput => { const SUFFIXES: &[&str; Scale::COUNT] = &["item/s", "Kitem/s", "Mitem/s", "Gitem/s", "Titem/s", "Pitem/s"]; SUFFIXES[self as usize] } } } } #[cfg(test)] mod tests { use super::*; #[test] fn scale_value() { #[track_caller] fn test(n: f64, format: BytesFormat, expected_value: f64, expected_scale: Scale) { assert_eq!(super::scale_value(n, format), (expected_value, expected_scale)); } #[track_caller] fn test_decimal(n: f64, expected_value: f64, expected_scale: Scale) { test(n, BytesFormat::Decimal, expected_value, expected_scale); } test_decimal(1., 1., Scale::One); test_decimal(1_000., 1., Scale::Kilo); test_decimal(1_000_000., 1., Scale::Mega); test_decimal(1_000_000_000., 1., Scale::Giga); test_decimal(1_000_000_000_000., 1., Scale::Tera); test_decimal(1_000_000_000_000_000., 1., Scale::Peta); } } divan-0.1.21/src/util/macros.rs000064400000000000000000000005761046102023000144140ustar 00000000000000/// `assert!` that's only checked in debug builds and is otherwise an /// optimization hint in release builds. macro_rules! assert_unchecked { ($condition:expr $(, $message:expr)* $(,)?) 
=> { if cfg!(any(debug_assertions, miri)) { assert!($condition $(, $message)*); } else { $crate::util::assert_unchecked($condition); } } } divan-0.1.21/src/util/mod.rs000064400000000000000000000070401046102023000137000ustar 00000000000000use std::{ mem::ManuallyDrop, num::NonZeroUsize, sync::atomic::{AtomicUsize, Ordering::Relaxed}, }; use regex::Regex; #[macro_use] mod macros; pub mod fmt; pub mod sort; pub mod split_vec; pub mod sync; pub mod thread; pub mod ty; /// Public-in-private type like `()` but meant to be externally-unreachable. /// /// Using this in place of `()` for `GenI` prevents `Bencher::with_inputs` from /// working with `()` unintentionally. #[non_exhaustive] pub struct Unit; /// Public-in-private trait to make `DivanConfig::skip_regex` polymorphic over /// regular expression types. pub trait IntoRegex { fn into_regex(self) -> Regex; } impl IntoRegex for Regex { #[inline] fn into_regex(self) -> Regex { self } } impl IntoRegex for &str { #[inline] #[track_caller] fn into_regex(self) -> Regex { Regex::new(self).unwrap() } } impl IntoRegex for String { #[inline] #[track_caller] fn into_regex(self) -> Regex { Regex::new(&self).unwrap() } } /// [`std::hint::assert_unchecked`] polyfill. #[inline] pub(crate) const unsafe fn assert_unchecked(cond: bool) { if !cond { std::hint::unreachable_unchecked(); } } #[inline] pub(crate) fn defer<F: FnOnce()>(f: F) -> impl Drop { struct Defer<F: FnOnce()>(ManuallyDrop<F>); impl<F: FnOnce()> Drop for Defer<F> { #[inline] fn drop(&mut self) { let f = unsafe { ManuallyDrop::take(&mut self.0) }; f(); } } Defer(ManuallyDrop::new(f)) } /// Returns the index of `ptr` in the slice, assuming it is in the slice. #[inline] pub(crate) fn slice_ptr_index<T>(slice: &[T], ptr: *const T) -> usize { // Safe pointer `offset_from`. (ptr as usize - slice.as_ptr() as usize) / size_of::<T>() } /// Returns the values in the middle of `slice`. /// /// If the slice has an even length, two middle values exist. #[inline] pub(crate) fn slice_middle<T>(slice: &[T]) -> &[T] { let len = slice.len(); if len == 0 { slice } else if len % 2 == 0 { &slice[(len / 2) - 1..][..2] } else { &slice[len / 2..][..1] } } /// Cached [`std::thread::available_parallelism`]. #[inline] pub(crate) fn known_parallelism() -> NonZeroUsize { static CACHED: AtomicUsize = AtomicUsize::new(0); #[cold] fn slow() -> NonZeroUsize { let n = std::thread::available_parallelism().unwrap_or(NonZeroUsize::MIN); match CACHED.compare_exchange(0, n.get(), Relaxed, Relaxed) { Ok(_) => n, // SAFETY: Zero is checked by us and competing threads. Err(n) => unsafe { NonZeroUsize::new_unchecked(n) }, } } match NonZeroUsize::new(CACHED.load(Relaxed)) { Some(n) => n, None => slow(), } } /// Returns `true` if running under [`cargo-nextest`](https://nexte.st). pub(crate) fn is_cargo_nextest() -> bool { std::env::var_os("NEXTEST").unwrap_or_default() == "1" } #[cfg(test)] mod tests { use crate::black_box; use super::*; #[test] fn known_parallelism() { let f: fn() -> NonZeroUsize = super::known_parallelism; assert_eq!(black_box(f)(), black_box(f)()); } #[test] fn slice_middle() { use super::slice_middle; assert_eq!(slice_middle::<i32>(&[]), &[]); assert_eq!(slice_middle(&[1]), &[1]); assert_eq!(slice_middle(&[1, 2]), &[1, 2]); assert_eq!(slice_middle(&[1, 2, 3]), &[2]); assert_eq!(slice_middle(&[1, 2, 3, 4]), &[2, 3]); assert_eq!(slice_middle(&[1, 2, 3, 4, 5]), &[3]); } } divan-0.1.21/src/util/sort.rs000064400000000000000000000062061046102023000141130ustar 00000000000000use std::cmp::Ordering; /// Compares strings by treating internal integers as atomic units.
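/// For example, `natural_cmp("A<4>", "A<16>")` is `Ordering::Less` because the embedded integers compare as `4 < 16`, whereas a plain lexicographic comparison would order `"A<16>"` first.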
pub fn natural_cmp(a: &str, b: &str) -> Ordering { Iterator::cmp(Tokenizer { input: a }, Tokenizer { input: b }) } #[inline] fn cmp_int(mut a: &str, mut b: &str) -> Ordering { a = a.trim_start_matches('0'); b = b.trim_start_matches('0'); // Compare to 0. match (a.is_empty(), b.is_empty()) { (true, true) => return Ordering::Equal, (true, false) => return Ordering::Less, (false, true) => return Ordering::Greater, _ => {} } // Compare length. match a.len().cmp(&b.len()) { Ordering::Equal => {} ord => return ord, } // Compare digits. a.cmp(b) } #[derive(PartialEq, Eq)] #[cfg_attr(test, derive(Debug))] struct Token<'a> { is_int: bool, text: &'a str, } impl PartialOrd for Token<'_> { #[inline] fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) } } impl Ord for Token<'_> { #[inline] fn cmp(&self, other: &Self) -> Ordering { if self.is_int && other.is_int { cmp_int(self.text, other.text) } else { self.text.cmp(other.text) } } } /// Lexes a string into "tokens". struct Tokenizer<'a> { /// The remaining characters to process. input: &'a str, } impl<'a> Iterator for Tokenizer<'a> { type Item = Token<'a>; #[inline] fn next(&mut self) -> Option<Self::Item> { let mut bytes = self.input.bytes(); let is_int = bytes.next()?.is_ascii_digit(); let mut kind_len = 1; for ch in bytes { // Stop on character kind change. if ch.is_ascii_digit() != is_int { break; } kind_len += 1; } unsafe { let text = self.input.get_unchecked(..kind_len); self.input = self.input.get_unchecked(kind_len..); Some(Token { is_int, text }) } } } #[cfg(test)] mod tests { use super::*; #[track_caller] fn test_sort(list: &[&str], cmp: fn(&str, &str) -> Ordering) { let mut copy = list.to_vec(); copy.sort_by(|a, b| cmp(a, b)); assert_eq!(list, copy); } #[test] fn natural_cmp() { #[track_caller] fn test(list: &[&str]) { test_sort(list, super::natural_cmp); } test(&["A<4>", "A<8>", "A<16>", "A<32>", "A<64>"]); } #[test] fn cmp_int() { #[track_caller] fn test(list: &[&str]) { test_sort(list, super::cmp_int); } test(&["4", "8", "16", "32", "64"]); test(&["4", "08"]); test(&["0", "00"]); } #[test] fn tokenize() { #[track_caller] fn test(s: &str, expected: &[Token]) { let tokens: Vec<Token> = Tokenizer { input: s }.collect(); assert_eq!(tokens, expected); } test( "A<4>", &[ Token { text: "A<", is_int: false }, Token { text: "4", is_int: true }, Token { text: ">", is_int: false }, ], ); } } divan-0.1.21/src/util/split_vec.rs000064400000000000000000000067761046102023000151260ustar 00000000000000/// `Vec` partitioned in half. /// /// This type exists to make `FilterSet` have a smaller footprint and for /// `FilterSet::is_match` to generate better code. pub(crate) struct SplitVec<T> { items: Vec<T>, split_index: usize, } impl<T> Default for SplitVec<T> { #[inline] fn default() -> Self { Self { items: Vec::default(), split_index: 0 } } } impl<T> SplitVec<T> { /// Inserts an item to the end of either the first or second half. #[inline] pub fn insert(&mut self, value: T, after_split: bool) { unsafe { // Ensure we have at least one slot available. self.items.reserve(1); let old_len = self.items.len(); let old_split = self.split_index(); let start_ptr = self.items.as_mut_ptr(); let last_ptr = start_ptr.add(old_len); let split_ptr = start_ptr.add(old_split); let value_slot = if after_split { last_ptr } else { split_ptr }; // If writing to before the split, then increment the split index // and move any value there to the end. // // NOTE: We can't use `copy_to_nonoverlapping` because both pointers // are the same if `old_len` is 0.
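// Note: inserting before the split moves the first element of the second half (if any) to the end of the vector, so the second half's relative order is not preserved (see the `mixed` test below).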
if !after_split { split_ptr.copy_to(last_ptr, 1); self.set_split_index(old_split + 1); } value_slot.write(value); self.items.set_len(old_len + 1); } } #[inline] pub fn reserve_exact(&mut self, additional: usize) { self.items.reserve_exact(additional); } /// Returns the slice of all items. #[inline] pub fn all(&self) -> &[T] { &self.items } /// Returns the split halves. #[inline] #[cfg(test)] pub fn split(&self) -> (&[T], &[T]) { self.items.split_at(self.split_index()) } /// Returns where the halves are split. #[inline] pub fn split_index(&self) -> usize { let index = self.split_index; // Optimization hint to remove bounds checks. let len = self.items.len(); unsafe { assert_unchecked!(index <= len, "index {index} out of bounds (len = {len})") } index } /// Sets where the halves are split. #[inline] pub unsafe fn set_split_index(&mut self, new_index: usize) { self.split_index = new_index; } } #[cfg(test)] mod tests { use super::*; #[track_caller] fn test(vec: &SplitVec<&str>, before: &[&str], after: &[&str]) { assert_eq!(vec.split(), (before, after)); } #[test] fn before_split() { let mut vec = SplitVec::<&str>::default(); vec.insert("abc", false); test(&vec, &["abc"], &[]); vec.insert("xyz", false); test(&vec, &["abc", "xyz"], &[]); } #[test] fn after_split() { let mut vec = SplitVec::<&str>::default(); vec.insert("abc", true); test(&vec, &[], &["abc"]); vec.insert("xyz", true); test(&vec, &[], &["abc", "xyz"]); } #[test] fn mixed() { let mut vec = SplitVec::<&str>::default(); vec.insert("abc", false); test(&vec, &["abc"], &[]); vec.insert("xyz", true); test(&vec, &["abc"], &["xyz"]); vec.insert("123", false); test(&vec, &["abc", "123"], &["xyz"]); vec.insert("456", true); test(&vec, &["abc", "123"], &["xyz", "456"]); vec.insert("789", false); test(&vec, &["abc", "123", "789"], &["456", "xyz"]); } } divan-0.1.21/src/util/sync.rs000064400000000000000000000044551046102023000141020ustar 00000000000000//! Synchronization utilities. #![cfg_attr(not(target_os = "macos"), allow(unused))] use std::{ ops::{Deref, DerefMut}, sync::atomic::*, }; /// Makes the wrapped value [`Send`] + [`Sync`] even though it isn't. pub struct SyncWrap<T> { pub value: T, } unsafe impl<T> Sync for SyncWrap<T> {} impl<T> Deref for SyncWrap<T> { type Target = T; #[inline] fn deref(&self) -> &Self::Target { &self.value } } impl<T> DerefMut for SyncWrap<T> { #[inline] fn deref_mut(&mut self) -> &mut Self::Target { &mut self.value } } impl<T> SyncWrap<T> { #[inline] pub const unsafe fn new(value: T) -> Self { Self { value } } } /// A convenience wrapper around `AtomicBool`. pub(crate) struct AtomicFlag(AtomicBool); impl AtomicFlag { #[inline] pub const fn new(value: bool) -> Self { Self(AtomicBool::new(value)) } #[inline] pub fn get(&self) -> bool { self.0.load(Ordering::Relaxed) } #[inline] pub fn set(&self, value: bool) { self.0.store(value, Ordering::Relaxed); } } /// Prevents false sharing by aligning to the cache line. #[derive(Clone, Copy)] #[repr(align(64))] pub(crate) struct CachePadded<T>(pub T); /// Alias to the atomic equivalent of `T`. pub(crate) type Atomic<T> = <T as WithAtomic>::Atomic; /// A type with an associated atomic type.
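/// For example, `Atomic<u32>` is `AtomicU32` (see the impls below).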
pub(crate) trait WithAtomic { type Atomic; } #[cfg(target_has_atomic = "ptr")] impl WithAtomic for usize { type Atomic = AtomicUsize; } #[cfg(target_has_atomic = "ptr")] impl WithAtomic for isize { type Atomic = AtomicIsize; } #[cfg(target_has_atomic = "8")] impl WithAtomic for u8 { type Atomic = AtomicU8; } #[cfg(target_has_atomic = "8")] impl WithAtomic for i8 { type Atomic = AtomicI8; } #[cfg(target_has_atomic = "16")] impl WithAtomic for u16 { type Atomic = AtomicU16; } #[cfg(target_has_atomic = "16")] impl WithAtomic for i16 { type Atomic = AtomicI16; } #[cfg(target_has_atomic = "32")] impl WithAtomic for u32 { type Atomic = AtomicU32; } #[cfg(target_has_atomic = "32")] impl WithAtomic for i32 { type Atomic = AtomicI32; } #[cfg(target_has_atomic = "64")] impl WithAtomic for u64 { type Atomic = AtomicU64; } #[cfg(target_has_atomic = "64")] impl WithAtomic for i64 { type Atomic = AtomicI64; } divan-0.1.21/src/util/thread.rs000064400000000000000000000147771046102023000144010ustar 00000000000000//! Threading utilities. #![cfg(target_os = "macos")] use std::{marker::PhantomData, ptr::NonNull, sync::atomic::Ordering::*}; use libc::pthread_key_t; use crate::util::sync::Atomic; const KEY_UNINIT: pthread_key_t = 0; /// Thread-local key accessed via /// [`pthread_getspecific`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_getspecific.html). pub(crate) struct PThreadKey<T: 'static> { value: AtomicPThreadKey, marker: PhantomData<&'static T>, } impl<T: 'static> PThreadKey<T> { #[inline] pub const fn new() -> Self { Self { value: AtomicPThreadKey::new(KEY_UNINIT), marker: PhantomData } } #[inline] pub fn get(&self) -> Option<NonNull<T>> { match self.value.load(Relaxed) { KEY_UNINIT => None, key => unsafe { cfg_if::cfg_if! { if #[cfg(all( not(miri), any(target_arch = "x86_64", target_arch = "aarch64"), ))] { let thread_local = fast::get_thread_local(key as usize); #[cfg(test)] assert_eq!(thread_local, libc::pthread_getspecific(key)); } else { let thread_local = libc::pthread_getspecific(key); } } NonNull::new(thread_local.cast()) }, } } /// Assigns the value with its destructor. #[inline] pub fn set<D>(&self, ptr: *const T, _: D) -> bool where D: FnOnce(NonNull<T>) + Copy, { assert_eq!(size_of::<D>(), 0); unsafe extern "C" fn dtor<T, D>(ptr: *mut libc::c_void) where T: 'static, D: FnOnce(NonNull<T>) + Copy, { // SAFETY: The dtor is zero-sized, so we can make one from thin air. let dtor: D = unsafe { std::mem::zeroed() }; // Although we're guaranteed `ptr` is not null, check in case. if let Some(ptr) = NonNull::new(ptr) { dtor(ptr.cast()); } } let shared_key = &self.value; let mut local_key = shared_key.load(Relaxed); // Race against other threads to initialize `shared_key`. if local_key == KEY_UNINIT { if unsafe { libc::pthread_key_create(&mut local_key, Some(dtor::<T, D>)) } == 0 { // Race to store our key into the global instance. // // On failure, delete our key and use the winner's key. if let Err(their_key) = shared_key.compare_exchange(KEY_UNINIT, local_key, Relaxed, Relaxed) { // SAFETY: No other thread is accessing this key. unsafe { libc::pthread_key_delete(local_key) }; local_key = their_key; } } else { // On create failure, check if another thread succeeded. local_key = shared_key.load(Relaxed); if local_key == KEY_UNINIT { return false; } } } // This is the slow path, so don't bother with writing via // `gs`/`tpidrro_el0` register. // // SAFETY: The key has been created by us or another thread. unsafe { libc::pthread_setspecific(local_key, ptr.cast()) == 0 } } } /// Alias to the atomic equivalent of `pthread_key_t`.
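/// For example, on targets where `pthread_key_t` is `u64`, this resolves to `AtomicU64`.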
pub(crate) type AtomicPThreadKey = Atomic<pthread_key_t>; /// Optimized alternatives to `pthread_getspecific`. pub(crate) mod fast { // Apple reserves key 11 (`__PTK_LIBC_RESERVED_WIN64`) for Windows: // https://github.com/apple-oss-distributions/libpthread/blob/libpthread-519/private/pthread/tsd_private.h#L99 // // Key 6 is also reserved for Windows and Go, but we don't use it because // it's more well known and likely to be used by more libraries. /// Returns a pointer to a static thread-local variable. #[inline] #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] pub fn get_static_thread_local<T>() -> *const T { unsafe { let result; std::arch::asm!( "mov {}, gs:[88]", out(reg) result, options(pure, readonly, nostack, preserves_flags), ); result } } /// Sets the static thread-local variable. /// /// # Safety /// /// If the slot is in use, we will corrupt the other user's memory. #[inline] #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] pub unsafe fn set_static_thread_local<T>(ptr: *const T) { unsafe { std::arch::asm!( "mov gs:[88], {}", in(reg) ptr, options(nostack, preserves_flags), ); } } /// Returns a pointer to the corresponding thread-local variable. /// /// The first element is reserved for `pthread_self`. This is widely known /// and also mentioned in page 251 of "*OS Internals Volume 1" by Jonathan /// Levin. /// /// It appears that `pthread_key_create` allocates a slot into the buffer /// referenced by: /// - [`gs` on x86_64](https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.41.9/libsyscall/os/tsd.h#L126) /// - [`tpidrro_el0` on AArch64](https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.41.9/libsyscall/os/tsd.h#L163) /// /// # Safety /// /// `key` must not cause an out-of-bounds lookup. #[inline] #[cfg(all(not(miri), any(target_arch = "x86_64", target_arch = "aarch64")))] pub unsafe fn get_thread_local(key: usize) -> *mut libc::c_void { #[cfg(target_arch = "x86_64")] { let result; std::arch::asm!( "mov {}, gs:[8 * {1}]", out(reg) result, in(reg) key, options(pure, readonly, nostack, preserves_flags), ); result } #[cfg(target_arch = "aarch64")] { let result: *const *mut libc::c_void; std::arch::asm!( "mrs {0}, tpidrro_el0", // Clear bottom 3 bits just in case. This was historically the CPU // core ID but that changed at some point. "and {0}, {0}, #-8", out(reg) result, options(pure, nomem, nostack, preserves_flags), ); *result.add(key) } } } divan-0.1.21/src/util/ty.rs000064400000000000000000000021021046102023000135520ustar 00000000000000use std::{ any::{Any, TypeId}, marker::PhantomData, }; /// Returns a [`TypeId`] for any type regardless of whether it is `'static`. /// /// Note that **this is not the same** as [`TypeId::of`]. #[inline] pub(crate) fn proxy_type_id<T: ?Sized>() -> TypeId { // Return the type ID of a generic closure. Any::type_id(&|| PhantomData::<T>) } /// Returns `true` if the given types are equal. #[inline] pub(crate) fn is_type_eq<A: ?Sized, B: ?Sized>() -> bool { proxy_type_id::<A>
() == proxy_type_id::<B>() } /// Convenience trait for type conversions. pub(crate) trait TypeCast { /// Converts a reference if `self` is an instance of `T`. /// /// We require `T: 'static` since we want to ensure when providing a type /// that any lifetimes are static, such as `Cow`. #[inline] fn cast_ref<T: 'static>(&self) -> Option<&T> { if is_type_eq::<Self, T>() { // SAFETY: `self` is `&T`. Some(unsafe { &*(self as *const Self as *const T) }) } else { None } } } impl<A: ?Sized> TypeCast for A {} divan-0.1.21/tests/attr_options.rs000064400000000000000000000024041046102023000152430ustar 00000000000000// Tests that attribute options produce the correct results. // Miri cannot discover benchmarks. #![cfg(not(miri))] use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use divan::Divan; static CHILD1_ITERS: AtomicUsize = AtomicUsize::new(0); static CHILD2_ITERS: AtomicUsize = AtomicUsize::new(0); static CHILD3_ITERS: AtomicUsize = AtomicUsize::new(0); #[divan::bench_group(sample_count = 10, sample_size = 50)] mod parent { use super::*; // 10 × 1 = 10 #[divan::bench_group(sample_size = 1)] mod child1 { use super::*; #[divan::bench] fn bench() { CHILD1_ITERS.fetch_add(1, SeqCst); } } // 42 × 50 = 2100 #[divan::bench_group(sample_count = 42)] mod child2 { use super::*; #[divan::bench] fn bench() { CHILD2_ITERS.fetch_add(1, SeqCst); } } mod child3 { use super::*; // 1 × 50 = 50 #[divan::bench(sample_count = 1)] fn bench() { CHILD3_ITERS.fetch_add(1, SeqCst); } } } #[test] fn iter_count() { Divan::default().run_benches(); assert_eq!(CHILD1_ITERS.load(SeqCst), 10); assert_eq!(CHILD2_ITERS.load(SeqCst), 2100); assert_eq!(CHILD3_ITERS.load(SeqCst), 50); } divan-0.1.21/tests/entry_properties.rs000064400000000000000000000057231046102023000161400ustar 00000000000000// Tests that entry benchmarks/groups have correct generated properties. // Miri cannot discover benchmarks. #![cfg(not(miri))] use divan::__private::{EntryMeta, BENCH_ENTRIES, GROUP_ENTRIES}; #[divan::bench] fn outer() {} #[divan::bench_group] mod outer_group { #[divan::bench] fn inner() {} #[divan::bench_group] mod inner_group {} } #[divan::bench] #[ignore] fn ignored_1() {} #[divan::bench(ignore)] fn ignored_2() {} #[divan::bench_group] #[allow(unused_attributes)] #[ignore] mod ignored_group { #[divan::bench] fn not_yet_ignored() {} } /// Finds `EntryMeta` based on the entry's raw name. macro_rules!
find_meta { ($entries:expr, $raw_name:literal) => { $entries .iter() .map(|entry| &entry.meta) .find(|common| common.raw_name == $raw_name) .expect(concat!($raw_name, " not found")) }; } fn find_outer() -> &'static EntryMeta { find_meta!(BENCH_ENTRIES, "outer") } fn find_inner() -> &'static EntryMeta { find_meta!(BENCH_ENTRIES, "inner") } fn find_outer_group() -> &'static EntryMeta { find_meta!(GROUP_ENTRIES, "outer_group") } fn find_inner_group() -> &'static EntryMeta { find_meta!(GROUP_ENTRIES, "inner_group") } #[test] fn file() { let file = file!(); assert_eq!(find_outer().location.file, file); assert_eq!(find_outer_group().location.file, file); assert_eq!(find_inner().location.file, file); assert_eq!(find_inner_group().location.file, file); } #[test] fn module_path() { let outer_path = module_path!(); assert_eq!(find_outer().module_path, outer_path); assert_eq!(find_outer_group().module_path, outer_path); let inner_path = format!("{outer_path}::outer_group"); assert_eq!(find_inner().module_path, inner_path); assert_eq!(find_inner_group().module_path, inner_path); } #[test] fn line() { assert_eq!(find_outer().location.line, 8); assert_eq!(find_outer_group().location.line, 11); assert_eq!(find_inner().location.line, 13); assert_eq!(find_inner_group().location.line, 16); } #[test] fn column() { assert_eq!(find_outer().location.col, 1); assert_eq!(find_outer_group().location.col, 1); assert_eq!(find_inner().location.col, 5); assert_eq!(find_inner_group().location.col, 5); } #[test] fn ignore() { fn get_ignore(meta: &EntryMeta) -> bool { meta.bench_options.as_ref().and_then(|options| options.ignore).unwrap_or_default() } assert!(get_ignore(find_meta!(BENCH_ENTRIES, "ignored_1"))); assert!(get_ignore(find_meta!(BENCH_ENTRIES, "ignored_2"))); assert!(get_ignore(find_meta!(GROUP_ENTRIES, "ignored_group"))); // Although its parent is marked as `#[ignore]`, it itself is not yet known // to be ignored. assert!(!get_ignore(find_meta!(BENCH_ENTRIES, "not_yet_ignored"))); assert!(!get_ignore(find_inner())); assert!(!get_ignore(find_inner_group())); assert!(!get_ignore(find_outer())); assert!(!get_ignore(find_outer_group())); } divan-0.1.21/tests/forbid_unsafe.rs000064400000000000000000000047711046102023000153350ustar 00000000000000// Exhaustively tests that macros work when linting against `unsafe`. 
#![forbid(unsafe_code)] use divan::Bencher; const CONST_VALUES: [usize; 3] = [1, 5, 10]; #[divan::bench] fn freestanding() {} #[divan::bench(types = [i32, &str])] fn freestanding_generic_type() {} #[divan::bench(consts = [1, 5, 10])] fn freestanding_generic_const1() {} #[divan::bench(consts = CONST_VALUES)] fn freestanding_generic_const2() {} #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] fn freestanding_generic_type_const1() {} #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] fn freestanding_generic_type_const2() {} #[divan::bench] fn contextual(_: Bencher) {} #[divan::bench(types = [i32, &str])] fn contextual_generic_type(_: Bencher) {} #[divan::bench(consts = [1, 5, 10])] fn contextual_generic_const_1(_: Bencher) {} #[divan::bench(consts = CONST_VALUES)] fn contextual_generic_const_2(_: Bencher) {} #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] fn contextual_generic_type_const_1(_: Bencher) {} #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] fn contextual_generic_type_const_2(_: Bencher) {} #[divan::bench_group] mod group { use super::*; #[divan::bench] fn freestanding() {} #[divan::bench(types = [i32, &str])] fn freestanding_generic_type() {} #[divan::bench(consts = [1, 5, 10])] fn freestanding_generic_const1() {} #[divan::bench(consts = CONST_VALUES)] fn freestanding_generic_const2() {} #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] fn freestanding_generic_type_const1() {} #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] fn freestanding_generic_type_const2() {} #[divan::bench] fn contextual(_: Bencher) {} #[divan::bench(types = [i32, &str])] fn contextual_generic_type(_: Bencher) {} #[divan::bench(consts = [1, 5, 10])] fn contextual_generic_const1(_: Bencher) {} #[divan::bench(consts = CONST_VALUES)] fn contextual_generic_const2(_: Bencher) {} #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] fn contextual_generic_type_const1(_: Bencher) {} #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] fn contextual_generic_type_const2(_: Bencher) {} } divan-0.1.21/tests/weird_usage.rs000064400000000000000000000061421046102023000150170ustar 00000000000000// Tests that ensure weird (but valid) usage behave as expected. // Miri cannot discover benchmarks. #![cfg(not(miri))] use std::time::Duration; use divan::{Divan, __private::BENCH_ENTRIES}; #[divan::bench(bytes_count = 0u8, chars_count = 0u16, cycles_count = 0u32, items_count = 0u64)] fn zero_throughput() {} #[divan::bench(min_time = Duration::ZERO)] fn min_min() {} #[divan::bench(max_time = Duration::MAX)] fn max_max() {} #[divan::bench] fn lifetime<'a>() -> &'a str { "hello" } #[divan::bench] fn embedded() { #[divan::bench] fn inner() { #[divan::bench] fn inner() {} } } #[divan::bench] fn r#raw_ident() {} #[divan::bench(r#name = "raw name ident")] fn raw_name_ident() {} #[divan::bench] extern "system" fn extern_abi_1() {} #[divan::bench] #[allow(improper_ctypes_definitions)] extern "C" fn extern_abi_2(_: divan::Bencher) {} #[divan::bench(types = [i32, u8])] extern "system" fn extern_abi_3() {} #[divan::bench(r#types = [i32, u8])] #[allow(improper_ctypes_definitions)] extern "C" fn extern_abi_4(_: divan::Bencher) {} #[divan::bench(consts = [0, -1, isize::MAX])] extern "system" fn extern_abi_5() {} #[divan::bench(consts = [0, -1, isize::MAX])] #[allow(improper_ctypes_definitions)] extern "C" fn extern_abi_6(_: divan::Bencher) {} macro_rules! 
consts { () => { [0, -1, isize::MAX] }; } #[divan::bench(consts = consts!())] fn bench_consts() {} #[divan::bench(args = [])] fn empty_args(_: usize) {} #[divan::bench(types = [])] #[allow(dead_code)] fn empty_types() {} #[divan::bench(consts = [])] #[allow(dead_code)] fn empty_consts() {} #[divan::bench(args = [], consts = [])] #[allow(dead_code)] fn empty_args_consts(_: usize) {} #[divan::bench(types = [], consts = [])] #[allow(dead_code)] fn empty_types_consts_1() {} #[divan::bench(consts = [], types = [])] #[allow(dead_code)] fn empty_types_consts_2() {} #[divan::bench(types = [], consts = [])] #[allow(dead_code)] fn empty_types_consts_3() {} #[divan::bench(consts = [], types = [])] #[allow(dead_code)] fn empty_types_consts_4() {} #[test] fn test_fn() { Divan::default().test_benches(); } // Test that each function appears the expected number of times. #[test] fn count() { let mut inner_count = 0; for entry in BENCH_ENTRIES.iter() { if entry.meta.raw_name == "inner" { inner_count += 1; } } assert_eq!(inner_count, 2); } // Test expected `BenchEntry.path` values. #[test] fn path() { for entry in BENCH_ENTRIES.iter() { // Embedded functions do not contain their parent function's name in // their `module_path!()`. if entry.meta.raw_name == "inner" { assert_eq!(entry.meta.module_path, "weird_usage"); } // "r#" is removed from raw identifiers. if entry.meta.raw_name.contains("raw_ident") { assert_eq!(entry.meta.raw_name, "r#raw_ident"); assert_eq!(entry.meta.display_name, "raw_ident"); } } }